// dbmd_core/validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and `--all` file walk here, reading the two public,
//! caller-populated fields of a [`Store`] — [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! The exceptions are the two store-level scans used by the working-set
//! path, [`Store::find_links_to_any`] and [`Store::walk`], and the schema
//! types ([`Schema`], [`Shape`]) imported from [`crate::parser`].
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// Severity of a validation [`Issue`]. Any [`Severity::Error`] fails validation
/// (non-zero exit); warnings and info do not.
///
/// The type is a small `Copy` tag, so it is passed and compared by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// Blocks: a hard violation of the format or doctrine.
    Error,
    /// A decision point the agent resolves at its discretion; does not fail
    /// validation.
    Warning,
    /// Visibility only; never affects exit status.
    Info,
}
61
/// A single structured validation finding. Agent-primary and machine-parseable
/// via `--json`; `suggestion` is a deterministic remediation hint the agent
/// applies without guessing.
///
/// Only `severity` decides whether the finding fails validation (see
/// [`Issue::is_error`]); every other field is diagnostic context.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Issue {
    /// The severity; only [`Severity::Error`] fails validation.
    pub severity: Severity,
    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
    pub code: &'static str,
    /// The file the issue is about.
    pub file: PathBuf,
    /// The 1-based line, when applicable; `None` when the issue has no single
    /// line anchor.
    pub line: Option<u32>,
    /// The frontmatter key, when the issue is about a specific field.
    pub key: Option<String>,
    /// A human-readable message.
    pub message: String,
    /// A deterministic remediation hint, when one exists.
    pub suggestion: Option<String>,
    /// Other files involved (e.g. the duplicate partner in a collision). Empty
    /// when the issue concerns `file` alone.
    pub related: Vec<PathBuf>,
}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes — one constant per row of the SPEC.md
/// § Validation table. Call sites reference these instead of bare strings so
/// the code and the SPEC table can never silently drift.
///
/// Each constant's value is its own name spelled as a string, which is the
/// form that appears in [`Issue::code`](super::Issue).
pub mod codes {
    /// path has no `DB.md`; not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// the store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// the store's `DB.md` frontmatter lacks `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` has an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// a `DB.md ## Schemas` field declaration is malformed (empty or duplicate
    /// field name) or carries an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";
    /// content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// content file can't be read (not valid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// frontmatter block isn't valid YAML (or isn't a YAML mapping).
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` isn't ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// `meta-type` is present but not one of fact / operational / conclusion.
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";
    /// content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` > 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    /// wiki-link target isn't a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// wiki-link target file doesn't exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// wiki-link target carries a `.md` extension — drop it.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// frontmatter list uses inline `[[[a]], [[b]]]` — use block form.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    /// two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// two records of a type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";
    /// a `DB.md` schema requires a field that's absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// a value doesn't match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// a `link to <prefix>/` field has a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// a value isn't in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    /// a write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// a file with an `### Ignored types` type exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// a `meta-type: conclusion` record derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    /// a `log.md` entry header timestamp is unparseable.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// a `log.md` entry kind isn't recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries aren't in non-decreasing time order (possible rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    /// a non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// an `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// a file isn't listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// an `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// an index's `scope:` doesn't match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// an index entry's text doesn't match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// a type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// a file isn't in the `index.jsonl`, or a jsonl record points at a missing
    /// file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// an `index.jsonl` record's fields don't match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    /// `tags` isn't a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
    /// a line in `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// a content file references an `asset`/`assets` path with no record in
    /// `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// an `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// an `assets.jsonl` record's path is referenced by no wrapper.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// an `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
197
/// The SPEC's upper bound on `summary` length, counted in characters. A
/// summary longer than this is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
200
/// Recognized `log.md` entry kinds (SPEC § `log.md`). Anything else →
/// `LOG_UNKNOWN_KIND` (warning, not error).
///
/// This is a closed list of exact kind strings as they appear in entry
/// headers.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
214
215// ─────────────────────────────────────────────────────────────────────────────
216//  Public entrypoints
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// **Loop default.** Validate the working set: content files changed since
220/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
221/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
222/// none of the cross-file global passes (entity-dedup, every-index sync,
223/// `log.md` ordering) that `--all` adds. If the default call finds no logged
224/// changed objects, it falls back to a per-file content sweep so an externally
225/// edited or freshly copied store cannot pass vacuously.
226///
227/// **Cost.** The changed set is read from `log.md` — O(changed): every
228/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
229/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
230/// over that set plus its incoming linkers — also O(changed). The one part that
231/// is *not* O(changed) is discovering those incoming linkers: a link to a
232/// changed path can live in the body or a typed frontmatter field of any file,
233/// so it is found by a **single** embedded-ripgrep pass over the store
234/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
235/// scan, flat in the changed-set size. (It was previously a full store read
236/// *per* changed object — `O(changed × store)`; that is the blow-up this path
237/// no longer pays.) The unavoidable single content scan is the same shape as
238/// free-text `dbmd search`; the sidecar `links` projection can't replace it
239/// because it omits body/typed-field edges.
240pub fn validate_working_set(
241    store: &Store,
242    since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244    if !store_marker_present(store) {
245        return Ok(vec![not_a_store_issue(store)]);
246    }
247
248    let cutoff = match since {
249        Some(ts) => Some(ts),
250        None => last_validate_at(store),
251    };
252
253    // 1. Changed objects, straight from the log (O(changed) — never a walk).
254    let changed = changed_objects_since(store, cutoff);
255    if changed.is_empty() && since.is_none() {
256        return validate_content_sweep(store);
257    }
258
259    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
260    //    path (the linker may now be stale even though it didn't change). The
261    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
262    //    pass over the store for the WHOLE changed set (one `.md` walk, one
263    //    presence-only/early-exit scan per file), not one walk per object. This
264    //    is the fix for the `O(changed × store)` blow-up that calling
265    //    `find_links_to` in a loop produced (a full store read per changed
266    //    object); the cost is now a single store scan regardless of how many
267    //    objects changed. A returned self-link is harmlessly deduped by the set
268    //    (the object is already inserted below).
269    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270    let mut working: BTreeSet<PathBuf> = changed;
271    for linker in store.find_links_to_any(&changed_targets)? {
272        working.insert(linker);
273    }
274
275    let mut issues = Vec::new();
276    for rel in &working {
277        let abs = store.root.join(rel);
278        // A changed path can be a *deletion* — skip files that no longer exist;
279        // the incoming-linker scan above already flagged links into them.
280        if !abs.is_file() {
281            continue;
282        }
283        // `None` basename index: the working-set pass does not build the
284        // store-wide basename map (that is a `--all`-only structure), so a bare
285        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
286        // `--all` sweep does the ambiguity upgrade.
287        check_content_file(store, rel, &abs, None, &mut issues);
288    }
289    issues.sort_by(issue_order);
290    Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294    let mut issues = Vec::new();
295    for rel in store.walk()? {
296        let abs = store.root.join(&rel);
297        check_content_file(store, &rel, &abs, None, &mut issues);
298    }
299    issues.sort_by(issue_order);
300    Ok(issues)
301}
302
303/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
304/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
305/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
306/// loop.
307pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308    if !store_marker_present(store) {
309        return Ok(vec![not_a_store_issue(store)]);
310    }
311
312    let mut issues = Vec::new();
313
314    // Store-identity file: `DB.md` shape (type / required fields / section
315    // headers). A single root file, checked once in the sweep — not a content
316    // file (it carries no `summary`), so it is not part of `walk_content_files`.
317    check_db_md(store, &mut issues);
318
319    let files = walk_content_files(&store.root);
320
321    // The basename index makes the short-form wiki-link check able to upgrade a
322    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
323    // Built once from the already-gathered sweep list (no extra walk); only the
324    // `--all` path has it (the working-set path stays O(changed)).
325    let basenames = build_basename_index(&files);
326
327    // Per-file checks over the whole store.
328    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329    for rel in &files {
330        let abs = store.root.join(rel);
331        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332            parsed.push((rel.clone(), p));
333        }
334    }
335
336    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
337    check_duplicates(store, &parsed, &mut issues);
338
339    // Cross-file: hierarchical index.md + index.jsonl sync.
340    check_indexes(store, &files, &mut issues);
341
342    // Cross-file: log.md well-formedness + ordering.
343    check_log(store, &mut issues);
344
345    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
346    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
347    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
348    // a fresh clone with no restored bytes still passes here.
349    check_assets(store, &parsed, &mut issues);
350
351    issues.sort_by(issue_order);
352    Ok(issues)
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356//  Per-file content checks (shared by both scopes)
357// ─────────────────────────────────────────────────────────────────────────────
358
/// What `validate_all`'s cross-file pass needs from a per-file parse: the
/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
/// text-based wiki-link extraction). The body and fence-line are consumed
/// inline during the per-file pass and not carried here.
///
/// Produced by `check_content_file`; `validate_all` hands the collected
/// values to `check_duplicates` and `check_assets`.
struct Parsed {
    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
    fm: Option<BTreeMap<String, Value>>,
    /// The raw frontmatter YAML text (between the fences) — the source for
    /// text-based wiki-link extraction in dedup.
    fm_yaml: String,
}
371
372/// Run every per-file check on one content file, pushing issues. Returns the
373/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
374/// `None` only when the file is unreadable or has no frontmatter block at all
375/// (which for a content file is itself reported).
376fn check_content_file(
377    store: &Store,
378    rel: &Path,
379    abs: &Path,
380    basenames: Option<&BasenameIndex>,
381    issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383    let text = match std::fs::read_to_string(abs) {
384        Ok(t) => t,
385        Err(e) => {
386            // The file exists in the walk but can't be read as UTF-8 text
387            // (invalid bytes) or hit an I/O error. Returning `None` silently
388            // here let a store whose only content file was binary garbage pass
389            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
390            // sweep exists to prevent. Report it so the agent gets an actionable
391            // diagnostic naming the unreadable file (and `index rebuild`, which
392            // hard-fails on the same file, isn't the only signal).
393            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394                "file is not valid UTF-8 text".to_string()
395            } else {
396                format!("file could not be read: {e}")
397            };
398            push(
399                issues,
400                Severity::Error,
401                codes::FM_UNREADABLE,
402                rel,
403                None,
404                None,
405                format!("content file is unreadable: {detail}"),
406                Some(
407                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408                        .into(),
409                ),
410                vec![],
411            );
412            return None;
413        }
414    };
415
416    let is_content = is_content_file(rel);
417
418    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419        Some(split) => split,
420        None => {
421            // No frontmatter at all. For a content file that means there's no
422            // `type:` and no `summary:` — report both the way a parsed-but-empty
423            // file would, so the agent gets the same actionable codes.
424            if is_content {
425                push(
426                    issues,
427                    Severity::Error,
428                    codes::FM_MISSING_TYPE,
429                    rel,
430                    None,
431                    Some("type".into()),
432                    "content file has no frontmatter `type:`".into(),
433                    Some("add a YAML frontmatter block with `type:`".into()),
434                    vec![],
435                );
436                push(
437                    issues,
438                    Severity::Error,
439                    codes::SUMMARY_MISSING,
440                    rel,
441                    None,
442                    Some("summary".into()),
443                    "content file has no `summary`".into(),
444                    Some("run `dbmd fm init`".into()),
445                    vec![],
446                );
447            }
448            return None;
449        }
450    };
451
452    // Parse the YAML block.
453    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455        // An empty frontmatter block parses as Null; treat as an empty mapping.
456        Ok(Value::Null) => Some(BTreeMap::new()),
457        Ok(_) => {
458            // A scalar / sequence at the top level isn't a frontmatter mapping.
459            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
460            // block is opaque, so there is no single offending field line.
461            push(
462                issues,
463                Severity::Error,
464                codes::FM_MALFORMED_YAML,
465                rel,
466                Some(1),
467                None,
468                "frontmatter is not a YAML mapping".into(),
469                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470                vec![],
471            );
472            None
473        }
474        Err(e) => {
475            // Anchor to line 1 (the opening `---`): an unparseable block has no
476            // single offending field line; the agent re-reads the whole block.
477            push(
478                issues,
479                Severity::Error,
480                codes::FM_MALFORMED_YAML,
481                rel,
482                Some(1),
483                None,
484                format!("frontmatter block isn't valid YAML: {e}"),
485                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486                vec![],
487            );
488            None
489        }
490    };
491
492    if let Some(map) = &fm {
493        // The detailed frontmatter checks only run when the YAML parsed.
494        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495    }
496
497    // Wiki-link doctrine checks run on the body of content files. They are NOT
498    // run on:
499    //   - the root append-only meta files `log.md`/`DB.md` — they reach this
500    //     function only via the working-set incoming-linker scan (`walk_all_md`
501    //     includes them), and `validate --all` never link-checks their bodies. A
502    //     historical `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]`
503    //     in DB.md's `## Agent instructions`, must not be `WIKI_LINK_BROKEN`; the
504    //     log is append-only, so "fix the link" can't even be applied.
505    //   - the derived catalogs `index.md`/`index.jsonl` — their "links" are
506    //     GENERATED catalog entries, not authored body wiki-links. A folder's
507    //     `index.md` is pulled into the working set as an incoming linker (an
508    //     entry `[[records/contacts/a]]` IS a wiki-link to a member, so touching
509    //     or deleting any member drags its folder `index.md` in). Its integrity
510    //     is the job of `check_indexes` under `--all`, which reports a dangling
511    //     entry as `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"). Body-link-
512    //     checking it here instead emitted `WIKI_LINK_BROKEN` ("create the
513    //     target") for the SAME condition — a different code with the OPPOSITE
514    //     remedy across the loop default vs the sweep, steering an agent to
515    //     recreate deleted data. `walk_content_files` skips `index.md` under
516    //     `--all` for exactly this reason; the working-set scope must match.
517    // Without these guards the two scopes disagree on the same store.
518    if !is_root_meta_file(rel) && !is_index_catalog_file(rel) {
519        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
520    }
521
522    Some(Parsed { fm, fm_yaml })
523}
524
525/// All frontmatter-level checks for a content file with valid YAML.
526fn check_frontmatter(
527    store: &Store,
528    rel: &Path,
529    fm: &BTreeMap<String, Value>,
530    fm_yaml: &str,
531    basenames: Option<&BasenameIndex>,
532    issues: &mut Vec<Issue>,
533    is_content: bool,
534) {
535    let type_ = fm.get("type").and_then(scalar_string);
536
537    // ── type ────────────────────────────────────────────────────────────────
538    if is_content && type_.is_none() {
539        push(
540            issues,
541            Severity::Error,
542            codes::FM_MISSING_TYPE,
543            rel,
544            fm_key_line_or_top(fm_yaml, "type"),
545            Some("type".into()),
546            "content file has no `type:`".into(),
547            Some("add a `type:` field (e.g. `type: contact`)".into()),
548            vec![],
549        );
550    }
551
552    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
553    // Present-but-out-of-enum is an error; absent is fine (effective default
554    // `fact`). Sources don't normally carry one, but validating the value when
555    // present is layer-agnostic and harmless.
556    if is_content {
557        // Branch on the raw value, NOT `and_then(scalar_string)`. Pre-filtering
558        // through `scalar_string` made a list/mapping value (which returns `None`)
559        // short-circuit the whole check, so a structurally-wrong `meta-type`
560        // slipped through clean AND was silently reclassified as the default
561        // `fact` by the rest of the toolkit. Absent or explicit-`null` is fine
562        // (effective default `fact`); a present non-null value must be a scalar in
563        // the closed enum. This mirrors the sibling timestamp check below, which
564        // was already hardened against the same non-scalar escape.
565        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
566            match scalar_string(v) {
567                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
568                Some(mt) => push(
569                    issues,
570                    Severity::Error,
571                    codes::FM_BAD_META_TYPE,
572                    rel,
573                    fm_key_line_or_top(fm_yaml, "meta-type"),
574                    Some("meta-type".into()),
575                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
576                    Some(
577                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
578                            .into(),
579                    ),
580                    vec![],
581                ),
582                None => push(
583                    issues,
584                    Severity::Error,
585                    codes::FM_BAD_META_TYPE,
586                    rel,
587                    fm_key_line_or_top(fm_yaml, "meta-type"),
588                    Some("meta-type".into()),
589                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
590                     string, found a list or mapping"
591                        .to_string(),
592                    Some(
593                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
594                            .into(),
595                    ),
596                    vec![],
597                ),
598            }
599        }
600    }
601
602    // ── summary (universal on content files) ──────────────────────────────────
603    if is_content {
604        check_summary(rel, fm, fm_yaml, issues);
605    }
606
607    // ── timestamps: created / updated ─────────────────────────────────────────
608    // The `created`/`updated` contract is content-file-only; meta files
609    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
610    if is_content {
611        for (key, missing_code) in [
612            ("created", codes::FM_MISSING_CREATED),
613            ("updated", codes::FM_MISSING_UPDATED),
614        ] {
615            // A key that is absent, or present-but-`null`, has *no* timestamp →
616            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
617            // "no timestamp", so a null `created:` must read as missing, not
618            // silently pass.
619            let value = fm.get(key);
620            let missing = value.is_none() || value.is_some_and(Value::is_null);
621            if missing {
622                push(
623                    issues,
624                    Severity::Error,
625                    missing_code,
626                    rel,
627                    fm_key_line_or_top(fm_yaml, key),
628                    Some(key.into()),
629                    format!("content file has no `{key}:` timestamp"),
630                    Some(format!(
631                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
632                    )),
633                    vec![],
634                );
635            } else if let Some(v) = value {
636                // Present and non-null. A scalar is checked for ISO-8601; a
637                // sequence/mapping is not a timestamp string at all and so
638                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
639                // through the way it did when `scalar_string` returned `None`
640                // and the branch silently no-oped).
641                match scalar_string(v) {
642                    Some(s) if is_iso8601(&s) => {}
643                    Some(s) => push(
644                        issues,
645                        Severity::Error,
646                        codes::FM_BAD_TIMESTAMP,
647                        rel,
648                        fm_key_line(fm_yaml, key),
649                        Some(key.into()),
650                        format!("`{key}` is not ISO-8601: {s:?}"),
651                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
652                        vec![],
653                    ),
654                    None => push(
655                        issues,
656                        Severity::Error,
657                        codes::FM_BAD_TIMESTAMP,
658                        rel,
659                        fm_key_line(fm_yaml, key),
660                        Some(key.into()),
661                        format!(
662                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
663                        ),
664                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
665                        vec![],
666                    ),
667                }
668            }
669        }
670    }
671    // ── tags shape ────────────────────────────────────────────────────────────
672    if let Some(tags) = fm.get("tags") {
673        if !is_flat_scalar_list(tags) {
674            push(
675                issues,
676                Severity::Warning,
677                codes::TAGS_MALFORMED,
678                rel,
679                fm_key_line(fm_yaml, "tags"),
680                Some("tags".into()),
681                "`tags` must be a flat YAML list of short scalar labels".into(),
682                Some("use block form: one `- <tag>` per line".into()),
683                vec![],
684            );
685        }
686    }
687
688    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
689    for key in detect_flow_form_link_lists(fm_yaml) {
690        push(
691            issues,
692            Severity::Error,
693            codes::WIKI_LINK_FLOW_FORM_LIST,
694            rel,
695            fm_key_line(fm_yaml, &key),
696            Some(key.clone()),
697            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
698            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
699            vec![],
700        );
701    }
702
703    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
704    // Skip keys that have an explicit `link to` schema spec — those are checked
705    // (with prefix enforcement) in `check_schema`, and double-reporting the same
706    // link via two paths would be noise.
707    let schema_link_keys: BTreeSet<String> =
708        effective_schema(store, type_.as_deref().unwrap_or(""))
709            .map(|s| {
710                s.fields
711                    .iter()
712                    .filter(|f| f.link_prefix.is_some())
713                    .map(|f| f.name.clone())
714                    .collect()
715            })
716            .unwrap_or_default();
717    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
718        if schema_link_keys.contains(&key) {
719            continue;
720        }
721        check_wiki_link(
722            store,
723            rel,
724            &link,
725            Some(link.line),
726            Some(&key),
727            basenames,
728            issues,
729        );
730    }
731
732    // ── policies: ignored types ──────────────────────────────────────────────
733    if let Some(t) = &type_ {
734        if store.config.ignored_types.iter().any(|it| it == t) {
735            push(
736                issues,
737                Severity::Info,
738                codes::POLICY_IGNORED_TYPE_PRESENT,
739                rel,
740                fm_key_line(fm_yaml, "type"),
741                Some("type".into()),
742                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
743                Some(
744                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
745                        .into(),
746                ),
747                // The policy source: `DB.md` declares the ignored type.
748                vec![PathBuf::from("DB.md")],
749            );
750        }
751        // A conclusion record (`meta-type: conclusion`) deriving from an
752        // ignored-type record → warning. The decision lives in the shared
753        // `derived_from_ignored_type` entry point; this side only supplies the
754        // `derived_from` targets (with their line, which the issue carries) and
755        // renders the finding.
756        let meta_type = fm
757            .get("meta-type")
758            .and_then(scalar_string)
759            .unwrap_or_else(|| "fact".to_string());
760        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
761            if let Some(hit) =
762                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
763            {
764                push(
765                    issues,
766                    Severity::Warning,
767                    codes::POLICY_IGNORED_TYPE_DERIVED,
768                    rel,
769                    Some(link.line),
770                    Some("derived_from".into()),
771                    format!(
772                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
773                        hit.target, hit.target_type
774                    ),
775                    Some(
776                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
777                            .into(),
778                    ),
779                    // The ignored-type source record, plus `DB.md` (the policy
780                    // source that lists the ignored type).
781                    vec![
782                        PathBuf::from(format!("{}.md", hit.target)),
783                        PathBuf::from("DB.md"),
784                    ],
785                );
786            }
787        }
788    }
789
790    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
791    if let Some(t) = &type_ {
792        if let Some(schema) = effective_schema(store, t) {
793            check_schema(store, rel, fm, fm_yaml, &schema, issues);
794        }
795    }
796}
797
798/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
799fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
800    let line = fm_key_line(fm_yaml, "summary");
801    match fm.get("summary") {
802        None => push(
803            issues,
804            Severity::Error,
805            codes::SUMMARY_MISSING,
806            rel,
807            // A missing `summary` key has no line of its own → anchor to the
808            // frontmatter block top (line 1), the EXPECTED field-absence rule.
809            fm_key_line_or_top(fm_yaml, "summary"),
810            Some("summary".into()),
811            "content file has no `summary`".into(),
812            Some("run `dbmd fm init`".into()),
813            vec![],
814        ),
815        Some(v) => {
816            let s = scalar_string(v).unwrap_or_default();
817            if s.trim().is_empty() {
818                push(
819                    issues,
820                    Severity::Error,
821                    codes::SUMMARY_EMPTY,
822                    rel,
823                    line,
824                    Some("summary".into()),
825                    "`summary` is present but empty".into(),
826                    Some("write a one-line summary, or run `dbmd fm init`".into()),
827                    vec![],
828                );
829            } else if s.contains('\n') {
830                push(
831                    issues,
832                    Severity::Error,
833                    codes::SUMMARY_MULTILINE,
834                    rel,
835                    line,
836                    Some("summary".into()),
837                    "`summary` must be one line (contains a newline)".into(),
838                    Some("collapse the summary to a single line".into()),
839                    vec![],
840                );
841            } else if s.chars().count() > MAX_SUMMARY_LEN {
842                push(
843                    issues,
844                    Severity::Warning,
845                    codes::SUMMARY_TOO_LONG,
846                    rel,
847                    line,
848                    Some("summary".into()),
849                    format!(
850                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
851                        s.chars().count()
852                    ),
853                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
854                    vec![],
855                );
856            }
857        }
858    }
859}
860
861/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
862fn check_body_wiki_links(
863    store: &Store,
864    rel: &Path,
865    body: &str,
866    fm_end_line: u32,
867    basenames: Option<&BasenameIndex>,
868    issues: &mut Vec<Issue>,
869) {
870    for link in extract_wiki_links(body) {
871        // Body lines are offset past the frontmatter block. `link.line` is
872        // 1-based within `body`; the body starts at `fm_end_line + 1`.
873        let abs_line = fm_end_line + link.line;
874        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
875    }
876}
877
/// Maps a file's bare basename (its stem, without the extension) to every
/// store-relative path that carries that basename.
///
/// The `validate --all` sweep builds this once so the short-form wiki-link
/// check can tell a merely short-form target (`WIKI_LINK_SHORT_FORM`) from an
/// *ambiguous* one whose basename matches two or more files
/// (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set path passes
/// `None` instead: that loop is O(changed) and never walks the store, so it
/// reports the plain short-form error without this scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the swept file list by basename to form the [`BasenameIndex`].
///
/// `files` is the list `validate_all` has already gathered, so no extra walk
/// happens here. Paths whose stem is missing or not valid UTF-8 are left out;
/// paths sharing a stem keep their input order.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files
        .iter()
        .filter_map(|path| {
            let stem = path.file_stem()?.to_str()?;
            Some((stem.to_string(), path.clone()))
        })
        .fold(BasenameIndex::new(), |mut index, (stem, path)| {
            index.entry(stem).or_default().push(path);
            index
        })
}
898
899/// The shared per-wiki-link doctrine + integrity check used by both body links
900/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
901/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
902/// when its bare basename matches ≥2 files.
903fn check_wiki_link(
904    store: &Store,
905    rel: &Path,
906    link: &Link,
907    line: Option<u32>,
908    key: Option<&str>,
909    basenames: Option<&BasenameIndex>,
910    issues: &mut Vec<Issue>,
911) {
912    let bare = link.target.trim_end_matches(".md");
913
914    // Short-form: not a full store-relative path (no `/`, or first segment isn't
915    // a known layer).
916    if !is_full_store_path(bare) {
917        // Ambiguous (defensive) takes precedence over plain short-form when the
918        // target is a bare basename (no `/`) that matches ≥2 files in the store.
919        // Only computable in the sweep (where `basenames` is populated); the
920        // working-set path falls through to the plain short-form error.
921        if !bare.contains('/') {
922            if let Some(idx) = basenames {
923                if let Some(matches) = idx.get(bare) {
924                    if matches.len() >= 2 {
925                        let mut related = matches.clone();
926                        related.sort();
927                        push(
928                            issues,
929                            Severity::Error,
930                            codes::WIKI_LINK_AMBIGUOUS,
931                            rel,
932                            line,
933                            key.map(str::to_string),
934                            format!(
935                                "short-form wiki-link `[[{}]]` matches multiple files",
936                                link.target
937                            ),
938                            Some("use the full store-relative path to disambiguate".into()),
939                            related,
940                        );
941                        return;
942                    }
943                }
944            }
945        }
946        push(
947            issues,
948            Severity::Error,
949            codes::WIKI_LINK_SHORT_FORM,
950            rel,
951            line,
952            key.map(str::to_string),
953            format!(
954                "wiki-link `[[{}]]` is not a full store-relative path",
955                link.target
956            ),
957            short_form_suggestion(bare),
958            vec![],
959        );
960        // Don't also report broken; the agent must fix the form first.
961        return;
962    }
963
964    // `.md` extension → warning, then still check existence.
965    if link.target.ends_with(".md") {
966        push(
967            issues,
968            Severity::Warning,
969            codes::WIKI_LINK_HAS_EXTENSION,
970            rel,
971            line,
972            key.map(str::to_string),
973            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
974            Some(format!("drop the extension: [[{bare}]]")),
975            vec![],
976        );
977    }
978
979    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
980    // same way the graph engine does — the literal path first (so a link to a
981    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
982    // the `.md`-appended path.
983    match resolve_wiki_target(store, bare) {
984        TargetResolution::Exists => {}
985        TargetResolution::Missing => push(
986            issues,
987            Severity::Error,
988            codes::WIKI_LINK_BROKEN,
989            rel,
990            line,
991            key.map(str::to_string),
992            format!("wiki-link target `{bare}` doesn't exist"),
993            Some(format!(
994                "create `{bare}.md`, or point the link at an existing file"
995            )),
996            vec![],
997        ),
998        TargetResolution::Unsafe => push(
999            issues,
1000            Severity::Error,
1001            codes::WIKI_LINK_BROKEN,
1002            rel,
1003            line,
1004            key.map(str::to_string),
1005            format!("wiki-link target `{bare}` is not a safe store-relative path"),
1006            Some("use a full store-relative path under sources/ or records/".into()),
1007            vec![],
1008        ),
1009    }
1010}
1011
1012// ─────────────────────────────────────────────────────────────────────────────
1013//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
1014// ─────────────────────────────────────────────────────────────────────────────
1015
1016/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
1017/// block, or `None`. This is the **only** source of schema enforcement — the
1018/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
1019/// store that wants its `contact` / `expense` / etc. fields enforced declares
1020/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
1021/// copy-in starting point.
1022fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1023    store.config.schemas.get(type_).cloned()
1024}
1025
1026/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
1027fn check_schema(
1028    store: &Store,
1029    rel: &Path,
1030    fm: &BTreeMap<String, Value>,
1031    fm_yaml: &str,
1032    schema: &Schema,
1033    issues: &mut Vec<Issue>,
1034) {
1035    for spec in &schema.fields {
1036        let present = fm.get(&spec.name);
1037        let line = fm_key_line(fm_yaml, &spec.name);
1038
1039        // Required. "Empty" means: the key is absent, or its value carries no
1040        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1041        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1042        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1043        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1044        // a required field with a null or empty-collection value pass silently;
1045        // route them through `is_empty_value` instead.
1046        let is_empty = match present {
1047            None => true,
1048            Some(v) => is_empty_value(v),
1049        };
1050        if spec.required && is_empty {
1051            push(
1052                issues,
1053                Severity::Error,
1054                codes::SCHEMA_MISSING_REQUIRED,
1055                rel,
1056                // Absent key → anchor to the frontmatter top (line 1); a
1057                // present-but-empty value keeps its own line.
1058                fm_key_line_or_top(fm_yaml, &spec.name),
1059                Some(spec.name.clone()),
1060                format!("required field `{}` is absent or empty", spec.name),
1061                Some(format!("set `{}` to a non-empty value", spec.name)),
1062                vec![],
1063            );
1064            continue;
1065        }
1066        let Some(value) = present else { continue };
1067
1068        // An OPTIONAL field that is `null` or empty is simply unset — there is
1069        // no value to shape/enum/link-check. (The required+empty case already
1070        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1071        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1072        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1073        let value_empty = value.is_null()
1074            || scalar_string(value)
1075                .map(|s| s.trim().is_empty())
1076                .unwrap_or(false);
1077        if !spec.required && value_empty {
1078            continue;
1079        }
1080
1081        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1082        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1083        if let Some(prefix) = &spec.link_prefix {
1084            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1085            continue; // a link field is never also shape/enum-checked
1086        }
1087
1088        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1089        // or mapping satisfies neither, and would otherwise slip through both
1090        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1091        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1092        // than let a structurally-wrong value validate clean. (Link fields,
1093        // which legitimately take block-form sequences, already `continue`d.)
1094        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1095            push(
1096                issues,
1097                Severity::Error,
1098                codes::SCHEMA_SHAPE_MISMATCH,
1099                rel,
1100                line,
1101                Some(spec.name.clone()),
1102                format!(
1103                    "`{}` must be a scalar value, found a list or mapping",
1104                    spec.name
1105                ),
1106                Some(format!("set `{}` to a single scalar value", spec.name)),
1107                vec![],
1108            );
1109            continue;
1110        }
1111
1112        // enum
1113        if let Some(allowed) = &spec.enum_values {
1114            if let Some(s) = scalar_string(value) {
1115                if !allowed.iter().any(|a| a == &s) {
1116                    push(
1117                        issues,
1118                        Severity::Error,
1119                        codes::SCHEMA_ENUM_VIOLATION,
1120                        rel,
1121                        line,
1122                        Some(spec.name.clone()),
1123                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1124                        Some(format!("use one of: {}", allowed.join(", "))),
1125                        vec![],
1126                    );
1127                }
1128            }
1129            continue;
1130        }
1131
1132        // shape
1133        if let Some(shape) = spec.shape {
1134            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1135        }
1136    }
1137}
1138
1139/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1140/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1141/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1142/// recognized exactly like the quoted form.
1143fn check_schema_link(
1144    store: &Store,
1145    rel: &Path,
1146    field: &str,
1147    fm_yaml: &str,
1148    prefix: &Path,
1149    line: Option<u32>,
1150    issues: &mut Vec<Issue>,
1151) {
1152    let prefix_str = prefix.to_string_lossy();
1153    let prefix_str = prefix_str.trim_end_matches('/');
1154    let suggestion = |target_leaf: &str| {
1155        Some(format!(
1156            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1157        ))
1158    };
1159
1160    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1161    if links.is_empty() {
1162        // No wiki-link in the field's value → it's a plain string.
1163        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1164        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1165        let leaf = slugish(raw);
1166        push(
1167            issues,
1168            Severity::Error,
1169            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1170            rel,
1171            line,
1172            Some(field.to_string()),
1173            format!(
1174                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1175            ),
1176            suggestion(&leaf),
1177            vec![],
1178        );
1179        return;
1180    }
1181
1182    for link in links {
1183        if link.target.ends_with(".md") {
1184            let bare = link.target.trim_end_matches(".md");
1185            push(
1186                issues,
1187                Severity::Warning,
1188                codes::WIKI_LINK_HAS_EXTENSION,
1189                rel,
1190                Some(link.line),
1191                Some(field.to_string()),
1192                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1193                Some(format!("drop the extension: [[{bare}]]")),
1194                vec![],
1195            );
1196        }
1197        let bare = link.target.trim_end_matches(".md");
1198        if !path_under_prefix(bare, prefix_str) {
1199            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1200            push(
1201                issues,
1202                Severity::Error,
1203                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1204                rel,
1205                line,
1206                Some(field.to_string()),
1207                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1208                suggestion(leaf),
1209                vec![],
1210            );
1211        } else {
1212            // Correct prefix — still surface a broken target so the agent sees
1213            // one consistent vocabulary. Resolve like the graph engine (literal
1214            // path first, then `.md`) so a `link to sources/` field pointing at a
1215            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1216            match resolve_wiki_target(store, bare) {
1217                TargetResolution::Exists => {}
1218                TargetResolution::Missing => push(
1219                    issues,
1220                    Severity::Error,
1221                    codes::WIKI_LINK_BROKEN,
1222                    rel,
1223                    line,
1224                    Some(field.to_string()),
1225                    format!("wiki-link target `{bare}` doesn't exist"),
1226                    Some(format!(
1227                        "create `{bare}.md`, or point the link at an existing file"
1228                    )),
1229                    vec![],
1230                ),
1231                TargetResolution::Unsafe => push(
1232                    issues,
1233                    Severity::Error,
1234                    codes::WIKI_LINK_BROKEN,
1235                    rel,
1236                    line,
1237                    Some(field.to_string()),
1238                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1239                    Some("use a full store-relative path under sources/ or records/".into()),
1240                    vec![],
1241                ),
1242            }
1243        }
1244    }
1245}
1246
1247/// Shape enforcement for a non-link, non-enum schema field.
1248fn check_schema_shape(
1249    rel: &Path,
1250    field: &str,
1251    value: &Value,
1252    shape: Shape,
1253    line: Option<u32>,
1254    issues: &mut Vec<Issue>,
1255) {
1256    let s = scalar_string(value).unwrap_or_default();
1257    let ok = match shape {
1258        Shape::String => true, // any scalar string
1259        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1260        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1261        Shape::Date => is_iso8601_date_or_datetime(&s),
1262        Shape::Email => is_email(&s),
1263        Shape::Currency => is_currency(&s),
1264        Shape::Url => is_url(&s),
1265    };
1266    if !ok {
1267        push(
1268            issues,
1269            Severity::Error,
1270            codes::SCHEMA_SHAPE_MISMATCH,
1271            rel,
1272            line,
1273            Some(field.to_string()),
1274            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1275            Some(shape_suggestion(shape)),
1276            vec![],
1277        );
1278    }
1279}
1280
1281// ─────────────────────────────────────────────────────────────────────────────
1282//  Cross-file: entity-dedup collisions (validate_all only)
1283// ─────────────────────────────────────────────────────────────────────────────
1284
1285/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1286///
1287/// `DUP_ID` is universal (two files with the same explicit `id`).
1288/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1289/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1290/// uniqueness constraint, and two records of that type whose declared values
1291/// collide warn. No type carries a built-in dedup key — the store opts in.
1292///
1293/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1294/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1295/// lexicographically smallest store-relative path in the group (a total order →
1296/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1297/// that field's line on the reported file and carries it as `key`; a multi-field
1298/// key anchors to line 1 with a null key.
1299fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1300    // Path → frontmatter YAML, for resolving the anchor field's line on the
1301    // reported (smallest-path) member.
1302    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1303        .iter()
1304        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1305        .collect();
1306
1307    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1308    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1309    for (rel, p) in parsed {
1310        if let Some(map) = &p.fm {
1311            if let Some(id) = map.get("id").and_then(scalar_string) {
1312                if !id.trim().is_empty() {
1313                    by_id.entry(id).or_default().push(rel.clone());
1314                }
1315            }
1316        }
1317    }
1318    for (id, files) in &by_id {
1319        if files.len() > 1 {
1320            let (reported, related) = canonical_and_related(files);
1321            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1322            push(
1323                issues,
1324                Severity::Error,
1325                codes::DUP_ID,
1326                &reported,
1327                line,
1328                Some("id".into()),
1329                format!("id {id:?} is declared by more than one file"),
1330                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1331                related,
1332            );
1333        }
1334    }
1335
1336    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1337    // Every constraint comes from the store's `## Schemas`; a type with no
1338    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1339    // key-ordered, so emitted issues are deterministic across runs.
1340    for (type_name, schema) in &store.config.schemas {
1341        for key_fields in &schema.unique_keys {
1342            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1343        }
1344    }
1345}
1346
1347/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1348/// declared `key_fields` render to the same token tuple. Files missing any key
1349/// field are skipped — an incomplete key is never a collision.
1350///
1351/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1352/// store-relative path; `related` is the rest. A single-field key anchors to
1353/// that field's line on the reported file and carries it as `key`; a multi-field
1354/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1355fn soft_dup(
1356    parsed: &[(PathBuf, Parsed)],
1357    issues: &mut Vec<Issue>,
1358    type_: &str,
1359    key_fields: &[String],
1360    fm_yaml_of: &HashMap<&PathBuf, &str>,
1361) {
1362    if key_fields.is_empty() {
1363        return;
1364    }
1365    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1366    for (rel, p) in parsed {
1367        let is_type =
1368            p.fm.as_ref()
1369                .and_then(|m| m.get("type"))
1370                .and_then(scalar_string)
1371                .map(|t| t == type_)
1372                .unwrap_or(false);
1373        if !is_type {
1374            continue;
1375        }
1376        if let Some(key) = dedup_key(p, key_fields) {
1377            groups.entry(key).or_default().push(rel.clone());
1378        }
1379    }
1380    // HashMap iteration is nondeterministic; sort by reported member so the
1381    // emitted issue order is stable across runs.
1382    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1383        .values()
1384        .filter(|files| files.len() > 1)
1385        .map(|files| canonical_and_related(files))
1386        .collect();
1387    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1388
1389    let fields_disp = key_fields.join(", ");
1390    for (reported, related) in collisions {
1391        // Single-field keys anchor to the field's line + carry the key; multi-
1392        // field keys anchor to line 1 with a null key.
1393        let (line, key) = if key_fields.len() == 1 {
1394            (
1395                fm_yaml_of
1396                    .get(&reported)
1397                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1398                Some(key_fields[0].clone()),
1399            )
1400        } else {
1401            (Some(1), None)
1402        };
1403        let n = related.len();
1404        push(
1405            issues,
1406            Severity::Warning,
1407            codes::DUP_UNIQUE_KEY,
1408            &reported,
1409            line,
1410            key,
1411            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1412            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1413            related,
1414        );
1415    }
1416}
1417
1418/// Render a type's `unique:` key for one file: each field's dedup token in
1419/// order, or `None` if any field is absent/empty (an incomplete key never
1420/// collides).
1421fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1422    let mut out = Vec::with_capacity(key_fields.len());
1423    for f in key_fields {
1424        out.push(dedup_token(p, f)?);
1425    }
1426    Some(out)
1427}
1428
1429/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1430/// values (single or block-sequence list) reduce to their lower-cased target
1431/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1432/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1433fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1434    // Wiki-links first — read from the raw frontmatter text so the unquoted
1435    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1436    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1437    if !links.is_empty() {
1438        let set: BTreeSet<String> = links
1439            .into_iter()
1440            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1441            .filter(|t| !t.is_empty())
1442            .collect();
1443        return if set.is_empty() {
1444            None
1445        } else {
1446            Some(set.into_iter().collect::<Vec<_>>().join(","))
1447        };
1448    }
1449    match p.fm.as_ref()?.get(field) {
1450        Some(Value::Sequence(items)) => {
1451            let set: BTreeSet<String> = items
1452                .iter()
1453                .filter_map(scalar_string)
1454                .map(|s| s.trim().to_lowercase())
1455                .filter(|t| !t.is_empty())
1456                .collect();
1457            if set.is_empty() {
1458                None
1459            } else {
1460                Some(set.into_iter().collect::<Vec<_>>().join(","))
1461            }
1462        }
1463        Some(v) => {
1464            let s = scalar_string(v)?.trim().to_lowercase();
1465            if s.is_empty() {
1466                None
1467            } else {
1468                Some(s)
1469            }
1470        }
1471        None => None,
1472    }
1473}
1474
/// Divide a non-empty collision group into `(reported, related)`. The reported
/// member is the lexicographically smallest store-relative path; `related` is
/// every other member in ascending order. Paths are totally ordered, so the
/// split is deterministic — which is what reporting rule #1 depends on.
///
/// # Panics
///
/// Panics if `files` is empty.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered = files.to_vec();
    ordered.sort();
    let mut members = ordered.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1486
1487// ─────────────────────────────────────────────────────────────────────────────
1488//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1489// ─────────────────────────────────────────────────────────────────────────────
1490
1491/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1492fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1493    // Group content files by their immediate parent folder (the type-folder,
1494    // *across date shards* — a sharded file's "type folder" is the folder right
1495    // under the layer). We key on the type-folder so shards roll up correctly.
1496    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1497    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1498    for rel in files {
1499        // The layer is the first path component — recorded independently of the
1500        // type-folder so a layer containing only loose files still requires an
1501        // `index.md`.
1502        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1503            match layer {
1504                "sources" => layers_present.insert("sources"),
1505                "records" => layers_present.insert("records"),
1506                _ => false,
1507            };
1508        }
1509        if let Some(tf) = type_folder_of(rel) {
1510            type_folders.entry(tf).or_default().push(rel.clone());
1511        }
1512    }
1513
1514    // ── Root index.md ─────────────────────────────────────────────────────────
1515    if !files.is_empty() {
1516        let root_index = store.root.join("index.md");
1517        if !root_index.is_file() {
1518            push(
1519                issues,
1520                Severity::Error,
1521                codes::INDEX_MISSING,
1522                Path::new("index.md"),
1523                None,
1524                None,
1525                "store has files but no root `index.md`".into(),
1526                Some("run `dbmd index rebuild`".into()),
1527                vec![],
1528            );
1529        } else {
1530            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1531        }
1532    }
1533
1534    // ── Layer index.md ────────────────────────────────────────────────────────
1535    for layer in &layers_present {
1536        let layer_index_rel = PathBuf::from(layer).join("index.md");
1537        let abs = store.root.join(&layer_index_rel);
1538        if !abs.is_file() {
1539            push(
1540                issues,
1541                Severity::Error,
1542                codes::INDEX_MISSING,
1543                &layer_index_rel,
1544                None,
1545                None,
1546                format!("layer `{layer}/` has files but no `index.md`"),
1547                Some("run `dbmd index rebuild`".into()),
1548                vec![],
1549            );
1550        } else {
1551            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1552        }
1553    }
1554
1555    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1556    for (tf, members) in &type_folders {
1557        let index_md_rel = tf.join("index.md");
1558        let index_md_abs = store.root.join(&index_md_rel);
1559        let index_md_present = index_md_abs.is_file();
1560        if !index_md_present {
1561            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1562            // on the FOLDER (not the would-be `index.md` path). When the index is
1563            // entirely missing we do NOT additionally evaluate per-entry
1564            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1565            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1566            push(
1567                issues,
1568                Severity::Error,
1569                codes::INDEX_MISSING,
1570                tf,
1571                None,
1572                None,
1573                format!("non-empty folder `{}` has no index.md", tf.display()),
1574                Some(format!(
1575                    "run `dbmd index rebuild --folder {}`",
1576                    tf.display()
1577                )),
1578                vec![],
1579            );
1580            continue;
1581        }
1582
1583        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1584        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1585
1586        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1587        // when the `index.md` is present (above): a folder whose entire index is
1588        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1589        let jsonl_rel = tf.join("index.jsonl");
1590        let jsonl_abs = store.root.join(&jsonl_rel);
1591        if !jsonl_abs.is_file() {
1592            push(
1593                issues,
1594                Severity::Error,
1595                codes::INDEX_JSONL_MISSING,
1596                &jsonl_rel,
1597                None,
1598                None,
1599                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1600                Some("run `dbmd index rebuild`".into()),
1601                vec![],
1602            );
1603        } else {
1604            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1605        }
1606    }
1607
1608    // ── Orphan index.md: an index file in a folder with no content. ──────────
1609    for rel in walk_index_files(&store.root) {
1610        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1611        let parent_str = parent.to_string_lossy().to_string();
1612        let is_canonical = parent_str.is_empty() // root
1613            || matches!(parent_str.as_str(), "sources" | "records")
1614            || type_folders.contains_key(&parent);
1615        if !is_canonical {
1616            push(
1617                issues,
1618                Severity::Warning,
1619                codes::INDEX_ORPHAN,
1620                &rel,
1621                None,
1622                None,
1623                format!(
1624                    "`{}` sits in an empty or non-canonical folder",
1625                    rel.display()
1626                ),
1627                Some("remove it, or run `dbmd index rebuild`".into()),
1628                vec![],
1629            );
1630        }
1631    }
1632}
1633
1634/// Check a type-folder `index.md`'s entries against the folder's actual files:
1635/// stale entries (target gone), missing entries (file not listed), and
1636/// summary mismatches.
1637fn check_type_folder_index_md(
1638    store: &Store,
1639    tf: &Path,
1640    index_rel: &Path,
1641    members: &[PathBuf],
1642    issues: &mut Vec<Issue>,
1643) {
1644    let abs = store.root.join(index_rel);
1645    let Ok(text) = std::fs::read_to_string(&abs) else {
1646        return;
1647    };
1648    let entries = parse_index_entries(&text);
1649
1650    let listed: BTreeSet<PathBuf> = entries
1651        .iter()
1652        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1653        .collect();
1654
1655    // Stale entries + summary mismatch.
1656    for entry in &entries {
1657        let bare = entry.target.trim_end_matches(".md");
1658        // Resolve like the graph engine (literal path first, then `.md`) so an
1659        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1660        let target_abs = match resolved_target_abs(store, bare) {
1661            Some(abs) => abs,
1662            None => {
1663                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1664                    push(
1665                        issues,
1666                        Severity::Error,
1667                        codes::INDEX_STALE_ENTRY,
1668                        index_rel,
1669                        Some(entry.line),
1670                        None,
1671                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1672                        Some("run `dbmd index rebuild`".into()),
1673                        vec![],
1674                    );
1675                } else {
1676                    push(
1677                        issues,
1678                        Severity::Error,
1679                        codes::INDEX_STALE_ENTRY,
1680                        index_rel,
1681                        Some(entry.line),
1682                        None,
1683                        format!("index entry `[[{bare}]]` points at a missing file"),
1684                        Some("run `dbmd index rebuild`".into()),
1685                        // The stale target the entry names (the file that no
1686                        // longer exists) — so the agent can locate the dangling
1687                        // reference.
1688                        vec![PathBuf::from(format!("{bare}.md"))],
1689                    );
1690                }
1691                continue;
1692            }
1693        };
1694        // Summary mismatch: the entry text must equal the file's `summary`. A
1695        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1696        // summary is also a mismatch — the SPEC requires every type-folder index
1697        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1698        // missing quote can't validate clean just because there's nothing to
1699        // compare.
1700        if let Some(expected) = read_summary(&target_abs) {
1701            match &entry.summary_text {
1702                // Compare with the SAME whitespace normalization the renderer
1703                // applies when it writes the `index.md` browse line
1704                // (`format_md_entry` -> `collapse_whitespace`). `text_part` is the
1705                // already-collapsed text parsed back out of `index.md`; `expected`
1706                // is the RAW file summary. Comparing a collapsed value against a
1707                // raw one falsely flagged any valid one-line summary that carries
1708                // internal whitespace (a double space, a tab) — a permanent,
1709                // rebuild-immune INDEX_SUMMARY_MISMATCH that wedged the store, since
1710                // `index rebuild` regenerates the byte-identical collapsed line.
1711                // Normalizing both sides makes the check compare like with like.
1712                Some(text_part)
1713                    if crate::summary::collapse_whitespace(text_part)
1714                        != crate::summary::collapse_whitespace(&expected) =>
1715                {
1716                    push(
1717                        issues,
1718                        Severity::Error,
1719                        codes::INDEX_SUMMARY_MISMATCH,
1720                        index_rel,
1721                        Some(entry.line),
1722                        None,
1723                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1724                        Some("run `dbmd index rebuild`".into()),
1725                        vec![PathBuf::from(format!("{bare}.md"))],
1726                    );
1727                }
1728                None if !expected.trim().is_empty() => {
1729                    push(
1730                        issues,
1731                        Severity::Error,
1732                        codes::INDEX_SUMMARY_MISMATCH,
1733                        index_rel,
1734                        Some(entry.line),
1735                        None,
1736                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1737                        Some("run `dbmd index rebuild`".into()),
1738                        vec![PathBuf::from(format!("{bare}.md"))],
1739                    );
1740                }
1741                _ => {}
1742            }
1743        }
1744    }
1745
1746    // Missing entries: a member file not listed. Skip the index/log meta files.
1747    // The browse view caps at 500; only flag a missing entry when the folder is
1748    // under the cap (a capped folder legitimately omits older files).
1749    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1750    if content_members.len() <= 500 {
1751        for m in content_members {
1752            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1753            if !listed.contains(&bare) {
1754                push(
1755                    issues,
1756                    Severity::Error,
1757                    codes::INDEX_MISSING_ENTRY,
1758                    index_rel,
1759                    None,
1760                    None,
1761                    format!(
1762                        "file `{}` is not listed in its folder's `index.md`",
1763                        m.display()
1764                    ),
1765                    Some("run `dbmd index rebuild`".into()),
1766                    vec![(*m).clone()],
1767                );
1768            }
1769        }
1770    }
1771    let _ = tf;
1772}
1773
1774/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1775/// folder (uncapped), every record must point at a real file, and each record's
1776/// fields must match the file's frontmatter.
1777fn check_type_folder_index_jsonl(
1778    store: &Store,
1779    tf: &Path,
1780    jsonl_rel: &Path,
1781    members: &[PathBuf],
1782    issues: &mut Vec<Issue>,
1783) {
1784    let abs = store.root.join(jsonl_rel);
1785    let Ok(text) = std::fs::read_to_string(&abs) else {
1786        return;
1787    };
1788
1789    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1790    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1791    for (i, line) in text.lines().enumerate() {
1792        let line = line.trim();
1793        if line.is_empty() {
1794            continue;
1795        }
1796        let rec: serde_json::Value = match serde_json::from_str(line) {
1797            Ok(v) => v,
1798            Err(e) => {
1799                push(
1800                    issues,
1801                    Severity::Error,
1802                    codes::INDEX_JSONL_DESYNC,
1803                    jsonl_rel,
1804                    Some((i + 1) as u32),
1805                    None,
1806                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1807                    Some("run `dbmd index rebuild`".into()),
1808                    vec![],
1809                );
1810                continue;
1811            }
1812        };
1813        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1814            if !is_safe_store_relative_path(Path::new(path)) {
1815                push(
1816                    issues,
1817                    Severity::Error,
1818                    codes::INDEX_JSONL_DESYNC,
1819                    jsonl_rel,
1820                    Some((i + 1) as u32),
1821                    None,
1822                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1823                    Some("run `dbmd index rebuild`".into()),
1824                    vec![],
1825                );
1826                continue;
1827            }
1828            records.insert(PathBuf::from(path), rec);
1829        }
1830    }
1831
1832    let member_set: BTreeSet<PathBuf> = members
1833        .iter()
1834        .filter(|m| is_content_file(m))
1835        .cloned()
1836        .collect();
1837
1838    // jsonl record → missing file = desync.
1839    for path in records.keys() {
1840        let target_abs = store.root.join(path);
1841        if !target_abs.is_file() {
1842            push(
1843                issues,
1844                Severity::Error,
1845                codes::INDEX_JSONL_DESYNC,
1846                jsonl_rel,
1847                None,
1848                None,
1849                format!(
1850                    "`index.jsonl` record points at missing file `{}`",
1851                    path.display()
1852                ),
1853                Some("run `dbmd index rebuild`".into()),
1854                vec![],
1855            );
1856        }
1857    }
1858
1859    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1860    for m in &member_set {
1861        if !records.contains_key(m) {
1862            push(
1863                issues,
1864                Severity::Error,
1865                codes::INDEX_JSONL_DESYNC,
1866                jsonl_rel,
1867                None,
1868                None,
1869                format!(
1870                    "file `{}` is missing from the complete `index.jsonl`",
1871                    m.display()
1872                ),
1873                Some("run `dbmd index rebuild`".into()),
1874                vec![m.clone()],
1875            );
1876        }
1877    }
1878
1879    // Record fields stale vs. frontmatter. SPEC § Validation defines
1880    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1881    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1882    // search paths read every field straight from these sidecars (`tags`,
1883    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1884    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1885    // a stale value answer queries with data that exists in no `.md` file.
1886    //
1887    // Rather than re-list (and drift from) every projected key, rebuild the
1888    // record the canonical projection would write for this file
1889    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1890    // and diff the two as flat JSON maps. Every key the projection emits is
1891    // covered automatically; `path` is the join key and is skipped.
1892    for (path, rec) in &records {
1893        let target_abs = store.root.join(path);
1894        if !target_abs.is_file() {
1895            continue;
1896        }
1897        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1898        else {
1899            continue; // unreadable / unparseable frontmatter is reported elsewhere
1900        };
1901        let Ok(expected_json) = serde_json::to_value(&expected) else {
1902            continue;
1903        };
1904        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1905            continue;
1906        };
1907
1908        // Compare the union of keys present on either side; a key the file
1909        // projects but the sidecar omits is just as stale as a wrong value.
1910        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1911        for key in have.keys().chain(want.keys()) {
1912            if key == "path" {
1913                continue;
1914            }
1915            if have.get(key) != want.get(key) {
1916                mismatched_keys.insert(key);
1917            }
1918        }
1919
1920        if !mismatched_keys.is_empty() {
1921            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1922            push(
1923                issues,
1924                Severity::Error,
1925                codes::INDEX_JSONL_STALE,
1926                jsonl_rel,
1927                None,
1928                Some(keys.join(",")),
1929                format!(
1930                    "`index.jsonl` record for `{}` is stale ({})",
1931                    path.display(),
1932                    keys.join(", ")
1933                ),
1934                Some("run `dbmd index rebuild`".into()),
1935                vec![path.clone()],
1936            );
1937        }
1938    }
1939    let _ = tf;
1940}
1941
1942/// Check an index's `scope:` frontmatter against its filesystem location.
1943fn check_index_scope(
1944    store: &Store,
1945    index_rel: &Path,
1946    expected_scope: &str,
1947    expected_folder: Option<&str>,
1948    issues: &mut Vec<Issue>,
1949) {
1950    let abs = store.root.join(index_rel);
1951    let Ok(text) = std::fs::read_to_string(&abs) else {
1952        return;
1953    };
1954    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1955        return;
1956    };
1957    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1958        return;
1959    };
1960    let fm = yaml_map_to_btree(&map);
1961
1962    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1963        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1964        let scope_ok =
1965            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1966        if !scope_ok {
1967            push(
1968                issues,
1969                Severity::Warning,
1970                codes::INDEX_WRONG_SCOPE,
1971                index_rel,
1972                fm_key_line(&yaml, "scope"),
1973                Some("scope".into()),
1974                format!(
1975                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1976                ),
1977                Some(format!("set `scope: {expected_scope}`")),
1978                vec![],
1979            );
1980        }
1981    }
1982    // folder: must match for layer/type-folder indexes.
1983    if let Some(expected) = expected_folder {
1984        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1985            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1986                push(
1987                    issues,
1988                    Severity::Warning,
1989                    codes::INDEX_WRONG_SCOPE,
1990                    index_rel,
1991                    fm_key_line(&yaml, "folder"),
1992                    Some("folder".into()),
1993                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1994                    Some(format!("set `folder: {expected}`")),
1995                    vec![],
1996                );
1997            }
1998        }
1999    }
2000}
2001
2002// ─────────────────────────────────────────────────────────────────────────────
2003//  Cross-file: log.md well-formedness + ordering (validate_all only)
2004// ─────────────────────────────────────────────────────────────────────────────
2005
2006/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
2007/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
2008///
2009/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
2010/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
2011/// LOG_* checks read only the active file, an entry `validate --all` flagged
2012/// while it lived in `log.md` would stop being flagged the moment a newer-month
2013/// append rotated it into an archive — even though the log readers still surface
2014/// that exact entry to the curator. Scanning the archives too keeps validate and
2015/// the readers in agreement after a rotation.
2016///
2017/// Order: archives oldest-month first, then the active `log.md` last — the true
2018/// chronological timeline — so the out-of-order check threads `prev` across the
2019/// rotation boundary the same way it does within a single file.
2020fn check_log(store: &Store, issues: &mut Vec<Issue>) {
2021    let mut prev: Option<DateTime<FixedOffset>> = None;
2022    for rel in log_files_chronological(store) {
2023        check_log_file(store, &rel, &mut prev, issues);
2024    }
2025}
2026
2027/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
2028/// archive oldest-month first, then the active `log.md` last. Missing files are
2029/// simply absent from the list.
2030fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2031    let mut files: Vec<PathBuf> = Vec::new();
2032    let archive_dir = store.root.join("log");
2033    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2034        let mut archives: Vec<PathBuf> = entries
2035            .flatten()
2036            .map(|e| e.path())
2037            .filter(|p| {
2038                p.is_file()
2039                    && p.file_name()
2040                        .and_then(|s| s.to_str())
2041                        .and_then(|n| n.strip_suffix(".md"))
2042                        .is_some_and(is_year_month_archive)
2043            })
2044            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2045            .collect();
2046        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
2047        archives.sort();
2048        files.extend(archives);
2049    }
2050    // The active file holds the current month — newest, so it comes last.
2051    if store.root.join("log.md").is_file() {
2052        files.push(PathBuf::from("log.md"));
2053    }
2054    files
2055}
2056
2057/// Scan one log file's entry headers, threading the running `prev` timestamp so
2058/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2059/// given store-relative path so an archived entry points at its archive file.
2060fn check_log_file(
2061    store: &Store,
2062    log_rel: &Path,
2063    prev: &mut Option<DateTime<FixedOffset>>,
2064    issues: &mut Vec<Issue>,
2065) {
2066    let abs = store.root.join(log_rel);
2067    let Ok(text) = std::fs::read_to_string(&abs) else {
2068        return;
2069    };
2070
2071    for (i, line) in text.lines().enumerate() {
2072        if !line.starts_with("## [") {
2073            continue;
2074        }
2075        let line_no = (i + 1) as u32;
2076        match parse_log_header(line) {
2077            None => push(
2078                issues,
2079                Severity::Error,
2080                codes::LOG_BAD_TIMESTAMP,
2081                log_rel,
2082                Some(line_no),
2083                None,
2084                format!("log entry header has an unparseable timestamp: {line:?}"),
2085                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2086                vec![],
2087            ),
2088            Some((ts, kind, _object)) => {
2089                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2090                    push(
2091                        issues,
2092                        Severity::Warning,
2093                        codes::LOG_UNKNOWN_KIND,
2094                        log_rel,
2095                        Some(line_no),
2096                        None,
2097                        format!("log entry kind `{kind}` is not recognized"),
2098                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2099                        vec![],
2100                    );
2101                }
2102                if let Some(p) = *prev {
2103                    if ts < p {
2104                        push(
2105                            issues,
2106                            Severity::Warning,
2107                            codes::LOG_OUT_OF_ORDER,
2108                            log_rel,
2109                            Some(line_no),
2110                            None,
2111                            "log entry is older than the entry above it (possible rewrite)".into(),
2112                            Some("append corrective entries; never reorder past ones".into()),
2113                            vec![],
2114                        );
2115                    }
2116                }
2117                *prev = Some(ts);
2118            }
2119        }
2120    }
2121}
2122
2123// ─────────────────────────────────────────────────────────────────────────────
2124//  Self-contained primitives (collapse onto sibling modules once they land)
2125// ─────────────────────────────────────────────────────────────────────────────
2126
/// A minimal wiki-link: just its target and the 1-based line it was found on.
/// No display text is kept — the struct has no field for it.
#[derive(Debug)]
struct Link {
    /// The target the link points at.
    target: String,
    /// 1-based line number of the link.
    line: u32,
}
2133
2134/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2135/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2136/// exact-cased directory entry to be present.
2137fn store_marker_present(store: &Store) -> bool {
2138    let want = store.root.join("DB.md");
2139    if !want.is_file() {
2140        return false;
2141    }
2142    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2143    match std::fs::read_dir(&store.root) {
2144        Ok(entries) => entries
2145            .flatten()
2146            .any(|e| e.file_name().to_str() == Some("DB.md")),
2147        Err(_) => true, // can't enumerate; trust the is_file() above
2148    }
2149}
2150
2151/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2152/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2153/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2154/// `Schemas`).
2155///
2156/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2157/// than through `check_content_file`. The marker presence is established by the
2158/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2159/// as a store (the marker is the filename), so we report its shape rather than
2160/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2161fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2162    let rel = Path::new("DB.md");
2163    let abs = store.root.join("DB.md");
2164    let Ok(text) = std::fs::read_to_string(&abs) else {
2165        return; // marker present but unreadable: nothing more to say.
2166    };
2167
2168    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2169        // No frontmatter block at all → it cannot declare `type: db-md` and has
2170        // neither required field. Report the type and both missing fields,
2171        // anchored to line 1 (the would-be opening fence).
2172        push(
2173            issues,
2174            Severity::Error,
2175            codes::DB_MD_BAD_TYPE,
2176            rel,
2177            Some(1),
2178            Some("type".into()),
2179            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2180            Some("add a `---` frontmatter block with `type: db-md`".into()),
2181            vec![],
2182        );
2183        for field in ["scope", "owner"] {
2184            push(
2185                issues,
2186                Severity::Error,
2187                codes::DB_MD_MISSING_FIELD,
2188                rel,
2189                Some(1),
2190                Some(field.into()),
2191                format!("DB.md frontmatter is missing required field `{field}`"),
2192                Some(format!("add `{field}:` to the DB.md frontmatter")),
2193                vec![],
2194            );
2195        }
2196        return;
2197    };
2198
2199    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2200    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2201    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2202        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2203        Ok(Value::Null) => Some(BTreeMap::new()),
2204        _ => None,
2205    };
2206
2207    match &fm {
2208        Some(map) => {
2209            // ── type: db-md ──────────────────────────────────────────────────
2210            let type_ = map.get("type").and_then(scalar_string);
2211            if type_.as_deref() != Some("db-md") {
2212                let (line, msg) = match &type_ {
2213                    Some(t) => (
2214                        fm_key_line(&fm_yaml, "type"),
2215                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2216                    ),
2217                    None => (
2218                        Some(1),
2219                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2220                    ),
2221                };
2222                push(
2223                    issues,
2224                    Severity::Error,
2225                    codes::DB_MD_BAD_TYPE,
2226                    rel,
2227                    line,
2228                    Some("type".into()),
2229                    msg,
2230                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2231                    vec![],
2232                );
2233            }
2234
2235            // ── required fields: scope + owner ───────────────────────────────
2236            for field in ["scope", "owner"] {
2237                let present = map
2238                    .get(field)
2239                    .and_then(scalar_string)
2240                    .map(|s| !s.trim().is_empty())
2241                    .unwrap_or(false);
2242                if !present {
2243                    push(
2244                        issues,
2245                        Severity::Error,
2246                        codes::DB_MD_MISSING_FIELD,
2247                        rel,
2248                        // A present-but-empty field anchors to its line; a fully
2249                        // absent one to the block top.
2250                        fm_key_line_or_top(&fm_yaml, field),
2251                        Some(field.into()),
2252                        format!("DB.md frontmatter is missing required field `{field}`"),
2253                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2254                        vec![],
2255                    );
2256                }
2257            }
2258        }
2259        None => {
2260            // Unparseable frontmatter: the identity contract is unprovable. Emit
2261            // the type error and both field errors, anchored to the block top.
2262            push(
2263                issues,
2264                Severity::Error,
2265                codes::DB_MD_BAD_TYPE,
2266                rel,
2267                Some(1),
2268                Some("type".into()),
2269                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2270                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2271                vec![],
2272            );
2273            for field in ["scope", "owner"] {
2274                push(
2275                    issues,
2276                    Severity::Error,
2277                    codes::DB_MD_MISSING_FIELD,
2278                    rel,
2279                    Some(1),
2280                    Some(field.into()),
2281                    format!("DB.md frontmatter is missing required field `{field}`"),
2282                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2283                    vec![],
2284                );
2285            }
2286        }
2287    }
2288
2289    // ── recognized `##` section headers only ─────────────────────────────────
2290    // The body's H2 headings must be one of the four the toolkit reads; any
2291    // other is a likely typo / misplacement (warning — the parser ignores it,
2292    // so the config is not corrupted, but the operator wrote a section that will
2293    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2294    // schema blocks) live under their H2 and are not flagged here.
2295    //
2296    // `## Folders` is recognized: `parse_db_md` reads it into `Config.folders`
2297    // (parser.rs) and the index renders folder display names + descriptions from
2298    // it (index.rs `render_*_md_from_stats`). Flagging it `DB_MD_UNKNOWN_SECTION`
2299    // with "remove this heading" told the operator to delete a working,
2300    // round-tripped config block — destroying curator-authored rollup names. It
2301    // is a real, shipped section; SPEC.md documents it alongside the other three.
2302    for section in crate::parser::extract_sections(&body) {
2303        if section.level != 2 {
2304            continue;
2305        }
2306        let name = section.heading.trim().to_ascii_lowercase();
2307        if matches!(
2308            name.as_str(),
2309            "agent instructions" | "policies" | "schemas" | "folders"
2310        ) {
2311            continue;
2312        }
2313        // `Section::line` is 1-based within the body; the body begins at file
2314        // line `fm_end_line + 1`.
2315        let file_line = fm_end_line + section.line;
2316        push(
2317            issues,
2318            Severity::Warning,
2319            codes::DB_MD_UNKNOWN_SECTION,
2320            rel,
2321            Some(file_line),
2322            None,
2323            format!(
2324                "DB.md has an unrecognized `## {}` section",
2325                section.heading.trim()
2326            ),
2327            Some(
2328                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas`, \
2329                 `## Folders` — remove or rename this heading"
2330                    .into(),
2331            ),
2332            vec![],
2333        );
2334    }
2335
2336    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2337    // Without this, every schema misparse is silent: the operator/agent gets no
2338    // signal that DB.md is interpreting their schema differently from what they
2339    // wrote, and downstream records are validated against the degraded schema.
2340    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2341}
2342
2343/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2344/// duplicate field name within a type, or an unrecognized modifier all parse
2345/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2346/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2347/// already separated out); this pass reports the suspicious *field* shapes,
2348/// anchored to the `### <type>` heading line so the agent can find the block.
2349fn check_db_md_schemas(
2350    store: &Store,
2351    rel: &Path,
2352    body: &str,
2353    fm_end_line: u32,
2354    issues: &mut Vec<Issue>,
2355) {
2356    if store.config.schemas.is_empty() {
2357        return;
2358    }
2359
2360    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2361    // per-type issue can anchor to the declaration block. `extract_sections`
2362    // returns a flat list with 1-based body lines; the body starts at file line
2363    // `fm_end_line + 1`.
2364    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2365    let mut current_h2: Option<String> = None;
2366    for section in crate::parser::extract_sections(body) {
2367        match section.level {
2368            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2369            3 if current_h2.as_deref() == Some("schemas") => {
2370                // The H3 heading text (as written) is the type name — the same
2371                // key `parse_db_md` inserts into `config.schemas`.
2372                type_line
2373                    .entry(section.heading.trim().to_string())
2374                    .or_insert(fm_end_line + section.line);
2375            }
2376            _ => {}
2377        }
2378    }
2379
2380    for (type_name, schema) in &store.config.schemas {
2381        let line = type_line.get(type_name).copied();
2382        let mut seen: BTreeSet<String> = BTreeSet::new();
2383        for field in &schema.fields {
2384            let name = field.name.trim();
2385
2386            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2387            // nameless field that can never match a frontmatter key, so its
2388            // required/shape/enum constraints silently never apply.
2389            if name.is_empty() {
2390                push(
2391                    issues,
2392                    Severity::Warning,
2393                    codes::DB_MD_SCHEMA_FIELD,
2394                    rel,
2395                    line,
2396                    None,
2397                    format!("`### {type_name}` has a schema field bullet with no field name"),
2398                    Some(
2399                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2400                            .into(),
2401                    ),
2402                    vec![],
2403                );
2404                continue;
2405            }
2406
2407            // Duplicate field name within a type: the second declaration's
2408            // constraints are interpreted independently of the first, so the
2409            // author's intent is ambiguous and likely wrong.
2410            if !seen.insert(name.to_string()) {
2411                push(
2412                    issues,
2413                    Severity::Warning,
2414                    codes::DB_MD_SCHEMA_FIELD,
2415                    rel,
2416                    line,
2417                    Some(name.to_string()),
2418                    format!("`### {type_name}` declares field `{name}` more than once"),
2419                    Some(
2420                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2421                    ),
2422                    vec![],
2423                );
2424            }
2425
2426            // Unrecognized modifiers: the parser stashes anything outside the
2427            // known vocabulary (`required` / a shape / `link to …` / `default …`
2428            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2429            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2430            for modifier in &field.unknown_modifiers {
2431                let modifier = modifier.trim();
2432                if modifier.is_empty() {
2433                    continue;
2434                }
2435                push(
2436                    issues,
2437                    Severity::Info,
2438                    codes::DB_MD_SCHEMA_FIELD,
2439                    rel,
2440                    line,
2441                    Some(name.to_string()),
2442                    format!(
2443                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2444                    ),
2445                    Some(
2446                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2447                            .into(),
2448                    ),
2449                    vec![],
2450                );
2451            }
2452        }
2453    }
2454}
2455
2456/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2457fn not_a_store_issue(store: &Store) -> Issue {
2458    Issue {
2459        severity: Severity::Error,
2460        code: codes::NOT_A_STORE,
2461        file: store.root.clone(),
2462        line: None,
2463        key: None,
2464        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2465        suggestion: Some("create a `DB.md` at the store root".into()),
2466        related: vec![],
2467    }
2468}
2469
2470/// True if a store-relative path is a content file: under `sources/` or
2471/// `records/` and not an `index.md`/`index.jsonl`/`log.md`.
2472fn is_content_file(rel: &Path) -> bool {
2473    // Defense in depth: a real content file is always a forward (Normal-only)
2474    // store-relative path. Reject any `..`/absolute/prefix component so a
2475    // malformed object slot judged only by its FIRST component (`records/../..`)
2476    // can never turn a per-file read into a store escape, even if a future caller
2477    // forgets the path-safety gate `changed_objects_since` now applies.
2478    if !is_safe_store_relative_path(rel) {
2479        return false;
2480    }
2481    let Some(first) = rel.iter().next().and_then(|s| s.to_str()) else {
2482        return false;
2483    };
2484    if !matches!(first, "sources" | "records") {
2485        return false;
2486    }
2487    let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
2488    // Only the derived catalog twins are meta INSIDE a layer. `DB.md` / `log.md`
2489    // are reserved meta only at the store ROOT, which the `first` layer check
2490    // above already excludes — so a content file named `log.md` / `DB.md` inside
2491    // a layer (e.g. `records/docs/log.md`) is real content, consistent with
2492    // `Store::walk`.
2493    if matches!(name, "index.md" | "index.jsonl") {
2494        return false;
2495    }
2496    name.ends_with(".md")
2497}
2498
/// Recognizes the store's ROOT meta files: a path made of exactly one normal
/// component whose name is `DB.md` or `log.md`.
///
/// A same-named file deeper in the tree (`records/docs/log.md`) has more than
/// one component and is therefore not a root meta file.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain name.
        (Some(Component::Normal(name)), None) => {
            matches!(name.to_str(), Some("DB.md" | "log.md"))
        }
        // Empty path, a non-normal first component, or a parent directory.
        _ => false,
    }
}
2515
/// Recognizes a derived index-catalog file: any path, at any depth, whose file
/// name is `index.md` or `index.jsonl`. A path with no file name, or one whose
/// name is not valid UTF-8, is not a catalog file.
fn is_index_catalog_file(rel: &Path) -> bool {
    let Some(file_name) = rel.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    file_name == "index.md" || file_name == "index.jsonl"
}
2529
/// Splits a file's text into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The very first line must be a `---` fence (trailing whitespace allowed) and
/// the block ends at the next such line; `None` is returned when either fence
/// is missing. The YAML keeps one `\n` after every line, the body is the lines
/// after the closing fence joined with `\n`, and the fence line is 1-based.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // A single leading UTF-8 BOM is tolerated so a BOM-prefixed file is not
    // mistaken for one without frontmatter.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let all_lines: Vec<&str> = text.lines().collect();
    let (opening, rest) = all_lines.split_first()?;
    if opening.trim_end() != "---" {
        return None;
    }

    // `rest[0]` is file line 2, so a fence at `rest[k]` sits on file line `k + 2`.
    let fence_idx = rest.iter().position(|line| line.trim_end() == "---")?;
    let close_line = (fence_idx + 2) as u32;

    let mut yaml = String::new();
    for line in &rest[..fence_idx] {
        yaml.push_str(line);
        yaml.push('\n');
    }

    // Body = everything after the closing fence.
    let body = rest[fence_idx + 1..].join("\n");
    Some((yaml, body, close_line))
}
2566
2567/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2568fn read_summary(abs: &Path) -> Option<String> {
2569    let text = std::fs::read_to_string(abs).ok()?;
2570    let (yaml, _, _) = split_frontmatter(&text)?;
2571    let value: Value = serde_norway::from_str(&yaml).ok()?;
2572    if let Value::Mapping(m) = value {
2573        m.get(Value::String("summary".into()))
2574            .and_then(scalar_string)
2575    } else {
2576        None
2577    }
2578}
2579
2580/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2581/// non-string keys (frontmatter keys are always strings).
2582fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2583    let mut out = BTreeMap::new();
2584    for (k, v) in map {
2585        if let Value::String(s) = k {
2586            out.insert(s.clone(), v.clone());
2587        }
2588    }
2589    out
2590}
2591
2592/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2593/// sequences/mappings/null.
2594fn scalar_string(v: &Value) -> Option<String> {
2595    match v {
2596        Value::String(s) => Some(s.clone()),
2597        Value::Number(n) => Some(n.to_string()),
2598        Value::Bool(b) => Some(b.to_string()),
2599        _ => None,
2600    }
2601}
2602
2603/// True if a frontmatter value carries no content for a *required*-field check:
2604/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2605/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2606/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2607/// a shape/enum field is caught by the later non-scalar shape check, not by the
2608/// required-presence check.
2609fn is_empty_value(v: &Value) -> bool {
2610    match v {
2611        Value::Null => true,
2612        Value::Sequence(items) => items.is_empty(),
2613        Value::Mapping(map) => map.is_empty(),
2614        other => scalar_string(other)
2615            .map(|s| s.trim().is_empty())
2616            .unwrap_or(true),
2617    }
2618}
2619
2620/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2621/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2622fn is_flat_scalar_list(v: &Value) -> bool {
2623    match v {
2624        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2625        _ => false,
2626    }
2627}
2628
2629/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2630/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2631/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2632/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2633/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2634/// both forms uniformly, the way the link textually appears — the doctrine view.
2635///
2636/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2637/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2638fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2639    let mut out = Vec::new();
2640    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2641        for link in links {
2642            out.push((key.clone(), link));
2643        }
2644    }
2645    out
2646}
2647
2648/// The wiki-link targets declared under a single top-level frontmatter key
2649/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2650/// carries no `[[...]]`.
2651fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2652    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2653        if k == key {
2654            return links;
2655        }
2656    }
2657    Vec::new()
2658}
2659
2660/// The raw value text under a single top-level frontmatter key (the remainder of
2661/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2662/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2663fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2664    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2665        if k == key {
2666            return Some(value_text);
2667        }
2668    }
2669    None
2670}
2671
2672/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2673/// each top-level key. A top-level key is a line with no leading indentation in
2674/// `name:` form; its value spans the rest of that line plus any deeper-indented
2675/// continuation lines (block scalars, block sequences) until the next top-level
2676/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2677/// absolute file line.
2678fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2679    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2680    let mut current: Option<(String, String, Vec<Link>)> = None;
2681
2682    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2683        let file_line = fm_start_line + idx as u32;
2684        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2685        let trimmed = raw_line.trim();
2686
2687        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2688        // comment. (Indented or dash lines belong to the current key's value.)
2689        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2690            top_level_key(raw_line)
2691        } else {
2692            None
2693        };
2694
2695        if let Some((key, after)) = new_key {
2696            if let Some(done) = current.take() {
2697                blocks.push(done);
2698            }
2699            let mut links = Vec::new();
2700            collect_line_links(after, file_line, &mut links);
2701            current = Some((key, after.trim().to_string(), links));
2702        } else if let Some((_k, value_text, links)) = current.as_mut() {
2703            // Continuation of the current key's value (indented or dash line).
2704            if !value_text.is_empty() {
2705                value_text.push('\n');
2706            }
2707            value_text.push_str(trimmed);
2708            collect_line_links(raw_line, file_line, links);
2709        }
2710    }
2711    if let Some(done) = current.take() {
2712        blocks.push(done);
2713    }
2714    blocks
2715}
2716
/// Splits a frontmatter line at its first `:` into `(key, value_after_colon)`.
///
/// The key is the trimmed text before the colon and must be non-empty and made
/// only of alphanumerics, `_` and `-`; otherwise the line is not a `name:`
/// mapping entry and `None` is returned. The value is returned untrimmed.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let value = &line[colon + 1..];

    let is_key_char = |c: char| c.is_alphanumeric() || matches!(c, '_' | '-');
    if name.is_empty() || !name.chars().all(is_key_char) {
        return None;
    }
    Some((name.to_string(), value))
}
2731
2732/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2733/// each tagged with `file_line`.
2734fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2735    let bytes = s.as_bytes();
2736    let mut i = 0;
2737    while i + 1 < bytes.len() {
2738        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2739            if let Some(close) = s[i + 2..].find("]]") {
2740                let inner = &s[i + 2..i + 2 + close];
2741                // Guard against `[[[` (nested) double-counting: the inner must
2742                // not itself open another `[[`.
2743                let target = inner
2744                    .trim_start_matches('[')
2745                    .split('|')
2746                    .next()
2747                    .unwrap_or(inner)
2748                    .trim()
2749                    .to_string();
2750                if !target.is_empty() {
2751                    links.push(Link {
2752                        target,
2753                        line: file_line,
2754                    });
2755                }
2756                i = i + 2 + close + 2;
2757                continue;
2758            }
2759        }
2760        i += 1;
2761    }
2762}
2763
2764/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2765/// Skips fenced code blocks, so example links in docs don't trip the validator.
2766///
2767/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2768/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2769/// only on a line that is the **same** fence character with a run **at least as
2770/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2771/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2772/// document a backtick fence) — the inner backtick line would flip `in_fence`
2773/// off and the demo `[[…]]` inside the code block would be checked as a live
2774/// link, falsely flagging a legal store.
2775fn extract_wiki_links(body: &str) -> Vec<Link> {
2776    let mut out = Vec::new();
2777    let mut fence: Option<(u8, usize)> = None;
2778    for (idx, line) in body.lines().enumerate() {
2779        let content = line.trim_end_matches('\r');
2780        if let Some(f) = fence {
2781            // Inside a fence: the only thing that matters is whether THIS line
2782            // closes it (matching char, run ≥ the opening run). Everything else
2783            // is opaque code — no link extraction.
2784            if fence_closes(content, f) {
2785                fence = None;
2786            }
2787            continue;
2788        }
2789        if let Some(opened) = fence_opens(content) {
2790            fence = Some(opened);
2791            continue;
2792        }
2793        let line_no = (idx + 1) as u32;
2794        let bytes = line.as_bytes();
2795        let mut i = 0;
2796        while i + 1 < bytes.len() {
2797            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2798                if let Some(close) = line[i + 2..].find("]]") {
2799                    let inner = &line[i + 2..i + 2 + close];
2800                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2801                    // Skip a triple-bracket `[[[…` opening: the inner content
2802                    // starts with `[`, so this is the rejected flow-form list
2803                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2804                    // legitimate target never starts with `[`. The frontmatter
2805                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2806                    // extracting a bogus body link here would double-report it as
2807                    // a spurious `WIKI_LINK_SHORT_FORM`.
2808                    if !target.is_empty() && !target.starts_with('[') {
2809                        out.push(Link {
2810                            target,
2811                            line: line_no,
2812                        });
2813                    }
2814                    i = i + 2 + close + 2;
2815                    continue;
2816                }
2817            }
2818            i += 1;
2819        }
2820    }
2821    out
2822}
2823
/// Returns `(fence byte, run length)` when `line` opens a fenced code block.
///
/// A fence is a run of at least three backticks or three tildes, preceded by
/// at most three spaces of indent. For a backtick fence the text after the run
/// (the info string) may not contain another backtick.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }

    let marker = *rest.as_bytes().first()?;
    if !matches!(marker, b'`' | b'~') {
        return None;
    }

    let run = rest.bytes().take_while(|&b| b == marker).count();
    if run < 3 {
        return None;
    }

    // A backtick fence's info string may not itself contain a backtick.
    let info = &rest[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
2849
/// Decide whether `line` terminates the open `fence` (`(fence byte, run length)`).
///
/// Mirrors the parser's `is_closing_fence`: the line must be indented by at most
/// three spaces, start with a run of the *same* fence character that is at least
/// as long as the opening run, and carry nothing but whitespace afterwards. A
/// run of the other fence character therefore never closes the block.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, min_run) = fence;
    let body = line.trim_start_matches(' ');
    let indent = line.len() - body.len();
    // `tail` is what remains once the leading marker run is removed.
    let tail = body.trim_start_matches(marker as char);
    let run = body.len() - tail.len();
    indent <= 3 && run >= min_run && tail.trim().is_empty()
}
2867
2868/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2869/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2870///
2871/// **Scoped to the inline value on the key line.** The SPEC's canonical
2872/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2873/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2874/// flagged — even though, parsed whole, it nests the same way the rejected
2875/// inline flow form does. So this check looks only at the value written *inline*
2876/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2877/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2878/// mis-encoding), it is flagged. A key with no inline value (the block form,
2879/// whose items live on continuation lines) is never inspected here.
2880///
2881/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2882/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2883/// encodes the identical nested sequence but evaded the old prefix match.
2884fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2885    let mut out = Vec::new();
2886    for line in fm_yaml.lines() {
2887        // Top-level key lines only (no indentation, not a comment or list dash).
2888        if line.starts_with(' ') || line.starts_with('\t') {
2889            continue;
2890        }
2891        let Some((key, rest)) = line.split_once(':') else {
2892            continue;
2893        };
2894        let key = key.trim();
2895        if key.is_empty()
2896            || key.starts_with('#')
2897            || key.starts_with('-')
2898            || !key
2899                .chars()
2900                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2901        {
2902            continue;
2903        }
2904        let rest = rest.trim();
2905        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2906        // the unquoted block form has an empty inline value and is never flagged.
2907        if !rest.starts_with('[') {
2908            continue;
2909        }
2910        // Parse just the inline value and test its shape: a list whose items are
2911        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2912        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2913        // `Seq[Seq[String]]` and is NOT flagged).
2914        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2915            let nested = items.iter().any(|item| match item {
2916                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2917                _ => false,
2918            });
2919            if nested {
2920                out.push(key.to_string());
2921            }
2922        }
2923    }
2924    out
2925}
2926
/// Report whether a bare target (no `.md`) is a full store-relative path: the
/// text before the first `/` is one of the two layers (`sources`, `records`) and
/// something non-empty follows that `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, remainder)) => {
            !remainder.is_empty() && matches!(layer, "sources" | "records")
        }
        // No `/` at all: a short-form target.
        None => false,
    }
}
2935
/// Safety gate for paths taken from user-authored markdown / JSON sidecars.
///
/// Returns `true` only when `path` has at least one normal component and no
/// component that could leave the store: a `..`, a root (`/`), or a platform
/// prefix. `.` components are tolerated but do not count as content, so an empty
/// path or a bare `.` is rejected.
fn is_safe_store_relative_path(path: &Path) -> bool {
    let escapes = path.components().any(|part| {
        matches!(
            part,
            Component::ParentDir | Component::RootDir | Component::Prefix(_)
        )
    });
    let names_something = path
        .components()
        .any(|part| matches!(part, Component::Normal(_)));
    !escapes && names_something
}
2950
2951fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2952    let path = Path::new(bare);
2953    if !is_safe_store_relative_path(path) {
2954        return None;
2955    }
2956    Some(PathBuf::from(format!("{bare}.md")))
2957}
2958
/// Outcome of resolving a wiki-link / index-entry target against the store.
enum TargetResolution {
    /// A file was found, either at the path as written or at that path with
    /// `.md` appended.
    Exists,
    /// The path is safe and store-relative, but neither form is a file on disk.
    Missing,
    /// The path would leave the store (absolute, `..`, or a platform prefix) and
    /// must never be probed.
    Unsafe,
}
2968
2969/// Resolve a bare wiki-link / index-entry target the way the graph engine does
2970/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
2971/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
2972/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
2973/// real file), then the `.md`-appended path (the common case for content
2974/// pages). Without trying the literal path first, a legal link to a raw source
2975/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
2976/// resolves it.
2977fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
2978    // The literal path and the `.md`-appended path share the same safety check
2979    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
2980    // target is unsafe in both forms.
2981    if !is_safe_store_relative_path(Path::new(bare)) {
2982        return TargetResolution::Unsafe;
2983    }
2984    match resolved_target_abs(store, bare) {
2985        Some(_) => TargetResolution::Exists,
2986        None => TargetResolution::Missing,
2987    }
2988}
2989
2990/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
2991/// trying the literal path first, then `.md`-appended — mirroring the graph
2992/// engine. `None` when neither exists, or when the bare target escapes the store
2993/// (callers that need to distinguish unsafe from merely-missing use
2994/// [`resolve_wiki_target`]).
2995fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
2996    if !is_safe_store_relative_path(Path::new(bare)) {
2997        return None;
2998    }
2999    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
3000    // verbatim under `sources/`).
3001    let literal = store.root.join(bare);
3002    if literal.is_file() {
3003        return Some(literal);
3004    }
3005    // The `.md`-appended path (a content page referenced without its extension).
3006    let with_md = store.root.join(format!("{bare}.md"));
3007    if with_md.is_file() {
3008        return Some(with_md);
3009    }
3010    None
3011}
3012
/// Report whether the bare target `bare` is `prefix` itself or lies beneath it
/// (both given without `.md`). Trailing slashes on `prefix` are ignored, and the
/// match is per path segment: `records/contacts-old` is not under
/// `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    bare.strip_prefix(prefix.trim_end_matches('/'))
        .is_some_and(|remainder| remainder.is_empty() || remainder.starts_with('/'))
}
3018
/// The type-folder for a store-relative content path: `<layer>/<type-folder>`
/// (the folder directly under the layer; date-shards roll up to it).
///
/// Returns `None` for a file sitting directly in a layer folder and for any
/// path whose first component is not one of the two layers (`sources`,
/// `records`).
///
/// Components are read positionally from the path itself. Filtering through
/// `to_str` first would silently drop any non-UTF-8 component, shifting the ones
/// after it into the wrong slot and miscounting the depth.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter();
    let layer = parts.next()?;
    let type_folder = parts.next()?;
    // Need layer/type-folder/file at minimum.
    parts.next()?;
    if !matches!(layer.to_str(), Some("sources" | "records")) {
        return None;
    }
    Some(Path::new(layer).join(type_folder))
}
3032
3033/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`,
3034/// returning store-relative paths to be parsed in full. Skips hidden dirs and
3035/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
3036/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
3037/// (a single presence-only pass), so the loop default never walks-and-*parses*
3038/// the whole content tree.
3039///
3040/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
3041/// is reserved (`Store::is_in_log_dir` checks only the first path component);
3042/// the walk roots are the two layers, so the root archive is already out of
3043/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
3044/// decision log) is real content (see `is_content_file`), so pruning every
3045/// `name == "log"` made `--all` silently skip those files — reporting fewer
3046/// errors than the default working-set scope on the same store.
3047fn walk_content_files(root: &Path) -> Vec<PathBuf> {
3048    let mut out = Vec::new();
3049    for layer in ["sources", "records"] {
3050        let base = root.join(layer);
3051        if !base.is_dir() {
3052            continue;
3053        }
3054        for entry in walkdir::WalkDir::new(&base)
3055            // Follow symlinks, matching the loop-default `md_walker`
3056            // (store.rs `follow_links(true)`): a content file that is a symlink
3057            // into the store, or that lives in a symlinked-in type-folder, is
3058            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3059            // `walk_all_md`, both following symlinks). Without this the `--all`
3060            // sweep silently SKIPPED such files, so the authoritative superset
3061            // reported FEWER issues than the loop scope on the same store —
3062            // inverting the `--all`-is-the-superset contract. walkdir's loop
3063            // detection drops a symlink cycle (yields an Err that `.flatten()`
3064            // discards), so this cannot hang.
3065            .follow_links(true)
3066            .into_iter()
3067            .filter_entry(|e| {
3068                let name = e.file_name().to_str().unwrap_or("");
3069                !name.starts_with('.')
3070            })
3071            .flatten()
3072        {
3073            if !entry.file_type().is_file() {
3074                continue;
3075            }
3076            let name = entry.file_name().to_str().unwrap_or("");
3077            if name.ends_with(".md") && name != "index.md" {
3078                if let Ok(rel) = entry.path().strip_prefix(root) {
3079                    out.push(rel.to_path_buf());
3080                }
3081            }
3082        }
3083    }
3084    out.sort();
3085    out
3086}
3087
3088/// Every `index.md` under the store (root + layers + type-folders), as
3089/// store-relative paths. Used to detect orphan indexes. Like
3090/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
3091/// and its `index.md` is not pruned (only the root-level `log/` archive is
3092/// reserved, and the walk roots are the two layers, so it is already
3093/// out of scope).
3094fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3095    let mut out = Vec::new();
3096    if root.join("index.md").is_file() {
3097        out.push(PathBuf::from("index.md"));
3098    }
3099    for layer in ["sources", "records"] {
3100        let base = root.join(layer);
3101        if !base.is_dir() {
3102            continue;
3103        }
3104        for entry in walkdir::WalkDir::new(&base)
3105            // Follow symlinks, matching the loop-default `md_walker`
3106            // (store.rs `follow_links(true)`): a content file that is a symlink
3107            // into the store, or that lives in a symlinked-in type-folder, is
3108            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3109            // `walk_all_md`, both following symlinks). Without this the `--all`
3110            // sweep silently SKIPPED such files, so the authoritative superset
3111            // reported FEWER issues than the loop scope on the same store —
3112            // inverting the `--all`-is-the-superset contract. walkdir's loop
3113            // detection drops a symlink cycle (yields an Err that `.flatten()`
3114            // discards), so this cannot hang.
3115            .follow_links(true)
3116            .into_iter()
3117            .filter_entry(|e| {
3118                let name = e.file_name().to_str().unwrap_or("");
3119                !name.starts_with('.')
3120            })
3121            .flatten()
3122        {
3123            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3124                if let Ok(rel) = entry.path().strip_prefix(root) {
3125                    out.push(rel.to_path_buf());
3126                }
3127            }
3128        }
3129    }
3130    out.sort();
3131    out
3132}
3133
/// One entry line parsed out of an `index.md`.
struct IndexEntry {
    /// The wiki-link target, with any `|display` segment removed.
    target: String,
    /// The summary text that follows the separator, when the line has one.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the index file.
    line: u32,
}
3141
3142/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3143/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3144/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3145/// path, the summary text is whatever follows the em dash.
3146fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3147    let mut out = Vec::new();
3148    let mut in_more = false;
3149    for (idx, line) in text.lines().enumerate() {
3150        let trimmed = line.trim_start();
3151        if trimmed.starts_with("## More") {
3152            in_more = true;
3153            continue;
3154        }
3155        if in_more {
3156            continue;
3157        }
3158        if !trimmed.starts_with("- ") {
3159            continue;
3160        }
3161        // Find the first `[[...]]`.
3162        let Some(open) = trimmed.find("[[") else {
3163            continue;
3164        };
3165        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3166            continue;
3167        };
3168        let inner = &trimmed[open + 2..open + 2 + close_rel];
3169        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3170
3171        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3172        let after = &trimmed[open + 2 + close_rel + 2..];
3173        let summary_text = extract_index_entry_summary(after);
3174
3175        out.push(IndexEntry {
3176            target,
3177            summary_text,
3178            line: (idx + 1) as u32,
3179        });
3180    }
3181    out
3182}
3183
3184/// Pull the summary portion out of the text trailing an index entry's
3185/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3186/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3187/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3188/// renderer's tag separator).
3189fn extract_index_entry_summary(after: &str) -> Option<String> {
3190    let mut s = after.trim();
3191    // Drop a leading "(N ...)" count segment, if present.
3192    if s.starts_with('(') {
3193        if let Some(close) = s.find(')') {
3194            s = s[close + 1..].trim_start();
3195        }
3196    }
3197    // Require an em dash or hyphen separator before the summary.
3198    let s = if let Some(rest) = s.strip_prefix('—') {
3199        rest.trim()
3200    } else if let Some(rest) = s.strip_prefix('-') {
3201        rest.trim()
3202    } else {
3203        return None;
3204    };
3205    if s.is_empty() {
3206        return None;
3207    }
3208    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3209    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3210    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3211    // the file has no tags. The previous code also accepted a *single*-spaced
3212    // ` · ` separator, which collided with a legal summary whose own text ends
3213    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3214    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3215    // summary verbatim (no tag block, since there are no tags), but the loose
3216    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3217    // `"Standup notes"` against the file's full summary, and emitted a spurious
3218    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3219    // (rebuild regenerates the identical line). Matching the renderer's exact
3220    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3221    // matches from the right so only the real trailing tag block is considered.
3222    let s = match s.rsplit_once("  ·  ") {
3223        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3224        _ => s,
3225    };
3226    Some(s.to_string())
3227}
3228
/// Report whether `s` is a tag block in the shape the index renderer appends
/// after its `·` separator (`crate::index::format_md_entry`): at least one
/// whitespace-separated token, every token being `#` followed by something.
/// This is what separates the renderer's `  ·  #tag` suffix from a literal `·`
/// inside the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // An empty or all-whitespace string is not a tag block.
    tokens.peek().is_some() && tokens.all(|tok| tok.len() >= 2 && tok.starts_with('#'))
}
3243
3244/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3245/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3246/// or the header isn't well-formed.
3247fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3248    let rest = line.strip_prefix("## [")?;
3249    let close = rest.find(']')?;
3250    let ts_str = &rest[..close];
3251    let tail = rest[close + 1..].trim();
3252
3253    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3254    // attach a zero offset — the log header carries minute precision, no zone.
3255    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3256    let offset = FixedOffset::east_opt(0)?;
3257    let ts = naive.and_local_timezone(offset).single()?;
3258
3259    // kind | object
3260    let (kind, object) = match tail.split_once('|') {
3261        Some((k, o)) => {
3262            let o = o.trim();
3263            (
3264                k.trim().to_string(),
3265                if o.is_empty() {
3266                    None
3267                } else {
3268                    Some(o.to_string())
3269                },
3270            )
3271        }
3272        None => (tail.to_string(), None),
3273    };
3274    if kind.is_empty() {
3275        return None;
3276    }
3277    Some((ts, kind, object))
3278}
3279
3280/// Every log file that holds entries for the working-set scan: the active
3281/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3282/// strictly-prior-month entries into the archives, so the active file alone is
3283/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3284/// unvalidated object can live in an archive after a month rollover. Reading the
3285/// archives here keeps the working-set readers in sync with the rest of the log
3286/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3287/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3288/// only log headers, never the content store, so the loop budget is preserved.
3289fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3290    let mut files = vec![store.root.join("log.md")];
3291    let archive_dir = store.root.join("log");
3292    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3293        let mut archives: Vec<PathBuf> = entries
3294            .flatten()
3295            .map(|e| e.path())
3296            .filter(|p| {
3297                p.is_file()
3298                    && p.file_name()
3299                        .and_then(|s| s.to_str())
3300                        .and_then(|n| n.strip_suffix(".md"))
3301                        .is_some_and(is_year_month_archive)
3302            })
3303            .collect();
3304        // Deterministic order (oldest month first); the callers fold across all
3305        // files so order doesn't affect the result, but a stable order keeps the
3306        // scan reproducible.
3307        archives.sort();
3308        files.extend(archives);
3309    }
3310    files
3311}
3312
/// Report whether `s` has the `YYYY-MM` shape of an archive stem, the
/// `log/<YYYY-MM>.md` naming the rotation in [`crate::log`] emits: exactly four
/// ASCII digits, a `-`, and two ASCII digits. This is a shape check only; the
/// month value is not range-checked.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [y1, y2, y3, y4, b'-', m1, m2] => [y1, y2, y3, y4, m1, m2]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
3322
3323/// The timestamp of the most recent `validate` entry across the active `log.md`
3324/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3325/// Reads only headers; never the whole store. Archive-aware so a `validate`
3326/// entry that rotated into an archive after a month rollover still anchors the
3327/// cutoff (without this, the cutoff silently resets to `None`).
3328fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3329    let mut latest: Option<DateTime<FixedOffset>> = None;
3330    for file in log_files_for_working_set(store) {
3331        let Ok(text) = std::fs::read_to_string(&file) else {
3332            continue;
3333        };
3334        for line in text.lines() {
3335            if !line.starts_with("## [") {
3336                continue;
3337            }
3338            if let Some((ts, kind, _)) = parse_log_header(line) {
3339                if kind == "validate" {
3340                    latest = Some(match latest {
3341                        Some(p) if p >= ts => p,
3342                        _ => ts,
3343                    });
3344                }
3345            }
3346        }
3347    }
3348    latest
3349}
3350
3351/// The set of content objects changed since `cutoff`, read from log entries
3352/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3353/// counts (no prior validate window). Returns store-relative `.md` paths.
3354///
3355/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3356/// month rollover [`Log::append`] rotates prior-month entries out of the active
3357/// file, so an object changed-but-never-validated in a prior month lives only in
3358/// an archive. Reading the archives here is what keeps `dbmd validate` from
3359/// silently skipping those files. Reads only log headers, never the content
3360/// store.
3361fn changed_objects_since(
3362    store: &Store,
3363    cutoff: Option<DateTime<FixedOffset>>,
3364) -> BTreeSet<PathBuf> {
3365    let mut out = BTreeSet::new();
3366    for file in log_files_for_working_set(store) {
3367        let Ok(text) = std::fs::read_to_string(&file) else {
3368            continue;
3369        };
3370        for line in text.lines() {
3371            if !line.starts_with("## [") {
3372                continue;
3373            }
3374            let Some((ts, kind, object)) = parse_log_header(line) else {
3375                continue;
3376            };
3377            if let Some(c) = cutoff {
3378                if ts < c {
3379                    continue;
3380                }
3381            }
3382            if !matches!(
3383                kind.as_str(),
3384                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3385            ) {
3386                continue;
3387            }
3388            if let Some(obj) = object {
3389                // The object slot is a store-relative path (or a wiki-link target).
3390                let bare = obj
3391                    .trim()
3392                    .trim_start_matches("[[")
3393                    .trim_end_matches("]]")
3394                    .split('|')
3395                    .next()
3396                    .unwrap_or("")
3397                    .trim()
3398                    .trim_end_matches(".md")
3399                    .to_string();
3400                if bare.is_empty() {
3401                    continue;
3402                }
3403                // Containment: the object slot is a log-header field that can
3404                // carry a `..`/absolute/prefix path (a hand-edited or
3405                // merge-malformed log line). Route it through the same safety gate
3406                // every other disk-touching validator path uses
3407                // (`safe_md_target_rel`, which `link_target_type` already applies)
3408                // so a `records/../../leaky` object cannot make
3409                // `validate_working_set` read + frontmatter-report on a file
3410                // OUTSIDE the store root. An unsafe object is dropped from the
3411                // changed set rather than probed.
3412                if let Some(rel) = safe_md_target_rel(&bare) {
3413                    out.insert(rel);
3414                }
3415            }
3416        }
3417    }
3418    out
3419}
3420
/// A positive result of the [`derived_from_ignored_type`] policy check.
///
/// Names the `derived_from` target that resolves to an ignored-type record and
/// that record's type, which is all the validate finding and the write-time
/// warning need to render their messages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The offending `derived_from` wiki-link target exactly as written: a bare
    /// store-relative path without `.md`.
    pub target: String,
    /// The `type` that target resolved to; it is a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3434
3435/// **The single authoritative `### Ignored types` derivation check.** Decides
3436/// whether a conclusion record derives from an ignored-type record: the
3437/// `meta-type` must be `conclusion`, `### Ignored types` must be non-empty, and
3438/// some `derived_from` target must resolve to a record whose `type` is in
3439/// `ignored_types`. Returns the first such target (and its type), or `None`.
3440///
3441/// Both surfaces call this so the policy lives in exactly one place:
3442/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3443/// `derived_from` targets it scanned from the raw frontmatter, and the write
3444/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3445/// The link *extraction* differs per surface (text-scan with line numbers vs.
3446/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3447/// resolution, and `ignored_types` membership — does not.
3448pub fn derived_from_ignored_type<I, S>(
3449    store: &Store,
3450    meta_type: &str,
3451    derived_from_targets: I,
3452) -> Option<DerivedFromIgnored>
3453where
3454    I: IntoIterator<Item = S>,
3455    S: AsRef<str>,
3456{
3457    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3458        return None;
3459    }
3460    for target in derived_from_targets {
3461        let target = target.as_ref();
3462        if let Some(target_type) = link_target_type(store, target) {
3463            if store.config.ignored_types.contains(&target_type) {
3464                return Some(DerivedFromIgnored {
3465                    target: target.to_string(),
3466                    target_type,
3467                });
3468            }
3469        }
3470    }
3471    None
3472}
3473
3474/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3475fn link_target_type(store: &Store, target: &str) -> Option<String> {
3476    let bare = target.trim_end_matches(".md");
3477    let abs = store.root.join(safe_md_target_rel(bare)?);
3478    let text = std::fs::read_to_string(&abs).ok()?;
3479    let (yaml, _, _) = split_frontmatter(&text)?;
3480    let value: Value = serde_norway::from_str(&yaml).ok()?;
3481    if let Value::Mapping(m) = value {
3482        m.get(Value::String("type".into())).and_then(scalar_string)
3483    } else {
3484        None
3485    }
3486}
3487
3488// ── Shape validators ─────────────────────────────────────────────────────────
3489
3490/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3491/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3492fn is_iso8601(s: &str) -> bool {
3493    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3494}
3495
3496/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3497/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3498/// accept the date-only form per the SPEC's worked example.
3499fn is_iso8601_date_or_datetime(s: &str) -> bool {
3500    let s = s.trim();
3501    if DateTime::parse_from_rfc3339(s).is_ok() {
3502        return true;
3503    }
3504    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3505}
3506
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Rules, applied after trimming surrounding whitespace:
/// - no whitespace of any kind inside the address. Tabs and newlines are
///   rejected as well as plain spaces; checking only `' '` let
///   `"sarah\t@acme.com"` through;
/// - exactly one `@`, so the double-`@` typo `sarah@@acme.com` and `a@b@c.com`
///   are rejected;
/// - the domain contains a `.` and every dot-separated label is non-empty, which
///   rules out a leading dot, a trailing dot, and doubled dots (`a@acme..com`).
///
/// This is a shape check, not full address validation.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    if s.contains(char::is_whitespace) {
        return false;
    }
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    !local.is_empty()
        && !domain.contains('@')
        && domain.contains('.')
        && domain.split('.').all(|label| !label.is_empty())
}
3524
/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
/// plain decimal number with optional thousands separators and ≤ 2 decimals.
///
/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
/// floats `f64` accepts (`inf`, `-inf`, `NaN`, and `1e3`-style exponents) are
/// rejected, and the ≤ 2-decimal rule is actually enforced.
///
/// Commas are accepted only as separators *between digit groups of the integer
/// part*. Deleting every comma before validating let malformed values such as
/// `12.3,4`, `,5`, `5,`, `1,,2` and `, 5` pass as if they were `12.34`, `5`,
/// `5`, `12` and `5`. Group width is deliberately not enforced, so non-Western
/// groupings like `1,00,000` remain valid.
fn is_currency(s: &str) -> bool {
    let mut t = s.trim();
    // Strip a leading currency symbol …
    for sym in ["$", "€", "£", "¥"] {
        if let Some(rest) = t.strip_prefix(sym) {
            t = rest.trim_start();
            break;
        }
    }
    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
    // code must be exactly three ASCII letters and separated from the number by
    // whitespace, so a bare `USD` with no amount still fails.
    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
            t = rest.trim_start();
        }
    }

    // Validate comma placement before removing the separators.
    let (whole, frac) = match t.split_once('.') {
        Some((w, f)) => (w, Some(f)),
        None => (t, None),
    };
    // A comma has no meaning in the fractional part.
    if frac.is_some_and(|f| f.contains(',')) {
        return false;
    }
    // Every comma-delimited group of the integer part must be non-empty: no
    // leading, trailing, or doubled separators. Digit-ness of the groups is
    // checked by `is_plain_amount` below.
    let int_digits = whole.strip_prefix(['+', '-']).unwrap_or(whole);
    if int_digits.split(',').any(str::is_empty) {
        return false;
    }

    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
    is_plain_amount(&cleaned)
}

/// True for a bare decimal amount: optional sign, ≥ 1 digit, an optional
/// fractional part of 1–2 digits. No exponents, no `inf`/`NaN`, no empty string.
fn is_plain_amount(s: &str) -> bool {
    let digits = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match digits.split_once('.') {
        Some((i, f)) => (i, Some(f)),
        None => (digits, None),
    };
    // The integer part is mandatory and must be all ASCII digits.
    if int_part.is_empty() || !int_part.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }
    match frac_part {
        None => true,
        // A present fractional part must be one or two ASCII digits.
        Some(f) => (1..=2).contains(&f.len()) && f.bytes().all(|b| b.is_ascii_digit()),
    }
}
3569
/// True for an http(s) URL: the trimmed input must begin with `http://` or
/// `https://` and carry at least one character after that prefix. Because the
/// remainder is measured after the *matched* scheme, a one-character host on
/// the shorter scheme (`http://x` — e.g. an intranet/container hostname) is
/// accepted, while a bare scheme (`http://`, `https://`) is rejected.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    let after_scheme = candidate
        .strip_prefix("http://")
        .or_else(|| candidate.strip_prefix("https://"));
    matches!(after_scheme, Some(rest) if !rest.is_empty())
}
3584
3585/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3586fn shape_suggestion(shape: Shape) -> String {
3587    match shape {
3588        Shape::String => "use a scalar string".into(),
3589        Shape::Int => "use an integer".into(),
3590        Shape::Bool => "use `true` or `false`".into(),
3591        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3592        Shape::Email => "use a `<local>@<domain>` address".into(),
3593        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3594        Shape::Url => "use an http(s) URL".into(),
3595    }
3596}
3597
3598/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3599/// can't know the folder, so the suggestion is generic but actionable.
3600fn short_form_suggestion(bare: &str) -> Option<String> {
3601    Some(format!(
3602        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3603        slugish(bare)
3604    ))
3605}
3606
/// Turn a plain string into a filesystem-ish leaf: trim, lowercase, replace
/// each whitespace character with a hyphen, and drop every character that is
/// not alphanumeric, `-`, `/` or `_`.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut leaf = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        if ch.is_whitespace() {
            // Whitespace always survives, as a hyphen.
            leaf.push('-');
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            leaf.push(ch);
        }
        // Anything else (punctuation, symbols) is dropped.
    }
    leaf
}
3616
3617/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3618/// hashes a byte or reads an asset file's contents — byte presence and hash
3619/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3620/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3621/// content file's `asset`/`assets` declarations.
3622fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3623    use crate::assets;
3624
3625    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3626    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3627
3628    // Lenient manifest read: a malformed line is reported, not fatal.
3629    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3630    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3631        for (i, line) in text.lines().enumerate() {
3632            if line.trim().is_empty() {
3633                continue;
3634            }
3635            match serde_json::from_str::<assets::AssetRecord>(line) {
3636                Ok(rec) => {
3637                    manifest.insert(rec.path.clone(), rec);
3638                }
3639                Err(e) => push(
3640                    issues,
3641                    Severity::Error,
3642                    codes::ASSET_MANIFEST_MALFORMED,
3643                    manifest_rel,
3644                    Some((i as u32) + 1),
3645                    None,
3646                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3647                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3648                    vec![],
3649                ),
3650            }
3651        }
3652    }
3653
3654    // Per-wrapper declarations: every declared asset must be in the manifest and
3655    // must not point at a markdown content file.
3656    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3657    for (rel, p) in parsed {
3658        let Some(map) = &p.fm else {
3659            continue;
3660        };
3661        for decl in assets::declarations_from_yaml_map(map) {
3662            let norm = match assets::normalize_asset_path(&decl.path) {
3663                Ok(n) => n,
3664                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3665            };
3666            declared.insert(norm.clone());
3667            let is_md = Path::new(&norm)
3668                .extension()
3669                .and_then(|e| e.to_str())
3670                .map(|e| e.eq_ignore_ascii_case("md"))
3671                .unwrap_or(false);
3672            if is_md {
3673                push(
3674                    issues,
3675                    Severity::Warning,
3676                    codes::ASSET_PATH_IS_CONTENT,
3677                    rel,
3678                    None,
3679                    Some("asset".to_string()),
3680                    format!("asset path `{norm}` points at a markdown content file"),
3681                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3682                    vec![PathBuf::from(&norm)],
3683                );
3684            }
3685            if !manifest.contains_key(&norm) {
3686                push(
3687                    issues,
3688                    Severity::Error,
3689                    codes::ASSET_UNDECLARED,
3690                    rel,
3691                    None,
3692                    Some("asset".to_string()),
3693                    format!(
3694                        "references asset `{norm}` with no record in {}",
3695                        assets::MANIFEST_FILE
3696                    ),
3697                    Some("run `dbmd assets scan` to catalog it".to_string()),
3698                    vec![PathBuf::from(&norm)],
3699                );
3700            }
3701        }
3702    }
3703
3704    // Per-record: wrapper existence + orphan detection.
3705    for (path, rec) in &manifest {
3706        for w in &rec.wrappers {
3707            if !store.root.join(w).is_file() {
3708                push(
3709                    issues,
3710                    Severity::Error,
3711                    codes::ASSET_WRAPPER_BROKEN,
3712                    Path::new(path),
3713                    None,
3714                    None,
3715                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3716                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3717                    vec![PathBuf::from(w)],
3718                );
3719            }
3720        }
3721        if !declared.contains(path) {
3722            push(
3723                issues,
3724                Severity::Warning,
3725                codes::ASSET_MANIFEST_ORPHAN,
3726                Path::new(path),
3727                None,
3728                None,
3729                format!(
3730                    "`{path}` is in {} but no wrapper references it",
3731                    assets::MANIFEST_FILE
3732                ),
3733                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3734                vec![],
3735            );
3736        }
3737    }
3738}
3739
3740/// Push a fully-formed [`Issue`].
3741#[allow(clippy::too_many_arguments)]
3742fn push(
3743    issues: &mut Vec<Issue>,
3744    severity: Severity,
3745    code: &'static str,
3746    file: &Path,
3747    line: Option<u32>,
3748    key: Option<String>,
3749    message: String,
3750    suggestion: Option<String>,
3751    related: Vec<PathBuf>,
3752) {
3753    issues.push(Issue {
3754        severity,
3755        code,
3756        file: file.to_path_buf(),
3757        line,
3758        key,
3759        message,
3760        suggestion,
3761        related,
3762    });
3763}
3764
/// File line (1-based) of the first top-level `key:` entry in the frontmatter
/// YAML, or `None` when there is no such line. The YAML block begins on file
/// line 2 (line 1 is the opening `---`), so YAML line index `i` maps to file
/// line `i + 2`.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    fm_yaml.lines().enumerate().find_map(|(idx, line)| {
        // The key must be followed directly by `:` …
        let after_key = line.trim_start().strip_prefix(key)?;
        // … and must start at column 0, which excludes nested keys and list
        // items (`- key:`).
        let is_top_level = after_key.starts_with(':') && line.starts_with(key);
        is_top_level.then(|| (idx as u32) + 2)
    })
}
3780
3781/// The line a *field-absence* issue (a required key that is missing entirely)
3782/// anchors to: the key's line when present, else line `1` — the frontmatter
3783/// block's opening `---`. A missing key has no line of its own; anchoring it to
3784/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3785/// line to point at instead of an unhelpful `null`.
3786fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3787    fm_key_line(fm_yaml, key).or(Some(1))
3788}
3789
3790/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3791/// output deterministic across runs.
3792fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3793    a.file
3794        .cmp(&b.file)
3795        .then(a.line.cmp(&b.line))
3796        .then(a.code.cmp(b.code))
3797        .then(a.key.cmp(&b.key))
3798}
3799
3800// ═════════════════════════════════════════════════════════════════════════════
3801//  Tests
3802// ═════════════════════════════════════════════════════════════════════════════
3803
3804#[cfg(test)]
3805mod tests {
3806    use super::*;
3807    use crate::parser::{Config, FieldSpec};
3808    use std::fs;
3809    use tempfile::TempDir;
3810
3811    #[test]
3812    fn split_frontmatter_tolerates_leading_bom() {
3813        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3814        // fence must not make validate treat the file as frontmatter-less while
3815        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3816        // for `\u{feff}---` and the function returned None.
3817        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3818        let parsed = split_frontmatter(text);
3819        assert!(
3820            parsed.is_some(),
3821            "a leading BOM must not hide frontmatter from validate"
3822        );
3823        let (yaml, body, close_line) = parsed.unwrap();
3824        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3825        assert_eq!(body, "body");
3826        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3827    }
3828
    /// A test store builder over a real tempdir. Every helper writes real files
    /// so the assertions exercise real behavior, not mocks.
    struct Fixture {
        // The temporary directory that serves as the store root.
        dir: TempDir,
        // The configuration handed to every `Store` built from this fixture.
        config: Config,
    }
3835
3836    impl Fixture {
3837        /// A fresh store with a **valid** `DB.md` (the identity contract:
3838        /// `type: db-md` + `scope` + `owner`) and the two layer dirs. A valid
3839        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3840        /// clean; tests that want a broken DB.md write their own via `write`.
3841        fn new() -> Self {
3842            let dir = TempDir::new().unwrap();
3843            fs::write(
3844                dir.path().join("DB.md"),
3845                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3846            )
3847            .unwrap();
3848            for layer in ["sources", "records"] {
3849                fs::create_dir_all(dir.path().join(layer)).unwrap();
3850            }
3851            Fixture {
3852                dir,
3853                config: Config::default(),
3854            }
3855        }
3856
3857        /// A store with no `DB.md` marker.
3858        fn bare() -> Self {
3859            let dir = TempDir::new().unwrap();
3860            Fixture {
3861                dir,
3862                config: Config::default(),
3863            }
3864        }
3865
3866        /// Write a file at a store-relative path, creating parent dirs.
3867        fn write(&self, rel: &str, contents: &str) {
3868            let abs = self.dir.path().join(rel);
3869            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3870            fs::write(abs, contents).unwrap();
3871        }
3872
3873        fn store(&self) -> Store {
3874            Store {
3875                root: self.dir.path().to_path_buf(),
3876                config: self.config.clone(),
3877            }
3878        }
3879
3880        fn store_all(&self) -> Vec<Issue> {
3881            validate_all(&self.store()).unwrap()
3882        }
3883
3884        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3885        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3886        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3887        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3888        /// the sidecar carries the COMPLETE per-field projection and the fixture
3889        /// can't silently drift from what the index writer emits.
3890        fn rebuild_indexes(&self) {
3891            crate::index::Index::rebuild_all(&self.store()).unwrap();
3892        }
3893    }
3894
3895    /// True if any issue has this code.
3896    fn has(issues: &[Issue], code: &str) -> bool {
3897        issues.iter().any(|i| i.code == code)
3898    }
3899
3900    /// Count issues with a code.
3901    fn count(issues: &[Issue], code: &str) -> usize {
3902        issues.iter().filter(|i| i.code == code).count()
3903    }
3904
3905    /// The first issue with a code, or panic.
3906    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3907        issues
3908            .iter()
3909            .find(|i| i.code == code)
3910            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3911    }
3912
3913    /// A minimal valid `contact` body for reuse.
3914    fn valid_contact(summary: &str) -> String {
3915        format!(
3916            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3917        )
3918    }
3919
3920    // ── store marker ──────────────────────────────────────────────────────────
3921
3922    #[test]
3923    fn not_a_store_when_db_md_absent() {
3924        let fx = Fixture::bare();
3925        let issues = fx.store_all();
3926        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3927        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3928        assert!(issues[0].is_error());
3929    }
3930
3931    #[test]
3932    fn working_set_also_reports_not_a_store() {
3933        let fx = Fixture::bare();
3934        let issues = validate_working_set(&fx.store(), None).unwrap();
3935        assert!(has(&issues, codes::NOT_A_STORE));
3936    }
3937
3938    #[test]
3939    fn clean_store_has_no_issues() {
3940        let fx = Fixture::new();
3941        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3942        // Build the canonical indexes (complete per-field jsonl included) the
3943        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3944        // proven clean across every projected field, not just summary/type.
3945        fx.rebuild_indexes();
3946        let issues = fx.store_all();
3947        assert!(
3948            issues.is_empty(),
3949            "expected a clean store, got: {issues:#?}"
3950        );
3951    }
3952
3953    // ── meta-type closed enum ─────────────────────────────────────────────────
3954
3955    /// Regression (adversarial review): a NON-SCALAR `meta-type` (a YAML list or
3956    /// mapping) must be rejected with `FM_BAD_META_TYPE`, not silently slip past
3957    /// the enum check (and then get reclassified as the default `fact`). Pre-fix
3958    /// the check was gated on `and_then(scalar_string)`, which returned `None`
3959    /// for a sequence/mapping and short-circuited the whole branch.
3960    #[test]
3961    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
3962        let fx = Fixture::new();
3963        let body = |mt: &str| {
3964            format!(
3965                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
3966            )
3967        };
3968
3969        // Valid enum members + absent (default fact) → no FM_BAD_META_TYPE.
3970        for ok in ["fact", "operational", "conclusion"] {
3971            fx.write("records/profiles/ok.md", &body(ok));
3972            let issues = validate_working_set(&fx.store(), None).unwrap();
3973            assert!(
3974                !has(&issues, codes::FM_BAD_META_TYPE),
3975                "`meta-type: {ok}` must be accepted; got {issues:#?}"
3976            );
3977        }
3978        fx.write(
3979            "records/profiles/absent.md",
3980            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
3981        );
3982        assert!(
3983            !has(
3984                &validate_working_set(&fx.store(), None).unwrap(),
3985                codes::FM_BAD_META_TYPE
3986            ),
3987            "an absent meta-type is the default `fact` and must be accepted"
3988        );
3989
3990        // Scalar-but-wrong, AND non-scalar (list / mapping) → FM_BAD_META_TYPE.
3991        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
3992            let fx2 = Fixture::new();
3993            fx2.write("records/profiles/bad.md", &body(bad));
3994            let issues = validate_working_set(&fx2.store(), None).unwrap();
3995            assert!(
3996                has(&issues, codes::FM_BAD_META_TYPE),
3997                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
3998            );
3999        }
4000    }
4001
4002    // ── DB.md structure ───────────────────────────────────────────────────────
4003
4004    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
4005    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
4006    /// would fail here).
4007    #[test]
4008    fn valid_db_md_emits_no_structure_issue() {
4009        let fx = Fixture::new();
4010        let issues = fx.store_all();
4011        assert!(
4012            !has(&issues, codes::DB_MD_BAD_TYPE)
4013                && !has(&issues, codes::DB_MD_MISSING_FIELD)
4014                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
4015            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
4016        );
4017    }
4018
4019    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
4020    /// anchored to the `type:` line (file line 2). Failing to read the type, or
4021    /// accepting a non-`db-md` type, breaks this.
4022    #[test]
4023    fn db_md_wrong_type_is_error() {
4024        let fx = Fixture::new();
4025        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
4026        let issues = fx.store_all();
4027        let i = find(&issues, codes::DB_MD_BAD_TYPE);
4028        assert!(i.is_error());
4029        assert_eq!(i.file, PathBuf::from("DB.md"));
4030        assert_eq!(i.key.as_deref(), Some("type"));
4031        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
4032    }
4033
4034    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
4035    /// absent field, each keyed on its field name, anchored to the block top.
4036    #[test]
4037    fn db_md_missing_scope_and_owner_each_report() {
4038        let fx = Fixture::new();
4039        fx.write("DB.md", "---\ntype: db-md\n---\n");
4040        let issues = fx.store_all();
4041        assert_eq!(
4042            count(&issues, codes::DB_MD_MISSING_FIELD),
4043            2,
4044            "both scope and owner absent → two issues: {issues:#?}"
4045        );
4046        let keys: BTreeSet<Option<String>> = issues
4047            .iter()
4048            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4049            .map(|i| i.key.clone())
4050            .collect();
4051        assert_eq!(
4052            keys,
4053            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
4054            "one issue keyed on each missing field"
4055        );
4056        for i in issues
4057            .iter()
4058            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4059        {
4060            assert!(i.is_error());
4061            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
4062        }
4063    }
4064
4065    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
4066    /// anchored to its own line — guarding against an "is the key textually
4067    /// present?" shortcut that would miss `owner:` with an empty value.
4068    #[test]
4069    fn db_md_blank_required_field_is_missing() {
4070        let fx = Fixture::new();
4071        fx.write(
4072            "DB.md",
4073            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
4074        );
4075        let issues = fx.store_all();
4076        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
4077        assert_eq!(i.key.as_deref(), Some("owner"));
4078        assert_eq!(
4079            i.line,
4080            Some(4),
4081            "a present-but-empty field anchors to its line"
4082        );
4083        assert!(
4084            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
4085            "scope is present and non-empty → only owner reported"
4086        );
4087    }
4088
4089    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
4090    /// to the heading's file line; the three recognized sections stay silent.
4091    #[test]
4092    fn db_md_unknown_section_is_warning() {
4093        let fx = Fixture::new();
4094        fx.write(
4095            "DB.md",
4096            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
4097            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
4098            // 11 `## Glossary`.
4099            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
4100        );
4101        let issues = fx.store_all();
4102        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
4103        assert!(!i.is_error(), "unknown section is a warning, not an error");
4104        assert_eq!(i.severity, Severity::Warning);
4105        assert_eq!(
4106            i.line,
4107            Some(11),
4108            "anchors to the `## Glossary` heading line"
4109        );
4110        assert!(
4111            i.message.contains("Glossary"),
4112            "the message names the offending section: {}",
4113            i.message
4114        );
4115        // The recognized `## Agent instructions` section did NOT fire.
4116        assert_eq!(
4117            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
4118            1,
4119            "only the unrecognized section is flagged: {issues:#?}"
4120        );
4121    }
4122
4123    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
4124    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
4125    #[test]
4126    fn db_md_no_frontmatter_reports_type_and_both_fields() {
4127        let fx = Fixture::new();
4128        fx.write("DB.md", "# just a heading, no frontmatter\n");
4129        let issues = fx.store_all();
4130        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
4131        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
4132    }
4133
4134    // ── frontmatter ─────────────────────────────────────────────────────────
4135
4136    #[test]
4137    fn missing_type_is_error() {
4138        let fx = Fixture::new();
4139        fx.write(
4140            "records/contacts/a.md",
4141            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
4142        );
4143        let issues = fx.store_all();
4144        assert!(has(&issues, codes::FM_MISSING_TYPE));
4145        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
4146    }
4147
4148    #[test]
4149    fn missing_universal_timestamps_are_errors_on_content_files() {
4150        let fx = Fixture::new();
4151        fx.write(
4152            "records/contacts/a.md",
4153            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
4154        );
4155        let issues = fx.store_all();
4156
4157        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
4158        assert_eq!(missing_created.key.as_deref(), Some("created"));
4159        assert!(missing_created.is_error());
4160
4161        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
4162        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
4163        assert!(missing_updated.is_error());
4164    }
4165
4166    #[test]
4167    fn meta_files_do_not_require_universal_timestamps() {
4168        let fx = Fixture::new();
4169        let issues = fx.store_all();
4170
4171        assert!(
4172            !has(&issues, codes::FM_MISSING_CREATED),
4173            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4174        );
4175        assert!(
4176            !has(&issues, codes::FM_MISSING_UPDATED),
4177            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4178        );
4179    }
4180
4181    #[test]
4182    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
4183        let fx = Fixture::new();
4184        fx.write(
4185            "records/profiles/a.md",
4186            "# Just a heading\n\nNo frontmatter here.\n",
4187        );
4188        let issues = fx.store_all();
4189        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4190        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4191    }
4192
4193    #[test]
4194    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4195        let fx = Fixture::new();
4196        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
4197        let issues = fx.store_all();
4198        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4199        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4200    }
4201
4202    #[test]
4203    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4204        let fx = Fixture::new();
4205        // A tab inside a mapping value is invalid YAML.
4206        fx.write(
4207            "records/contacts/a.md",
4208            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4209        );
4210        let issues = fx.store_all();
4211        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4212        assert!(issue.is_error());
4213        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4214        // When YAML doesn't parse we don't *also* claim the summary is missing;
4215        // the agent fixes the YAML first.
4216        assert!(
4217            !has(&issues, codes::SUMMARY_MISSING),
4218            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4219        );
4220    }
4221
4222    #[test]
4223    fn bad_created_timestamp_is_error() {
4224        let fx = Fixture::new();
4225        fx.write(
4226            "records/contacts/a.md",
4227            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4228        );
4229        let issues = fx.store_all();
4230        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4231        assert_eq!(issue.key.as_deref(), Some("created"));
4232        assert!(issue.is_error());
4233    }
4234
4235    #[test]
4236    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4237        let fx = Fixture::new();
4238        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4239        // `last_touch` is a type-specific date field → date-only is fine.
4240        fx.write(
4241            "records/contacts/a.md",
4242            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4243        );
4244        let issues = fx.store_all();
4245        let created_issues: Vec<_> = issues
4246            .iter()
4247            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4248            .collect();
4249        assert_eq!(
4250            created_issues.len(),
4251            1,
4252            "date-only `created` must fail: {issues:#?}"
4253        );
4254        assert!(
4255            !issues.iter().any(
4256                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4257            ),
4258            "date-only `last_touch` is valid: {issues:#?}"
4259        );
4260    }
4261
4262    // ── summary ─────────────────────────────────────────────────────────────
4263
4264    #[test]
4265    fn summary_missing_empty_multiline_toolong() {
4266        let fx = Fixture::new();
4267        fx.write(
4268            "records/profiles/missing.md",
4269            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4270        );
4271        fx.write(
4272            "records/profiles/empty.md",
4273            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4274        );
4275        let long = "x".repeat(201);
4276        fx.write(
4277            "records/profiles/long.md",
4278            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4279        );
4280        let issues = fx.store_all();
4281        assert!(has(&issues, codes::SUMMARY_MISSING));
4282        assert_eq!(
4283            find(&issues, codes::SUMMARY_MISSING).file,
4284            PathBuf::from("records/profiles/missing.md")
4285        );
4286        assert!(has(&issues, codes::SUMMARY_EMPTY));
4287        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4288        assert_eq!(
4289            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4290            Severity::Warning
4291        );
4292    }
4293
4294    #[test]
4295    fn summary_multiline_via_yaml_block_scalar() {
4296        let fx = Fixture::new();
4297        // A literal block scalar produces a value with a newline.
4298        fx.write(
4299            "records/profiles/a.md",
4300            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4301        );
4302        let issues = fx.store_all();
4303        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4304    }
4305
4306    #[test]
4307    fn summary_exactly_200_chars_is_ok() {
4308        let fx = Fixture::new();
4309        let s = "y".repeat(200);
4310        fx.write(
4311            "records/profiles/a.md",
4312            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4313        );
4314        let issues = fx.store_all();
4315        assert!(
4316            !has(&issues, codes::SUMMARY_TOO_LONG),
4317            "200 is the bound, inclusive: {issues:#?}"
4318        );
4319    }
4320
4321    #[test]
4322    fn meta_files_need_no_summary() {
4323        let fx = Fixture::new();
4324        // The root/layer/type indexes + log carry no summary and must not be
4325        // flagged. (A lone DB.md store with one contact and full indexes.)
4326        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4327        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4328        fx.write(
4329            "records/index.md",
4330            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4331        );
4332        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4333        fx.write(
4334            "records/contacts/index.jsonl",
4335            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4336        );
4337        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4338        let issues = fx.store_all();
4339        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4340    }
4341
4342    // ── tags ────────────────────────────────────────────────────────────────
4343
4344    #[test]
4345    fn nested_tags_warns_flat_tags_ok() {
4346        let fx = Fixture::new();
4347        fx.write(
4348            "records/contacts/nested.md",
4349            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4350        );
4351        fx.write(
4352            "records/contacts/flat.md",
4353            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4354        );
4355        let issues = fx.store_all();
4356        let tag_issues: Vec<_> = issues
4357            .iter()
4358            .filter(|i| i.code == codes::TAGS_MALFORMED)
4359            .collect();
4360        assert_eq!(
4361            tag_issues.len(),
4362            1,
4363            "only the nested-tags file should warn: {issues:#?}"
4364        );
4365        assert_eq!(
4366            tag_issues[0].file,
4367            PathBuf::from("records/contacts/nested.md")
4368        );
4369        assert_eq!(tag_issues[0].severity, Severity::Warning);
4370    }
4371
4372    // ── wiki-links ────────────────────────────────────────────────────────────
4373
4374    #[test]
4375    fn short_form_wiki_link_is_error() {
4376        let fx = Fixture::new();
4377        let mut body = valid_contact("links to a short form");
4378        body.push_str("\nSee [[sarah-chen]] for details.\n");
4379        fx.write("records/contacts/a.md", &body);
4380        let issues = fx.store_all();
4381        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4382        assert!(issue.is_error());
4383        assert!(issue.message.contains("sarah-chen"));
4384        // A short-form link must NOT also be reported broken — fix the form first.
4385        assert!(
4386            !issues
4387                .iter()
4388                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4389            "short-form should suppress broken: {issues:#?}"
4390        );
4391    }
4392
4393    #[test]
4394    fn broken_full_path_wiki_link_is_error() {
4395        let fx = Fixture::new();
4396        let mut body = valid_contact("links to a missing file");
4397        body.push_str("\nSee [[records/contacts/ghost]].\n");
4398        fx.write("records/contacts/a.md", &body);
4399        let issues = fx.store_all();
4400        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4401        assert!(issue.is_error());
4402        assert!(issue.message.contains("records/contacts/ghost"));
4403        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4404    }
4405
4406    #[test]
4407    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4408        let fx = Fixture::new();
4409        let mut body = valid_contact("links with traversal");
4410        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4411        fx.write("records/contacts/a.md", &body);
4412        let issues = fx.store_all();
4413        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4414        assert!(issue.message.contains("not a safe store-relative path"));
4415        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4416    }
4417
4418    #[test]
4419    fn valid_full_path_wiki_link_passes() {
4420        let fx = Fixture::new();
4421        fx.write("records/contacts/target.md", &valid_contact("target"));
4422        let mut body = valid_contact("links to target");
4423        body.push_str("\nSee [[records/contacts/target]].\n");
4424        fx.write("records/contacts/a.md", &body);
4425        let issues = fx.store_all();
4426        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4427        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4428    }
4429
4430    #[test]
4431    fn md_extension_wiki_link_warns_and_resolves() {
4432        let fx = Fixture::new();
4433        fx.write("records/contacts/target.md", &valid_contact("target"));
4434        let mut body = valid_contact("links with extension");
4435        body.push_str("\nSee [[records/contacts/target.md]].\n");
4436        fx.write("records/contacts/a.md", &body);
4437        let issues = fx.store_all();
4438        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4439        assert_eq!(issue.severity, Severity::Warning);
4440        assert_eq!(
4441            issue.suggestion.as_deref(),
4442            Some("drop the extension: [[records/contacts/target]]")
4443        );
4444        // The target exists once `.md` is stripped → not broken.
4445        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4446    }
4447
4448    #[test]
4449    fn wiki_links_in_code_fences_are_ignored() {
4450        let fx = Fixture::new();
4451        let mut body = valid_contact("has a fenced example");
4452        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4453        fx.write("records/contacts/a.md", &body);
4454        let issues = fx.store_all();
4455        assert!(
4456            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4457            "fenced wiki-links must be ignored: {issues:#?}"
4458        );
4459    }
4460
4461    #[test]
4462    fn flow_form_link_list_in_frontmatter_is_error() {
4463        let fx = Fixture::new();
4464        fx.write(
4465            "records/meetings/m.md",
4466            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4467        );
4468        let issues = fx.store_all();
4469        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4470        assert!(issue.is_error());
4471        assert_eq!(issue.key.as_deref(), Some("attendees"));
4472    }
4473
4474    #[test]
4475    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4476        let fx = Fixture::new();
4477        fx.write("records/contacts/a.md", &valid_contact("a"));
4478        fx.write("records/contacts/b.md", &valid_contact("b"));
4479        fx.write(
4480            "records/meetings/m.md",
4481            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4482        );
4483        let issues = fx.store_all();
4484        assert!(
4485            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4486            "{issues:#?}"
4487        );
4488        // Block-form link targets are still integrity-checked (both exist here).
4489        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4490    }
4491
4492    #[test]
4493    fn frontmatter_short_form_link_field_is_error() {
4494        let fx = Fixture::new();
4495        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4496        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4497        fx.write(
4498            "records/synthesis/a.md",
4499            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4500        );
4501        let issues = fx.store_all();
4502        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4503        assert!(issue.is_error());
4504        assert_eq!(issue.key.as_deref(), Some("related"));
4505    }
4506
4507    #[test]
4508    fn unquoted_frontmatter_link_is_recognized() {
4509        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4510        // string. The validator must still see it as a wiki-link (text-based
4511        // extraction). A short-form custom field must report SHORT_FORM, and a
4512        // full-path one with a missing target must report BROKEN.
4513        let fx = Fixture::new();
4514        fx.write(
4515            "records/synthesis/short.md",
4516            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4517        );
4518        fx.write(
4519            "records/synthesis/broken.md",
4520            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4521        );
4522        let issues = fx.store_all();
4523        assert!(
4524            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4525                && i.file == Path::new("records/synthesis/short.md")
4526                && i.key.as_deref() == Some("related")),
4527            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4528        );
4529        assert!(
4530            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4531                && i.file == Path::new("records/synthesis/broken.md")),
4532            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4533        );
4534    }
4535
4536    #[test]
4537    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4538        // A short-form value in a *declared* link field (a `### contact` schema
4539        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4540        // (the target isn't under the prefix), and must NOT also be reported as a
4541        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4542        let mut fx = Fixture::new();
4543        fx.config.schemas.insert(
4544            "contact".into(),
4545            Schema {
4546                fields: vec![FieldSpec {
4547                    name: "company".into(),
4548                    link_prefix: Some(PathBuf::from("records/companies")),
4549                    ..Default::default()
4550                }],
4551                ..Default::default()
4552            },
4553        );
4554        fx.write(
4555            "records/contacts/a.md",
4556            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4557        );
4558        let issues = fx.store_all();
4559        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4560        assert_eq!(issue.key.as_deref(), Some("company"));
4561        // The same link must NOT also be double-reported via the generic path.
4562        assert!(
4563            !issues
4564                .iter()
4565                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4566                    && i.key.as_deref() == Some("company")),
4567            "schema link fields are checked once, by the schema path: {issues:#?}"
4568        );
4569    }
4570
4571    #[test]
4572    fn schema_link_field_with_md_extension_still_warns() {
4573        let mut fx = Fixture::new();
4574        fx.config.schemas.insert(
4575            "contact".into(),
4576            Schema {
4577                fields: vec![FieldSpec {
4578                    name: "company".into(),
4579                    link_prefix: Some(PathBuf::from("records/companies")),
4580                    ..Default::default()
4581                }],
4582                ..Default::default()
4583            },
4584        );
4585        fx.write(
4586            "records/companies/acme.md",
4587            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4588        );
4589        fx.write(
4590            "records/contacts/a.md",
4591            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4592        );
4593        let issues = fx.store_all();
4594        let issue = issues
4595            .iter()
4596            .find(|i| {
4597                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4598            })
4599            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4600        assert_eq!(issue.severity, Severity::Warning);
4601        assert!(
4602            !issues
4603                .iter()
4604                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4605            "extensionless existence check should still find acme.md: {issues:#?}"
4606        );
4607    }
4608
4609    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4610
4611    #[test]
4612    fn explicit_schema_required_shape_enum() {
4613        let fx = {
4614            let mut fx = Fixture::new();
4615            // contact schema: name required, email required+email shape,
4616            // status enum: active|inactive
4617            let schema = Schema {
4618                fields: vec![
4619                    FieldSpec {
4620                        name: "name".into(),
4621                        required: true,
4622                        ..Default::default()
4623                    },
4624                    FieldSpec {
4625                        name: "email".into(),
4626                        required: true,
4627                        shape: Some(Shape::Email),
4628                        ..Default::default()
4629                    },
4630                    FieldSpec {
4631                        name: "status".into(),
4632                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4633                        ..Default::default()
4634                    },
4635                ],
4636                ..Default::default()
4637            };
4638            fx.config.schemas.insert("contact".into(), schema);
4639            fx
4640        };
4641        fx.write(
4642            "records/contacts/a.md",
4643            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4644        );
4645        let issues = fx.store_all();
4646        // name absent → MISSING_REQUIRED
4647        assert!(
4648            issues
4649                .iter()
4650                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4651                    && i.key.as_deref() == Some("name")),
4652            "{issues:#?}"
4653        );
4654        // email malformed → SHAPE_MISMATCH
4655        assert!(
4656            issues.iter().any(
4657                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4658            ),
4659            "{issues:#?}"
4660        );
4661        // status archived not in enum → ENUM_VIOLATION
4662        assert!(
4663            issues
4664                .iter()
4665                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4666                    && i.key.as_deref() == Some("status")),
4667            "{issues:#?}"
4668        );
4669    }
4670
4671    #[test]
4672    fn schema_without_link_field_allows_plain_value() {
4673        // A `contact` schema with no `company` link field means a plain `company`
4674        // string is fine — schema enforcement is exactly what the store declares,
4675        // nothing implicit.
4676        let mut fx = Fixture::new();
4677        fx.config.schemas.insert(
4678            "contact".into(),
4679            Schema {
4680                fields: vec![FieldSpec {
4681                    name: "name".into(),
4682                    required: true,
4683                    ..Default::default()
4684                }],
4685                ..Default::default()
4686            },
4687        );
4688        fx.write(
4689            "records/contacts/a.md",
4690            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4691        );
4692        let issues = fx.store_all();
4693        assert!(
4694            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4695            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4696        );
4697    }
4698
4699    #[test]
4700    fn schema_link_field_plain_value_is_prefix_mismatch() {
4701        // The surviving link-enforcement path: a declared `link to <prefix>/`
4702        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4703        let mut fx = Fixture::new();
4704        fx.config.schemas.insert(
4705            "contact".into(),
4706            Schema {
4707                fields: vec![FieldSpec {
4708                    name: "company".into(),
4709                    link_prefix: Some(PathBuf::from("records/companies")),
4710                    ..Default::default()
4711                }],
4712                ..Default::default()
4713            },
4714        );
4715        fx.write(
4716            "records/contacts/a.md",
4717            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4718        );
4719        let issues = fx.store_all();
4720        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4721        assert_eq!(issue.key.as_deref(), Some("company"));
4722        assert!(issue
4723            .suggestion
4724            .as_deref()
4725            .unwrap()
4726            .contains("records/companies/"));
4727    }
4728
4729    #[test]
4730    fn schema_shape_int_and_url_and_currency() {
4731        let mut fx = Fixture::new();
4732        fx.config.schemas.insert(
4733            "widget".into(),
4734            Schema {
4735                fields: vec![
4736                    FieldSpec {
4737                        name: "qty".into(),
4738                        shape: Some(Shape::Int),
4739                        ..Default::default()
4740                    },
4741                    FieldSpec {
4742                        name: "site".into(),
4743                        shape: Some(Shape::Url),
4744                        ..Default::default()
4745                    },
4746                    FieldSpec {
4747                        name: "price".into(),
4748                        shape: Some(Shape::Currency),
4749                        ..Default::default()
4750                    },
4751                ],
4752                ..Default::default()
4753            },
4754        );
4755        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4756        // ISO code + amount). It must pass — it used to spuriously fail.
4757        fx.write(
4758            "records/widgets/ok.md",
4759            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4760        );
4761        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4762        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4763        // the under-rejection half of the finding.
4764        fx.write(
4765            "records/widgets/bad.md",
4766            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4767        );
4768        let issues = fx.store_all();
4769        let bad_shape: Vec<_> = issues
4770            .iter()
4771            .filter(|i| {
4772                i.code == codes::SCHEMA_SHAPE_MISMATCH
4773                    && i.file == Path::new("records/widgets/bad.md")
4774            })
4775            .map(|i| i.key.clone().unwrap_or_default())
4776            .collect();
4777        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4778        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4779        assert!(
4780            bad_shape.contains(&"price".to_string()),
4781            "inf must be rejected as currency: {issues:#?}"
4782        );
4783        assert!(
4784            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4785                && i.file == Path::new("records/widgets/ok.md")),
4786            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4787        );
4788    }
4789
4790    #[test]
4791    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4792        let mut fx = Fixture::new();
4793        fx.config.schemas.insert(
4794            "contact".into(),
4795            Schema {
4796                fields: vec![
4797                    FieldSpec {
4798                        name: "email".into(),
4799                        required: true,
4800                        shape: Some(Shape::Email),
4801                        ..Default::default()
4802                    },
4803                    FieldSpec {
4804                        name: "status".into(),
4805                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4806                        ..Default::default()
4807                    },
4808                ],
4809                ..Default::default()
4810            },
4811        );
4812        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4813        // used to slip through entirely (`scalar_string` → None → the shape and
4814        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4815        fx.write(
4816            "records/contacts/bad.md",
4817            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4818        );
4819        let issues = fx.store_all();
4820        let mismatched: Vec<_> = issues
4821            .iter()
4822            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4823            .map(|i| i.key.clone().unwrap_or_default())
4824            .collect();
4825        assert!(
4826            mismatched.contains(&"email".to_string()),
4827            "list-valued required email must flag: {issues:#?}"
4828        );
4829        assert!(
4830            mismatched.contains(&"status".to_string()),
4831            "list-valued enum must flag: {issues:#?}"
4832        );
4833    }
4834
4835    #[test]
4836    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4837        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4838        for ok in [
4839            "100",
4840            "1234.56",
4841            "$1,234.50",
4842            "USD 100", // the finding's headline probe — used to be false
4843            "usd 100", // case-insensitive code
4844            "EUR 9.50",
4845            "£12",
4846            "¥1000",
4847            "-5.00", // signed amounts are real (refunds)
4848            "+5",
4849            "1,000,000",
4850        ] {
4851            assert!(is_currency(ok), "expected currency: {ok:?}");
4852        }
4853        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4854        // bare-code / exponent cases the docstring forbids.
4855        for bad in [
4856            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4857            "12.999", // 3 decimals
4858            "1.2345", // 4 decimals
4859            "USD",    // bare code, no amount
4860            "$",      // bare symbol
4861            "free", "", " ", "1e3",      // exponent form
4862            "1.",       // trailing dot, no fractional digits
4863            ".5",       // leading dot, no integer digits
4864            "1 000",    // space as separator is not a thousands separator
4865            "USDD 100", // 4-letter "code" must not strip
4866        ] {
4867            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4868        }
4869    }
4870
4871    // ── policies ───────────────────────────────────────────────────────────
4872
4873    #[test]
4874    fn ignored_type_present_is_info() {
4875        let mut fx = Fixture::new();
4876        fx.config.ignored_types.push("temp".into());
4877        fx.write(
4878            "records/temps/x.md",
4879            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4880        );
4881        let issues = fx.store_all();
4882        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4883        assert_eq!(issue.severity, Severity::Info);
4884        assert!(!issue.is_error());
4885        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4886    }
4887
4888    #[test]
4889    fn conclusion_record_derived_from_ignored_type_warns() {
4890        let mut fx = Fixture::new();
4891        fx.config.ignored_types.push("temp".into());
4892        fx.write(
4893            "records/temps/x.md",
4894            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4895        );
4896        // The policy now gates on `meta-type: conclusion` (not the retired
4897        // `type: wiki-page`): a conclusion record that derives from an
4898        // ignored-type record warns.
4899        fx.write(
4900            "records/synthesis/t.md",
4901            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4902        );
4903        let issues = fx.store_all();
4904        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4905        assert_eq!(issue.severity, Severity::Warning);
4906        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4907        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4908    }
4909
4910    /// The shared `derived_from_ignored_type` entry point — the single
4911    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
4912    /// warning) now route through, so they cannot diverge. This pins its
4913    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
4914    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
4915    /// match carrying the resolved target type, and a non-ignored target
4916    /// rejected.
4917    #[test]
4918    fn derived_from_ignored_type_is_the_shared_policy_decision() {
4919        let mut fx = Fixture::new();
4920        fx.config.ignored_types.push("secret".into());
4921        // An ignored-type record …
4922        fx.write(
4923            "records/secrets/s.md",
4924            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4925        );
4926        // … and a non-ignored record.
4927        fx.write(
4928            "records/contacts/c.md",
4929            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4930        );
4931        let store = fx.store();
4932
4933        // Positive: a conclusion record deriving from the ignored-type record
4934        // matches, and the hit carries both the target (as written) and its
4935        // resolved type.
4936        let hit =
4937            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
4938                .expect("conclusion → ignored-type record must match");
4939        assert_eq!(hit.target, "records/secrets/s");
4940        assert_eq!(hit.target_type, "secret");
4941
4942        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
4943        // the same ignored-type target.
4944        assert_eq!(
4945            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
4946            None,
4947            "only conclusion derivation is policed"
4948        );
4949
4950        // Target gate: a conclusion deriving from a non-ignored record is fine.
4951        assert_eq!(
4952            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
4953            None,
4954            "deriving from a non-ignored type is allowed"
4955        );
4956
4957        // First match wins across multiple targets (here the second is the hit).
4958        let hit = derived_from_ignored_type(
4959            &store,
4960            "conclusion",
4961            ["records/contacts/c", "records/secrets/s"],
4962        )
4963        .expect("a later ignored-type target must still be found");
4964        assert_eq!(hit.target, "records/secrets/s");
4965
4966        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
4967        fx.config.ignored_types.clear();
4968        let store = fx.store();
4969        assert_eq!(
4970            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
4971            None,
4972            "an empty ignored-types policy short-circuits"
4973        );
4974    }
4975
4976    // ── duplicates ───────────────────────────────────────────────────────────
4977
4978    #[test]
4979    fn dup_id_is_hard_error_with_related() {
4980        let fx = Fixture::new();
4981        fx.write(
4982            "records/contacts/a.md",
4983            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4984        );
4985        fx.write(
4986            "records/contacts/b.md",
4987            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4988        );
4989        let issues = fx.store_all();
4990        // Reporting rule #1: ONE issue per collision group, keyed on the
4991        // lexicographically smallest path (`a.md`), partner in `related`.
4992        assert_eq!(
4993            count(&issues, codes::DUP_ID),
4994            1,
4995            "one issue per group: {issues:#?}"
4996        );
4997        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
4998        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
4999        assert!(a.is_error());
5000        assert_eq!(a.key.as_deref(), Some("id"));
5001        assert_eq!(
5002            a.line,
5003            Some(3),
5004            "anchors to the `id` line on the reported file"
5005        );
5006        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
5007    }
5008
5009    #[test]
5010    fn dup_id_not_fired_in_working_set() {
5011        // DUP_* is an --all-only cross-file check; the working set must not run it.
5012        let fx = Fixture::new();
5013        fx.write(
5014            "records/contacts/a.md",
5015            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5016        );
5017        fx.write(
5018            "records/contacts/b.md",
5019            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5020        );
5021        // Log says both changed since epoch, so they're in the working set.
5022        fx.write(
5023            "log.md",
5024            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
5025        );
5026        let issues = validate_working_set(&fx.store(), None).unwrap();
5027        assert!(
5028            !has(&issues, codes::DUP_ID),
5029            "DUP_ID is --all only: {issues:#?}"
5030        );
5031    }
5032
5033    #[test]
5034    fn dup_unique_key_single_field_is_warning() {
5035        let mut fx = Fixture::new();
5036        // contact declares `- unique: email`.
5037        fx.config.schemas.insert(
5038            "contact".into(),
5039            Schema {
5040                unique_keys: vec![vec!["email".into()]],
5041                ..Default::default()
5042            },
5043        );
5044        for (f, name) in [("a", "A"), ("b", "B")] {
5045            fx.write(
5046                &format!("records/contacts/{f}.md"),
5047                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
5048            );
5049        }
5050        let issues = fx.store_all();
5051        // One issue per group (rule #1), keyed on the smallest path, anchored to
5052        // the single `email` field.
5053        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
5054        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5055        assert_eq!(dup.severity, Severity::Warning);
5056        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
5057        assert_eq!(dup.key.as_deref(), Some("email"));
5058        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
5059    }
5060
5061    #[test]
5062    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
5063        let mut fx = Fixture::new();
5064        // expense declares `- unique: date, amount, vendor` (a compound key).
5065        fx.config.schemas.insert(
5066            "expense".into(),
5067            Schema {
5068                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
5069                ..Default::default()
5070            },
5071        );
5072        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
5073        let exp = |f: &str, amount: &str| {
5074            format!(
5075            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
5076        )
5077        };
5078        fx.write("records/expenses/e1.md", &exp("e1", "100"));
5079        fx.write("records/expenses/e2.md", &exp("e2", "100"));
5080        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
5081        let issues = fx.store_all();
5082        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
5083        // (e1) with e2 in `related`; e3 differs on amount and never appears.
5084        assert_eq!(
5085            count(&issues, codes::DUP_UNIQUE_KEY),
5086            1,
5087            "only e1+e2 collide, one issue: {issues:#?}"
5088        );
5089        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5090        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5091        assert_eq!(
5092            dup.line,
5093            Some(1),
5094            "compound-key collision anchors to line 1"
5095        );
5096        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5097        assert!(
5098            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5099                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5100            "e3 differs on amount and must not collide: {issues:#?}"
5101        );
5102    }
5103
5104    #[test]
5105    fn dup_unique_key_list_field_is_order_independent() {
5106        let mut fx = Fixture::new();
5107        // meeting declares `- unique: date, attendees`; the list field is a set.
5108        fx.config.schemas.insert(
5109            "meeting".into(),
5110            Schema {
5111                unique_keys: vec![vec!["date".into(), "attendees".into()]],
5112                ..Default::default()
5113            },
5114        );
5115        fx.write("records/contacts/a.md", &valid_contact("a"));
5116        fx.write("records/contacts/b.md", &valid_contact("b"));
5117        let m = |f: &str, order: &str| {
5118            let attendees = if order == "ab" {
5119                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
5120            } else {
5121                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
5122            };
5123            format!(
5124                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5125            )
5126        };
5127        fx.write("records/meetings/m1.md", &m("m1", "ab"));
5128        fx.write("records/meetings/m2.md", &m("m2", "ba"));
5129        let issues = fx.store_all();
5130        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
5131        // → a single issue on the smaller path.
5132        assert_eq!(
5133            count(&issues, codes::DUP_UNIQUE_KEY),
5134            1,
5135            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5136        );
5137        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5138        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5139        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5140    }
5141
5142    // ── indexes ───────────────────────────────────────────────────────────────
5143
5144    #[test]
5145    fn missing_indexes_at_all_three_levels() {
5146        let fx = Fixture::new();
5147        fx.write("records/contacts/a.md", &valid_contact("a"));
5148        let issues = fx.store_all();
5149        // root, layer (records), and type-folder (records/contacts) all missing.
5150        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
5151        // would-be index.md), per the field convention `EXPECTED` pins.
5152        let missing_files: BTreeSet<PathBuf> = issues
5153            .iter()
5154            .filter(|i| i.code == codes::INDEX_MISSING)
5155            .map(|i| i.file.clone())
5156            .collect();
5157        assert!(
5158            missing_files.contains(&PathBuf::from("index.md")),
5159            "{issues:#?}"
5160        );
5161        assert!(
5162            missing_files.contains(&PathBuf::from("records/index.md")),
5163            "{issues:#?}"
5164        );
5165        assert!(
5166            missing_files.contains(&PathBuf::from("records/contacts")),
5167            "{issues:#?}"
5168        );
5169        // When the index.md is entirely absent we do NOT additionally fire
5170        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5171        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5172    }
5173
5174    #[test]
5175    fn index_stale_entry_and_missing_entry() {
5176        let fx = Fixture::new();
5177        fx.write(
5178            "records/contacts/present.md",
5179            &valid_contact("present contact"),
5180        );
5181        // Indexes for the parents (root/layer) present so we isolate type-folder.
5182        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5183        fx.write(
5184            "records/index.md",
5185            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5186        );
5187        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5188        fx.write(
5189            "records/contacts/index.md",
5190            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5191        );
5192        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5193        let issues = fx.store_all();
5194        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5195        assert!(stale.message.contains("ghost"));
5196        assert!(stale.is_error());
5197        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5198        assert!(
5199            missing.message.contains("present.md"),
5200            "{}",
5201            missing.message
5202        );
5203    }
5204
5205    #[test]
5206    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5207        let fx = Fixture::new();
5208        fx.write("records/contacts/a.md", &valid_contact("a"));
5209        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5210        fx.write(
5211            "records/index.md",
5212            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5213        );
5214        fx.write(
5215            "records/contacts/index.md",
5216            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5217        );
5218        fx.write(
5219            "records/contacts/index.jsonl",
5220            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5221        );
5222        let issues = fx.store_all();
5223        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5224        assert!(stale.message.contains("not a safe store-relative path"));
5225    }
5226
5227    #[test]
5228    fn index_summary_mismatch() {
5229        let fx = Fixture::new();
5230        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5231        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5232        fx.write(
5233            "records/index.md",
5234            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5235        );
5236        fx.write(
5237            "records/contacts/index.md",
5238            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5239        );
5240        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5241        let issues = fx.store_all();
5242        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5243        assert!(issue.is_error());
5244        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5245    }
5246
5247    #[test]
5248    fn index_summary_match_passes() {
5249        let fx = Fixture::new();
5250        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5251        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5252        fx.write(
5253            "records/index.md",
5254            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5255        );
5256        fx.write(
5257            "records/contacts/index.md",
5258            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5259        );
5260        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5261        let issues = fx.store_all();
5262        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5263    }
5264
5265    #[test]
5266    fn index_entry_with_tag_suffix_matches_summary() {
5267        let fx = Fixture::new();
5268        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5269        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5270        fx.write(
5271            "records/index.md",
5272            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5273        );
5274        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5275        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5276        // which must be stripped before comparing against the file's summary.
5277        fx.write(
5278            "records/contacts/index.md",
5279            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5280        );
5281        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5282        let issues = fx.store_all();
5283        assert!(
5284            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5285            "tag suffix should be stripped: {issues:#?}"
5286        );
5287    }
5288
5289    #[test]
5290    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5291        // Regression (the finding): a tagless file whose `summary` legitimately
5292        // ends in a single-spaced ` · #word` tail round-trips through `index
5293        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5294        // file has no tags). The validator must NOT mistake that single-spaced
5295        // tail for the renderer's tag suffix, or it reports a spurious — and
5296        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5297        let fx = Fixture::new();
5298        fx.write(
5299            "records/contacts/a.md",
5300            &valid_contact("Standup notes · #standup"),
5301        );
5302        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5303        fx.write(
5304            "records/index.md",
5305            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5306        );
5307        fx.write(
5308            "records/contacts/index.md",
5309            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5310        );
5311        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5312        let issues = fx.store_all();
5313        assert!(
5314            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5315            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5316        );
5317    }
5318
5319    #[test]
5320    fn index_jsonl_desync_missing_file_in_jsonl() {
5321        let fx = Fixture::new();
5322        fx.write("records/contacts/a.md", &valid_contact("a"));
5323        fx.write("records/contacts/b.md", &valid_contact("b"));
5324        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5325        fx.write(
5326            "records/index.md",
5327            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5328        );
5329        fx.write(
5330            "records/contacts/index.md",
5331            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5332        );
5333        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5334        fx.write(
5335            "records/contacts/index.jsonl",
5336            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5337        );
5338        let issues = fx.store_all();
5339        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5340        assert!(desync.message.contains("b.md"), "{}", desync.message);
5341    }
5342
5343    #[test]
5344    fn index_jsonl_desync_record_points_at_missing_file() {
5345        let fx = Fixture::new();
5346        fx.write("records/contacts/a.md", &valid_contact("a"));
5347        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5348        fx.write(
5349            "records/index.md",
5350            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5351        );
5352        fx.write(
5353            "records/contacts/index.md",
5354            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5355        );
5356        fx.write(
5357            "records/contacts/index.jsonl",
5358            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5359        );
5360        let issues = fx.store_all();
5361        assert!(
5362            issues
5363                .iter()
5364                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5365            "{issues:#?}"
5366        );
5367    }
5368
5369    #[test]
5370    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5371        let fx = Fixture::new();
5372        fx.write("records/contacts/a.md", &valid_contact("a"));
5373        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5374        fx.write(
5375            "records/index.md",
5376            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5377        );
5378        fx.write(
5379            "records/contacts/index.md",
5380            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5381        );
5382        fx.write(
5383            "records/contacts/index.jsonl",
5384            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5385        );
5386        let issues = fx.store_all();
5387        assert!(
5388            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5389                && i.message.contains("not a safe store-relative path")),
5390            "{issues:#?}"
5391        );
5392    }
5393
5394    #[test]
5395    fn index_jsonl_stale_summary() {
5396        let fx = Fixture::new();
5397        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5398        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5399        fx.write(
5400            "records/index.md",
5401            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5402        );
5403        fx.write(
5404            "records/contacts/index.md",
5405            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5406        );
5407        // jsonl summary disagrees with the file frontmatter.
5408        fx.write(
5409            "records/contacts/index.jsonl",
5410            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5411        );
5412        let issues = fx.store_all();
5413        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5414        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5415        assert!(stale.key.as_deref().unwrap().contains("summary"));
5416    }
5417
5418    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5419    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5420    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5421    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5422    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5423    /// `email` validated clean and answered `--where email=…` with a phantom
5424    /// value present in no file. This is the direct regression guard.
5425    #[test]
5426    fn index_jsonl_stale_queryable_field_email() {
5427        let fx = Fixture::new();
5428        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5429        fx.write("records/contacts/a.md", contact);
5430        // Start from the canonical, fully-correct sidecar set …
5431        fx.rebuild_indexes();
5432        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5433        let good = fs::read_to_string(&jsonl_path).unwrap();
5434        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5435        assert!(
5436            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5437            "freshly-rebuilt sidecar must not be stale"
5438        );
5439        // … then desync ONLY the email so it's the single differing field.
5440        assert!(
5441            good.contains("real@correct.com"),
5442            "sidecar projects email: {good}"
5443        );
5444        fx.write(
5445            "records/contacts/index.jsonl",
5446            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5447        );
5448
5449        let issues = fx.store_all();
5450        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5451        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5452        // The mismatch is reported precisely on `email`, and summary/type — which
5453        // still match — are NOT named.
5454        let key = stale.key.as_deref().unwrap();
5455        assert!(
5456            key.contains("email"),
5457            "expected `email` in stale key, got {key:?}"
5458        );
5459        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5460        assert!(!key.contains("type"), "type still matches: {key:?}");
5461    }
5462
5463    /// Broaden the guard across the typed/list/timestamp projections at once:
5464    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5465    /// caught, with all three named in one issue.
5466    #[test]
5467    fn index_jsonl_stale_typed_and_list_fields() {
5468        let fx = Fixture::new();
5469        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5470        fx.write("records/expenses/e.md", expense);
5471        fx.rebuild_indexes();
5472        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5473        let good = fs::read_to_string(&jsonl_path).unwrap();
5474        assert!(
5475            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5476            "freshly-rebuilt sidecar must not be stale"
5477        );
5478        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5479        let stale_line = good
5480            .replace("\"q2\"", "\"WRONG-TAG\"")
5481            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5482            .replace("1299", "9999");
5483        fx.write("records/expenses/index.jsonl", &stale_line);
5484
5485        let issues = fx.store_all();
5486        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5487        let key = stale.key.as_deref().unwrap();
5488        for expected in ["amount", "tags", "updated"] {
5489            assert!(
5490                key.contains(expected),
5491                "expected `{expected}` in stale key, got {key:?}"
5492            );
5493        }
5494    }
5495
5496    #[test]
5497    fn index_orphan_in_noncanonical_folder() {
5498        let fx = Fixture::new();
5499        fx.write("records/contacts/a.md", &valid_contact("a"));
5500        // Build the canonical indexes so they aren't reported as orphans.
5501        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5502        fx.write(
5503            "records/index.md",
5504            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5505        );
5506        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5507        fx.write(
5508            "records/contacts/index.jsonl",
5509            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5510        );
5511        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5512        fx.write(
5513            "records/contacts/subfolder/index.md",
5514            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5515        );
5516        let issues = fx.store_all();
5517        let orphan = find(&issues, codes::INDEX_ORPHAN);
5518        assert_eq!(orphan.severity, Severity::Warning);
5519        assert_eq!(
5520            orphan.file,
5521            PathBuf::from("records/contacts/subfolder/index.md")
5522        );
5523    }
5524
5525    #[test]
5526    fn index_wrong_scope() {
5527        let fx = Fixture::new();
5528        fx.write("records/contacts/a.md", &valid_contact("a"));
5529        // Root index declares the wrong scope.
5530        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5531        fx.write(
5532            "records/index.md",
5533            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5534        );
5535        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5536        fx.write(
5537            "records/contacts/index.jsonl",
5538            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5539        );
5540        let issues = fx.store_all();
5541        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5542        assert_eq!(issue.severity, Severity::Warning);
5543        assert_eq!(issue.file, PathBuf::from("index.md"));
5544    }
5545
5546    #[test]
5547    fn capped_type_folder_index_does_not_flag_missing_entries() {
5548        // Over the 500-entry cap, omitted entries are expected, not an error.
5549        let fx = Fixture::new();
5550        for i in 0..501 {
5551            fx.write(
5552                &format!("records/contacts/c{i:04}.md"),
5553                &valid_contact(&format!("contact {i}")),
5554            );
5555        }
5556        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5557        fx.write(
5558            "records/index.md",
5559            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5560        );
5561        // Type-folder index lists only ONE entry + a More footer.
5562        fx.write(
5563            "records/contacts/index.md",
5564            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5565        );
5566        // jsonl must still be complete — write all 501 lines.
5567        let mut jsonl = String::new();
5568        for i in 0..501 {
5569            jsonl.push_str(&format!(
5570                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5571            ));
5572        }
5573        fx.write("records/contacts/index.jsonl", &jsonl);
5574        let issues = fx.store_all();
5575        assert!(
5576            !has(&issues, codes::INDEX_MISSING_ENTRY),
5577            "over the cap, missing browse entries are expected: {issues:#?}"
5578        );
5579        // But the jsonl is complete → no desync.
5580        assert!(
5581            !has(&issues, codes::INDEX_JSONL_DESYNC),
5582            "{:#?}",
5583            issues
5584                .iter()
5585                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5586                .collect::<Vec<_>>()
5587        );
5588    }
5589
5590    // ── log ────────────────────────────────────────────────────────────────
5591
5592    #[test]
5593    fn log_bad_timestamp_unknown_kind_out_of_order() {
5594        let fx = Fixture::new();
5595        fx.write(
5596            "log.md",
5597            concat!(
5598                "---\ntype: log\n---\n\n# Log\n\n",
5599                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5600                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5601                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5602                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5603            ),
5604        );
5605        let issues = fx.store_all();
5606        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5607        assert_eq!(
5608            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5609            Severity::Warning
5610        );
5611        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5612        assert_eq!(unknown.severity, Severity::Warning);
5613        assert!(unknown.message.contains("frobnicate"));
5614        assert!(unknown
5615            .suggestion
5616            .as_deref()
5617            .is_some_and(|s| s.contains("create")));
5618        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5619        assert!(bad.is_error());
5620    }
5621
5622    #[test]
5623    fn log_validate_entry_without_object_is_well_formed() {
5624        let fx = Fixture::new();
5625        fx.write(
5626            "log.md",
5627            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5628        );
5629        let issues = fx.store_all();
5630        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5631        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5632    }
5633
5634    #[test]
5635    fn log_in_order_is_clean() {
5636        let fx = Fixture::new();
5637        fx.write(
5638            "log.md",
5639            concat!(
5640                "---\ntype: log\n---\n\n",
5641                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5642                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5643            ),
5644        );
5645        let issues = fx.store_all();
5646        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5647    }
5648
5649    #[test]
5650    fn log_not_checked_in_working_set() {
5651        // log.md ordering is an --all-only check.
5652        let fx = Fixture::new();
5653        fx.write(
5654            "log.md",
5655            concat!(
5656                "---\ntype: log\n---\n\n",
5657                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5658                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5659            ),
5660        );
5661        let issues = validate_working_set(&fx.store(), None).unwrap();
5662        assert!(
5663            !has(&issues, codes::LOG_OUT_OF_ORDER),
5664            "log ordering is --all only: {issues:#?}"
5665        );
5666    }
5667
5668    // ── working-set scoping ───────────────────────────────────────────────────
5669
5670    #[test]
5671    fn working_set_validates_only_changed_files() {
5672        let fx = Fixture::new();
5673        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5674        // in the log → working set must skip it.
5675        fx.write(
5676            "records/contacts/dirty.md",
5677            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5678        );
5679        fx.write(
5680            "records/contacts/unlogged.md",
5681            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5682        );
5683        fx.write(
5684            "log.md",
5685            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5686        );
5687        let issues = validate_working_set(&fx.store(), None).unwrap();
5688        assert!(
5689            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5690                && i.file == Path::new("records/contacts/dirty.md")),
5691            "{issues:#?}"
5692        );
5693        assert!(
5694            !issues
5695                .iter()
5696                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5697            "unlogged file must not be in the working set: {issues:#?}"
5698        );
5699    }
5700
5701    #[test]
5702    fn working_set_includes_incoming_linkers_to_changed_path() {
5703        let fx = Fixture::new();
5704        // `changed` was renamed/removed (logged). `linker` points at it with a
5705        // now-broken link and was NOT itself logged — but must be pulled in.
5706        fx.write(
5707            "records/profiles/linker.md",
5708            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5709        );
5710        // `changed.md` does NOT exist on disk (removed).
5711        fx.write(
5712            "log.md",
5713            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5714        );
5715        let issues = validate_working_set(&fx.store(), None).unwrap();
5716        assert!(
5717            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5718                && i.file == Path::new("records/profiles/linker.md")),
5719            "incoming linker to a removed path must be validated: {issues:#?}"
5720        );
5721    }
5722
5723    #[test]
5724    fn working_set_respects_explicit_since_cutoff() {
5725        let fx = Fixture::new();
5726        fx.write(
5727            "records/contacts/old.md",
5728            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5729        );
5730        fx.write(
5731            "records/contacts/new.md",
5732            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5733        );
5734        fx.write(
5735            "log.md",
5736            concat!(
5737                "---\ntype: log\n---\n\n",
5738                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5739                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5740            ),
5741        );
5742        // Cutoff after `old` but before `new`.
5743        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5744        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5745        assert!(
5746            issues
5747                .iter()
5748                .any(|i| i.file == Path::new("records/contacts/new.md")),
5749            "{issues:#?}"
5750        );
5751        assert!(
5752            !issues
5753                .iter()
5754                .any(|i| i.file == Path::new("records/contacts/old.md")),
5755            "old change is before the cutoff: {issues:#?}"
5756        );
5757    }
5758
5759    #[test]
5760    fn working_set_default_since_is_last_validate_entry() {
5761        let fx = Fixture::new();
5762        // `before` changed before the last validate; `after` changed after.
5763        fx.write(
5764            "records/contacts/before.md",
5765            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5766        );
5767        fx.write(
5768            "records/contacts/after.md",
5769            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5770        );
5771        fx.write(
5772            "log.md",
5773            concat!(
5774                "---\ntype: log\n---\n\n",
5775                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5776                "## [2026-05-21 10:00] validate\nPASS\n\n",
5777                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5778            ),
5779        );
5780        let issues = validate_working_set(&fx.store(), None).unwrap();
5781        assert!(
5782            issues
5783                .iter()
5784                .any(|i| i.file == Path::new("records/contacts/after.md")),
5785            "{issues:#?}"
5786        );
5787        assert!(
5788            !issues
5789                .iter()
5790                .any(|i| i.file == Path::new("records/contacts/before.md")),
5791            "change before the last validate entry is outside the default window: {issues:#?}"
5792        );
5793    }
5794
5795    // ── ordering / determinism ────────────────────────────────────────────────
5796
5797    #[test]
5798    fn issues_are_sorted_by_file_then_line() {
5799        let fx = Fixture::new();
5800        fx.write("records/profiles/z.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5801        fx.write("records/profiles/a.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5802        let issues = fx.store_all();
5803        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5804        let mut sorted = files.clone();
5805        sorted.sort();
5806        assert_eq!(
5807            files, sorted,
5808            "issues must be emitted in a stable file order"
5809        );
5810    }
5811
5812    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5813
5814    #[test]
5815    fn frozen_page_is_not_a_validate_error() {
5816        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5817        // A clean file listed in `### Frozen pages` must validate clean.
5818        let mut fx = Fixture::new();
5819        fx.config
5820            .frozen_pages
5821            .push(PathBuf::from("records/decisions/d.md"));
5822        fx.write(
5823            "records/decisions/d.md",
5824            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5825        );
5826        let issues = fx.store_all();
5827        assert!(
5828            !has(&issues, codes::POLICY_FROZEN_PAGE),
5829            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5830        );
5831    }
5832
5833    #[test]
5834    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5835        // The full-path doctrine makes ambiguity impossible; the defensive code
5836        // must never fire on a normal store.
5837        let fx = Fixture::new();
5838        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5839        let mut body = valid_contact("links to sarah");
5840        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5841        fx.write("records/contacts/p.md", &body);
5842        let issues = fx.store_all();
5843        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5844    }
5845
5846    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5847
5848    #[test]
5849    fn unknown_type_passes_through() {
5850        // A custom type is ambient context: it has a `type`, so no
5851        // FM_MISSING_TYPE, and with no matching schema there are no schema
5852        // errors. Only the universal contract (summary, timestamps) applies.
5853        let fx = Fixture::new();
5854        fx.write(
5855            "records/proposals/x.md",
5856            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5857        );
5858        let issues = fx.store_all();
5859        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5860        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5861        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5862        // The unknown fields don't trip anything.
5863        assert!(
5864            !issues
5865                .iter()
5866                .any(|i| i.key.as_deref() == Some("custom_field")
5867                    || i.key.as_deref() == Some("budget")),
5868            "unknown fields are ambient context: {issues:#?}"
5869        );
5870    }
5871
5872    // ── find_links_to prefix-collision safety (working set) ───────────────────
5873
5874    #[test]
5875    fn incoming_linker_scan_does_not_prefix_match() {
5876        // A changed `records/contacts/sarah` must NOT pull in a file that only
5877        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5878        let fx = Fixture::new();
5879        fx.write(
5880            "records/profiles/only-sarah-chen.md",
5881            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5882        );
5883        // The log says `records/contacts/sarah` (the shorter path) changed.
5884        fx.write(
5885            "log.md",
5886            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5887        );
5888        let issues = validate_working_set(&fx.store(), None).unwrap();
5889        assert!(
5890            !issues
5891                .iter()
5892                .any(|i| i.file == Path::new("records/profiles/only-sarah-chen.md")),
5893            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5894        );
5895    }
5896
5897    #[test]
5898    fn working_set_does_not_flag_stale_catalog_index_as_wiki_link_broken() {
5899        // The working-set incoming-linker scan rides embedded-ripgrep
5900        // `Store::find_links_to`, which scans EVERY `.md` — so a type-folder
5901        // `index.md` listing a now-deleted target IS pulled into the working set.
5902        // But its entries are GENERATED catalog entries, not authored body links:
5903        // a dangling one is an `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"),
5904        // the job of `check_indexes` under `--all` — NOT a `WIKI_LINK_BROKEN`
5905        // ("create the target"), whose remedy would steer an agent to recreate
5906        // the very data it just deleted. The loop default must therefore NOT
5907        // body-link-check the derived catalog (index integrity is an O(store)
5908        // sweep concern, not an O(changed) loop concern). Adversarial review #11:
5909        // the prior behavior gave WIKI_LINK_BROKEN here while `--all` gave
5910        // INDEX_STALE_ENTRY for the identical condition — two codes, opposite
5911        // remedies, across the loop default vs the sweep.
5912        let fx = Fixture::new();
5913        // A catalog that still lists the deleted contact (a real, common stale
5914        // state after an out-of-band `delete`).
5915        fx.write(
5916            "records/contacts/index.md",
5917            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5918        );
5919        // The log says `records/contacts/sarah-chen` was deleted.
5920        fx.write(
5921            "log.md",
5922            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5923        );
5924        let issues = validate_working_set(&fx.store(), None).unwrap();
5925        assert!(
5926            !issues
5927                .iter()
5928                .any(|i| i.file == Path::new("records/contacts/index.md")
5929                    && i.code == codes::WIKI_LINK_BROKEN),
5930            "a stale catalog `index.md` entry must NOT be WIKI_LINK_BROKEN in the \
5931             working set (it is an INDEX_STALE_ENTRY under `--all`): {issues:#?}"
5932        );
5933    }
5934
5935    #[test]
5936    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
5937        // CONTRACT (the O(changed × store) fix): the working-set scan finds
5938        // incoming linkers for EVERY changed object, and does so via the single
5939        // batch pass `Store::find_links_to_any` — not one full store read per
5940        // changed object. This test pins the behavior that makes the single-pass
5941        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
5942        // into the working set and flagged. A regression that scanned for only the
5943        // first/last changed object, or that dropped the batch union, would leave
5944        // one of the two broken links unreported and fail here.
5945        let fx = Fixture::new();
5946        // Linker A → deleted target #1 (in the body).
5947        fx.write(
5948            "records/profiles/refers-sarah.md",
5949            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5950        );
5951        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
5952        // sidecar `links` projection would miss, which is why this must be a
5953        // content scan, not a sidecar read).
5954        fx.write(
5955            "records/meetings/2026/05/kickoff.md",
5956            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
5957        );
5958        // The log says BOTH targets were deleted in this window.
5959        fx.write(
5960            "log.md",
5961            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
5962        );
5963
5964        let issues = validate_working_set(&fx.store(), None).unwrap();
5965        assert!(
5966            issues
5967                .iter()
5968                .any(|i| i.file == Path::new("records/profiles/refers-sarah.md")
5969                    && i.code == codes::WIKI_LINK_BROKEN),
5970            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
5971        );
5972        assert!(
5973            issues.iter().any(
5974                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
5975                    && i.code == codes::WIKI_LINK_BROKEN
5976            ),
5977            "linker to the SECOND deleted target (typed-field edge) must also be \
5978             pulled in and flagged — proves the scan covers the whole changed set, \
5979             not just one object: {issues:#?}"
5980        );
5981    }
5982
5983    #[test]
5984    fn frontmatter_block_sequence_links_each_get_their_own_line() {
5985        // Each block-sequence wiki-link reports on its own source line.
5986        let fx = Fixture::new();
5987        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
5988        fx.write(
5989            "records/meetings/m.md",
5990            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
5991        );
5992        let issues = fx.store_all();
5993        let broken_lines: BTreeSet<Option<u32>> = issues
5994            .iter()
5995            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
5996            .map(|i| i.line)
5997            .collect();
5998        assert_eq!(
5999            broken_lines.len(),
6000            2,
6001            "two distinct broken-link lines: {issues:#?}"
6002        );
6003    }
6004
6005    // ── Regression: null / non-scalar created/updated ────────────────────────
6006
6007    #[test]
6008    fn null_created_is_missing_not_silently_passed() {
6009        // Regression: a present-but-`null` `created:` previously slipped past
6010        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
6011        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
6012        let fx = Fixture::new();
6013        fx.write(
6014            "records/contacts/a.md",
6015            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6016        );
6017        let issues = fx.store_all();
6018        assert!(
6019            has(&issues, codes::FM_MISSING_CREATED),
6020            "null `created:` must read as missing: {issues:#?}"
6021        );
6022    }
6023
6024    #[test]
6025    fn sequence_created_is_bad_timestamp() {
6026        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
6027        let fx = Fixture::new();
6028        fx.write(
6029            "records/contacts/a.md",
6030            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6031        );
6032        let issues = fx.store_all();
6033        assert!(
6034            issues
6035                .iter()
6036                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
6037            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
6038        );
6039    }
6040
6041    // ── Regression: schema required null / empty-collection ──────────────────
6042
6043    #[test]
6044    fn required_field_null_or_empty_collection_is_missing() {
6045        // Regression: a plain required field (no shape/enum) holding YAML null
6046        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
6047        // previously validated with 0 issues — `scalar_string` returned None and
6048        // `.unwrap_or(false)` treated the value as non-empty.
6049        for value in ["", " []", " {}"] {
6050            let mut fx = Fixture::new();
6051            fx.config.schemas.insert(
6052                "contact".into(),
6053                Schema {
6054                    fields: vec![FieldSpec {
6055                        name: "name".into(),
6056                        required: true,
6057                        ..Default::default()
6058                    }],
6059                    ..Default::default()
6060                },
6061            );
6062            fx.write(
6063                "records/contacts/a.md",
6064                &format!(
6065                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
6066                ),
6067            );
6068            let issues = fx.store_all();
6069            assert!(
6070                issues
6071                    .iter()
6072                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
6073                        && i.key.as_deref() == Some("name")),
6074                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
6075            );
6076        }
6077    }
6078
6079    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
6080
6081    #[test]
6082    fn wiki_link_to_raw_source_file_resolves() {
6083        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
6084        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
6085        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
6086        let fx = Fixture::new();
6087        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6088        fx.write(
6089            "records/contacts/a.md",
6090            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
6091        );
6092        let issues = fx.store_all();
6093        assert!(
6094            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
6095            "a link to an existing raw source file must not be broken: {issues:#?}"
6096        );
6097    }
6098
6099    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
6100
6101    #[test]
6102    fn non_utf8_content_file_is_reported() {
6103        // Regression: a content file with invalid UTF-8 bytes made
6104        // check_content_file return None silently, so the store passed with exit
6105        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
6106        let fx = Fixture::new();
6107        let abs = fx.dir.path().join("records/notes/corrupt.md");
6108        fs::create_dir_all(abs.parent().unwrap()).unwrap();
6109        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
6110        let issues = validate_working_set(&fx.store(), None).unwrap();
6111        assert!(
6112            has(&issues, codes::FM_UNREADABLE),
6113            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
6114        );
6115    }
6116
6117    // ── Regression: code-fence char/run tracking ─────────────────────────────
6118
6119    #[test]
6120    fn tilde_fence_containing_backtick_fence_does_not_invert() {
6121        // Regression: a `~~~` block legally contains ``` lines (documenting a
6122        // backtick fence); a naive toggle inverted `in_fence` and checked the
6123        // demo `[[fake]]` inside the code block as a live link. The link inside
6124        // BOTH fences must be skipped.
6125        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
6126        let links = extract_wiki_links(body);
6127        assert!(
6128            links.is_empty(),
6129            "wiki-link inside a nested code fence must be skipped: {links:?}"
6130        );
6131    }
6132
6133    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
6134
6135    #[test]
6136    fn all_sweep_visits_in_layer_log_folder() {
6137        // Regression: `validate --all` pruned every dir named `log`, so a real
6138        // content folder like `records/log/` was invisible to the full sweep —
6139        // reporting FEWER errors than the default scope. A frontmatter-less file
6140        // there must still surface FM_MISSING_TYPE under --all.
6141        let fx = Fixture::new();
6142        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
6143        let issues = fx.store_all();
6144        assert!(
6145            has(&issues, codes::FM_MISSING_TYPE),
6146            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
6147        );
6148    }
6149
6150    // ── Regression: flow-form list with whitespace ───────────────────────────
6151
6152    #[test]
6153    fn flow_form_link_list_with_spaces_is_flagged() {
6154        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
6155        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
6156        // text test. The value-based detector must catch the whitespace variant.
6157        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
6158        assert!(
6159            keys.iter().any(|k| k == "attendees"),
6160            "spaced flow-form list must be detected: {keys:?}"
6161        );
6162    }
6163
6164    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6165
6166    #[test]
6167    fn middot_hashtag_summary_tail_round_trips() {
6168        // Regression: a tagless summary that legitimately ends in a single-spaced
6169        // ` · #word` tail round-trips through the renderer verbatim, but the loose
6170        // ` · ` strip mistook it for the tag block and reported a spurious,
6171        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
6172        // exact double-spaced `  ·  ` delimiter.
6173        assert_eq!(
6174            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
6175            Some("Standup notes · #standup"),
6176            "a single-spaced middot tail is part of the summary, not a tag block"
6177        );
6178        // The renderer's real double-spaced tag suffix IS still stripped.
6179        assert_eq!(
6180            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
6181            Some("Renewal champion"),
6182            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
6183        );
6184    }
6185
6186    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6187
6188    #[test]
6189    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
6190        assert!(is_url("http://x"), "an 8-char http URL is valid");
6191        assert!(is_url("https://x"), "a 9-char https URL is valid");
6192        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
6193        assert!(!is_url("https://"), "a bare https scheme is rejected");
6194    }
6195
6196    #[test]
6197    fn email_shape_rejects_double_at() {
6198        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6199        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6200        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6201    }
6202
6203    // ── Regression: working-set vs --all agree on log.md links ───────────────
6204
6205    #[test]
6206    fn working_set_does_not_flag_log_md_body_links() {
6207        // Regression: the working-set incoming-linker scan runs root `log.md`
6208        // through the body wiki-link check, flagging a historical `[[deleted]]`
6209        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6210        // the append-only log can't have "fixed". The root meta files must be
6211        // excluded from the body link check, matching --all.
6212        let fx = Fixture::new();
6213        fx.write("records/contacts/a.md", &valid_contact("A"));
6214        fx.write(
6215            "log.md",
6216            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6217        );
6218        let issues = validate_working_set(&fx.store(), None).unwrap();
6219        assert!(
6220            !issues
6221                .iter()
6222                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6223                    && i.file == std::path::Path::new("log.md")),
6224            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6225        );
6226    }
6227
6228    // ── Regression: DB.md schema field lint ──────────────────────────────────
6229
6230    #[test]
6231    fn schema_duplicate_field_name_is_flagged() {
6232        let mut fx = Fixture::new();
6233        fx.config.schemas.insert(
6234            "contact".into(),
6235            Schema {
6236                fields: vec![
6237                    FieldSpec {
6238                        name: "name".into(),
6239                        required: true,
6240                        ..Default::default()
6241                    },
6242                    FieldSpec {
6243                        name: "name".into(),
6244                        ..Default::default()
6245                    },
6246                ],
6247                ..Default::default()
6248            },
6249        );
6250        let issues = fx.store_all();
6251        assert!(
6252            issues
6253                .iter()
6254                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6255            "a duplicate schema field name must be flagged: {issues:#?}"
6256        );
6257    }
6258
6259    #[test]
6260    fn schema_unknown_modifier_is_info() {
6261        let mut fx = Fixture::new();
6262        fx.config.schemas.insert(
6263            "contact".into(),
6264            Schema {
6265                fields: vec![FieldSpec {
6266                    name: "name".into(),
6267                    unknown_modifiers: vec!["requierd".into()],
6268                    ..Default::default()
6269                }],
6270                ..Default::default()
6271            },
6272        );
6273        let issues = fx.store_all();
6274        assert!(
6275            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6276                && i.severity == Severity::Info
6277                && i.key.as_deref() == Some("name")),
6278            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6279        );
6280    }
6281
6282    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6283    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6284    /// and the module doc-comment promises this code implements "exactly those
6285    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6286    /// new validation code is added to the engine but never documented.
6287    #[test]
6288    fn every_code_constant_is_documented_in_spec() {
6289        // Parse the canonical constant *values* straight out of this module's
6290        // source, so a future `pub const X: &str = "X";` is covered with no test
6291        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6292        let this_src = include_str!("validate.rs");
6293        let mut codes_in_module: Vec<String> = Vec::new();
6294        let mut in_codes_mod = false;
6295        for line in this_src.lines() {
6296            let t = line.trim();
6297            if t.starts_with("pub mod codes") {
6298                in_codes_mod = true;
6299                continue;
6300            }
6301            // The `mod codes` block ends at its closing brace at column 0.
6302            if in_codes_mod && line == "}" {
6303                break;
6304            }
6305            if in_codes_mod {
6306                if let Some(rest) = t.strip_prefix("pub const ") {
6307                    // rest = `NAME: &str = "VALUE";`
6308                    let value = rest
6309                        .split_once('=')
6310                        .map(|(_, v)| v.trim())
6311                        .and_then(|v| v.strip_prefix('"'))
6312                        .and_then(|v| v.strip_suffix("\";"))
6313                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6314                    codes_in_module.push(value.to_string());
6315                }
6316            }
6317        }
6318        assert!(
6319            codes_in_module.len() >= 36,
6320            "parsed only {} code constants from `mod codes`; the parser likely \
6321             broke against a source-format change",
6322            codes_in_module.len()
6323        );
6324
6325        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6326        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6327        let spec = fs::read_to_string(&spec_path)
6328            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6329
6330        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6331        let missing: Vec<&String> = codes_in_module
6332            .iter()
6333            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6334            .collect();
6335        assert!(
6336            missing.is_empty(),
6337            "validation codes emitted by the engine but absent from SPEC.md \
6338             § Validation (the declared complete vocabulary): {missing:?}"
6339        );
6340    }
6341}