Skip to main content

dbmd_core/
validate.rs

//! `validate` — the validation engine.
//!
//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
//! is the single source of truth). This module implements exactly those codes
//! — no more, no fewer. If a code is added here it must be added to the SPEC
//! table in the same change. The codes are exposed as the [`codes`] constants
//! so call sites never spell a code as a bare string literal.
//!
//! **Two scopes.** [`validate_working_set`] is the loop default: content files
//! changed since `since`, plus any file whose wiki-links target a changed path.
//! The changed set and the per-file checks are O(changed); the incoming linkers
//! are found by a *single* embedded-ripgrep pass over the store for the whole
//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
//! per changed object, and not the parse-the-tree walk `--all` does). It never
//! calls [`Store::walk`] and never builds the global cross-file state.
//! [`validate_all`] is the full SWEEP: it adds the checks that need that global
//! state — entity-dedup `DUP_*`, every-index sync, and `log.md` ordering.
//!
//! ## Why this module is self-contained
//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and file walk here, reading only the two public,
//! caller-populated fields of a [`Store`]: [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
30
31use std::collections::{BTreeMap, BTreeSet, HashMap};
32use std::path::{Path, PathBuf};
33
34use chrono::{DateTime, FixedOffset, NaiveDateTime};
35use serde_yml::Value;
36
37use crate::parser::{FieldSpec, Schema, Shape};
38use crate::store::Store;
39
/// How serious a validation [`Issue`] is.
///
/// Only [`Severity::Error`] makes validation fail (non-zero exit); warnings
/// and info are reported without affecting the outcome.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point the agent resolves at its own discretion.
    Warning,
    /// Reported for visibility only; never changes the exit status.
    Info,
}
51
52/// A single structured validation finding. Agent-primary and machine-parseable
53/// via `--json`; `suggestion` is a deterministic remediation hint the agent
54/// applies without guessing.
55#[derive(Debug, Clone, PartialEq, Eq)]
56pub struct Issue {
57    /// The severity; only [`Severity::Error`] fails validation.
58    pub severity: Severity,
59    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
60    pub code: &'static str,
61    /// The file the issue is about.
62    pub file: PathBuf,
63    /// The 1-based line, when applicable.
64    pub line: Option<u32>,
65    /// The frontmatter key, when the issue is about a specific field.
66    pub key: Option<String>,
67    /// A human-readable message.
68    pub message: String,
69    /// A deterministic remediation hint, when one exists.
70    pub suggestion: Option<String>,
71    /// Other files involved (e.g. the duplicate partner in a collision).
72    pub related: Vec<PathBuf>,
73}
74
75impl Issue {
76    /// True if this issue fails validation (i.e. its severity is
77    /// [`Severity::Error`]).
78    pub fn is_error(&self) -> bool {
79        matches!(self.severity, Severity::Error)
80    }
81}
82
/// The canonical validation issue codes: one constant for each row of the
/// SPEC.md § Validation table. Call sites name these constants rather than
/// spelling bare strings, so the code and the SPEC table cannot drift apart
/// unnoticed.
pub mod codes {
    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter is missing `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` contains an `##` section beyond the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// The frontmatter block is not valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created`, `updated`, or a date field is not ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// A recognized `type:` lives in a layer other than its canonical one.
    pub const LAYER_TYPE_MISMATCH: &str = "LAYER_TYPE_MISMATCH";
    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    /// A wiki-link target is not a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file does not exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches more than one file (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension, which should be dropped.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses inline `[[[a]], [[b]]]`; block form is required.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two `contact`s share an `email`.
    pub const DUP_CONTACT_EMAIL: &str = "DUP_CONTACT_EMAIL";
    /// Two `company`s share a `domain`.
    pub const DUP_COMPANY_DOMAIN: &str = "DUP_COMPANY_DOMAIN";
    /// Two `expense`s share `(date, amount, vendor)`.
    pub const DUP_EXPENSE_TUPLE: &str = "DUP_EXPENSE_TUPLE";
    /// Two `invoice`s share `(vendor, date, amount)`.
    pub const DUP_INVOICE_TUPLE: &str = "DUP_INVOICE_TUPLE";
    /// Two `email`s share `(from, subject, date)` (a re-ingest).
    pub const DUP_EMAIL_REINGEST: &str = "DUP_EMAIL_REINGEST";
    /// Two `meeting`s share `(date, sorted-attendees-set)`.
    pub const DUP_MEETING_TUPLE: &str = "DUP_MEETING_TUPLE";
    /// A `DB.md` schema requires a field that is absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value does not match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field holds a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value is not in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file whose type is listed under `### Ignored types` exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `wiki-page` derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    /// A `log.md` entry header timestamp cannot be parsed.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind is not recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries are not in non-decreasing time order (possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    /// A non-empty canonical folder has no `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file is not listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty or non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` does not match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text does not match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file is absent from the `index.jsonl`, or a jsonl record points at a
    /// missing file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields do not match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    /// `tags` is not a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
}
177
/// Upper bound on `summary` length, in chars, as set by the SPEC. A longer
/// summary is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
180
/// The `log.md` entry kinds the SPEC (§ `log.md`) recognizes. Any other kind
/// is reported as `LOG_UNKNOWN_KIND`, which is a warning rather than an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
194
195// ─────────────────────────────────────────────────────────────────────────────
196//  Public entrypoints
197// ─────────────────────────────────────────────────────────────────────────────
198
199/// **Loop default.** Validate the working set: content files changed since
200/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
201/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
202/// never a [`Store::walk`] / [`Store::walk_content_files`]-style parse-the-tree,
203/// and none of the cross-file global passes (entity-dedup, every-index sync,
204/// `log.md` ordering) that `--all` adds.
205///
206/// **Cost.** The changed set is read from `log.md` — O(changed): every
207/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
208/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
209/// over that set plus its incoming linkers — also O(changed). The one part that
210/// is *not* O(changed) is discovering those incoming linkers: a link to a
211/// changed path can live in the body or a typed frontmatter field of any file,
212/// so it is found by a **single** embedded-ripgrep pass over the store
213/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
214/// scan, flat in the changed-set size. (It was previously a full store read
215/// *per* changed object — `O(changed × store)`; that is the blow-up this path
216/// no longer pays.) The unavoidable single content scan is the same shape as
217/// free-text `dbmd search`; the sidecar `links` projection can't replace it
218/// because it omits body/typed-field edges.
219pub fn validate_working_set(
220    store: &Store,
221    since: Option<DateTime<FixedOffset>>,
222) -> crate::Result<Vec<Issue>> {
223    if !store_marker_present(store) {
224        return Ok(vec![not_a_store_issue(store)]);
225    }
226
227    let cutoff = match since {
228        Some(ts) => Some(ts),
229        None => last_validate_at(store),
230    };
231
232    // 1. Changed objects, straight from the log (O(changed) — never a walk).
233    let changed = changed_objects_since(store, cutoff);
234
235    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
236    //    path (the linker may now be stale even though it didn't change). The
237    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
238    //    pass over the store for the WHOLE changed set (one `.md` walk, one
239    //    presence-only/early-exit scan per file), not one walk per object. This
240    //    is the fix for the `O(changed × store)` blow-up that calling
241    //    `find_links_to` in a loop produced (a full store read per changed
242    //    object); the cost is now a single store scan regardless of how many
243    //    objects changed. A returned self-link is harmlessly deduped by the set
244    //    (the object is already inserted below).
245    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
246    let mut working: BTreeSet<PathBuf> = changed;
247    for linker in store.find_links_to_any(&changed_targets)? {
248        working.insert(linker);
249    }
250
251    let mut issues = Vec::new();
252    for rel in &working {
253        let abs = store.root.join(rel);
254        // A changed path can be a *deletion* — skip files that no longer exist;
255        // the incoming-linker scan above already flagged links into them.
256        if !abs.is_file() {
257            continue;
258        }
259        // `None` basename index: the working-set pass does not build the
260        // store-wide basename map (that is a `--all`-only structure), so a bare
261        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
262        // `--all` sweep does the ambiguity upgrade.
263        check_content_file(store, rel, &abs, None, &mut issues);
264    }
265    issues.sort_by(issue_order);
266    Ok(issues)
267}
268
269/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
270/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
271/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
272/// loop.
273pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
274    if !store_marker_present(store) {
275        return Ok(vec![not_a_store_issue(store)]);
276    }
277
278    let mut issues = Vec::new();
279
280    // Store-identity file: `DB.md` shape (type / required fields / section
281    // headers). A single root file, checked once in the sweep — not a content
282    // file (it carries no `summary`), so it is not part of `walk_content_files`.
283    check_db_md(store, &mut issues);
284
285    let files = walk_content_files(&store.root);
286
287    // The basename index makes the short-form wiki-link check able to upgrade a
288    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
289    // Built once from the already-gathered sweep list (no extra walk); only the
290    // `--all` path has it (the working-set path stays O(changed)).
291    let basenames = build_basename_index(&files);
292
293    // Per-file checks over the whole store.
294    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
295    for rel in &files {
296        let abs = store.root.join(rel);
297        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
298            parsed.push((rel.clone(), p));
299        }
300    }
301
302    // Cross-file: hard + soft entity-dedup collisions.
303    check_duplicates(&parsed, &mut issues);
304
305    // Cross-file: hierarchical index.md + index.jsonl sync.
306    check_indexes(store, &files, &mut issues);
307
308    // Cross-file: log.md well-formedness + ordering.
309    check_log(store, &mut issues);
310
311    issues.sort_by(issue_order);
312    Ok(issues)
313}
314
315// ─────────────────────────────────────────────────────────────────────────────
316//  Per-file content checks (shared by both scopes)
317// ─────────────────────────────────────────────────────────────────────────────
318
319/// What `validate_all`'s cross-file pass needs from a per-file parse: the
320/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
321/// text-based wiki-link extraction). The body and fence-line are consumed
322/// inline during the per-file pass and not carried here.
323struct Parsed {
324    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
325    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
326    fm: Option<BTreeMap<String, Value>>,
327    /// The raw frontmatter YAML text (between the fences) — the source for
328    /// text-based wiki-link extraction in dedup.
329    fm_yaml: String,
330}
331
332/// Run every per-file check on one content file, pushing issues. Returns the
333/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
334/// `None` only when the file is unreadable or has no frontmatter block at all
335/// (which for a content file is itself reported).
336fn check_content_file(
337    store: &Store,
338    rel: &Path,
339    abs: &Path,
340    basenames: Option<&BasenameIndex>,
341    issues: &mut Vec<Issue>,
342) -> Option<Parsed> {
343    let text = match std::fs::read_to_string(abs) {
344        Ok(t) => t,
345        Err(_) => return None,
346    };
347
348    let is_content = is_content_file(rel);
349
350    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
351        Some(split) => split,
352        None => {
353            // No frontmatter at all. For a content file that means there's no
354            // `type:` and no `summary:` — report both the way a parsed-but-empty
355            // file would, so the agent gets the same actionable codes.
356            if is_content {
357                push(
358                    issues,
359                    Severity::Error,
360                    codes::FM_MISSING_TYPE,
361                    rel,
362                    None,
363                    Some("type".into()),
364                    "content file has no frontmatter `type:`".into(),
365                    Some("add a YAML frontmatter block with `type:`".into()),
366                    vec![],
367                );
368                push(
369                    issues,
370                    Severity::Error,
371                    codes::SUMMARY_MISSING,
372                    rel,
373                    None,
374                    Some("summary".into()),
375                    "content file has no `summary`".into(),
376                    Some("run `dbmd fm init`".into()),
377                    vec![],
378                );
379            }
380            return None;
381        }
382    };
383
384    // Parse the YAML block.
385    let fm: Option<BTreeMap<String, Value>> = match serde_yml::from_str::<Value>(&fm_yaml) {
386        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
387        // An empty frontmatter block parses as Null; treat as an empty mapping.
388        Ok(Value::Null) => Some(BTreeMap::new()),
389        Ok(_) => {
390            // A scalar / sequence at the top level isn't a frontmatter mapping.
391            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
392            // block is opaque, so there is no single offending field line.
393            push(
394                issues,
395                Severity::Error,
396                codes::FM_MALFORMED_YAML,
397                rel,
398                Some(1),
399                None,
400                "frontmatter is not a YAML mapping".into(),
401                None,
402                vec![],
403            );
404            None
405        }
406        Err(e) => {
407            // Anchor to line 1 (the opening `---`): an unparseable block has no
408            // single offending field line; the agent re-reads the whole block.
409            push(
410                issues,
411                Severity::Error,
412                codes::FM_MALFORMED_YAML,
413                rel,
414                Some(1),
415                None,
416                format!("frontmatter block isn't valid YAML: {e}"),
417                None,
418                vec![],
419            );
420            None
421        }
422    };
423
424    if let Some(map) = &fm {
425        // The detailed frontmatter checks only run when the YAML parsed.
426        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
427    }
428
429    // Wiki-link doctrine checks run on the body of every content file (and
430    // also on index/log meta files, whose entries are wiki-links too).
431    check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
432
433    Some(Parsed { fm, fm_yaml })
434}
435
436/// All frontmatter-level checks for a content file with valid YAML.
437fn check_frontmatter(
438    store: &Store,
439    rel: &Path,
440    fm: &BTreeMap<String, Value>,
441    fm_yaml: &str,
442    basenames: Option<&BasenameIndex>,
443    issues: &mut Vec<Issue>,
444    is_content: bool,
445) {
446    let type_ = fm.get("type").and_then(scalar_string);
447
448    // ── type ────────────────────────────────────────────────────────────────
449    if is_content && type_.is_none() {
450        push(
451            issues,
452            Severity::Error,
453            codes::FM_MISSING_TYPE,
454            rel,
455            fm_key_line_or_top(fm_yaml, "type"),
456            Some("type".into()),
457            "content file has no `type:`".into(),
458            Some("add a `type:` field (e.g. `type: contact`)".into()),
459            vec![],
460        );
461    }
462
463    // ── layer-appropriate type ────────────────────────────────────────────────
464    // The recognized-type table (SPEC § Recognized types) gives each canonical
465    // content type a home layer. A recognized type sitting in a *different* layer
466    // (a `contact` under `sources/`, an `email` under `wiki/`) is valid-but-
467    // unusual — the folder layout is convention, not enforcement — so it warns,
468    // never blocks. Custom / unrecognized types carry no layer expectation; meta
469    // files (index/log) are not content files and are skipped via `is_content`.
470    if is_content {
471        if let Some(t) = &type_ {
472            if let (Some(expected), Some(actual)) = (canonical_layer_for_type(t), layer_of(rel)) {
473                if expected != actual {
474                    push(
475                        issues,
476                        Severity::Warning,
477                        codes::LAYER_TYPE_MISMATCH,
478                        rel,
479                        fm_key_line(fm_yaml, "type"),
480                        Some("type".into()),
481                        format!(
482                            "type `{t}` belongs in `{expected}/` but this file is under `{actual}/`"
483                        ),
484                        Some(format!(
485                            "move the file under `{expected}/` (its canonical layer), or change its `type:`"
486                        )),
487                        vec![],
488                    );
489                }
490            }
491        }
492    }
493
494    // ── summary (universal on content files) ──────────────────────────────────
495    if is_content {
496        check_summary(rel, fm, fm_yaml, issues);
497    }
498
499    // ── timestamps: created / updated + type-specific date fields ────────────
500    for key in ["created", "updated"] {
501        if let Some(v) = fm.get(key) {
502            if let Some(s) = scalar_string(v) {
503                if !is_iso8601(&s) {
504                    push(
505                        issues,
506                        Severity::Error,
507                        codes::FM_BAD_TIMESTAMP,
508                        rel,
509                        fm_key_line(fm_yaml, key),
510                        Some(key.into()),
511                        format!("`{key}` is not ISO-8601: {s:?}"),
512                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
513                        vec![],
514                    );
515                }
516            }
517        }
518    }
519    // Type-specific date fields (the canonical date-shaped fields per type).
520    //
521    // Precedence: when an explicit `DB.md ## Schemas` block declares a date
522    // field with a `date` shape, the schema check OWNS that field — a bad value
523    // is `SCHEMA_SHAPE_MISMATCH` (the more specific rule), not the generic
524    // `FM_BAD_TIMESTAMP`. So `FM_BAD_TIMESTAMP` here covers only the universal
525    // `created`/`updated` (above) and the canonical date fields of types whose
526    // effective schema does NOT shape that field as a date. Skipping them avoids
527    // double-reporting one bad date under two codes.
528    if let Some(t) = &type_ {
529        let schema_date_fields = schema_shaped_date_fields(store, t);
530        for key in canonical_date_fields(t) {
531            if schema_date_fields.contains(*key) {
532                continue; // owned by the schema-shape check
533            }
534            if let Some(v) = fm.get(*key) {
535                if let Some(s) = scalar_string(v) {
536                    if !is_iso8601_date_or_datetime(&s) {
537                        push(
538                            issues,
539                            Severity::Error,
540                            codes::FM_BAD_TIMESTAMP,
541                            rel,
542                            fm_key_line(fm_yaml, key),
543                            Some((*key).into()),
544                            format!("`{key}` is not an ISO-8601 date: {s:?}"),
545                            Some("use an ISO-8601 date, e.g. 2026-05-27".into()),
546                            vec![],
547                        );
548                    }
549                }
550            }
551        }
552    }
553
554    // ── tags shape ────────────────────────────────────────────────────────────
555    if let Some(tags) = fm.get("tags") {
556        if !is_flat_scalar_list(tags) {
557            push(
558                issues,
559                Severity::Warning,
560                codes::TAGS_MALFORMED,
561                rel,
562                fm_key_line(fm_yaml, "tags"),
563                Some("tags".into()),
564                "`tags` must be a flat YAML list of short scalar labels".into(),
565                Some("use block form: one `- <tag>` per line".into()),
566                vec![],
567            );
568        }
569    }
570
571    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
572    for key in detect_flow_form_link_lists(fm_yaml) {
573        push(
574            issues,
575            Severity::Error,
576            codes::WIKI_LINK_FLOW_FORM_LIST,
577            rel,
578            fm_key_line(fm_yaml, &key),
579            Some(key.clone()),
580            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
581            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
582            vec![],
583        );
584    }
585
586    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
587    // Skip keys that have an explicit `link to` schema spec — those are checked
588    // (with prefix enforcement) in `check_schema`, and double-reporting the same
589    // link via two paths would be noise.
590    let schema_link_keys: BTreeSet<String> =
591        effective_schema(store, type_.as_deref().unwrap_or(""))
592            .map(|s| {
593                s.fields
594                    .iter()
595                    .filter(|f| f.link_prefix.is_some())
596                    .map(|f| f.name.clone())
597                    .collect()
598            })
599            .unwrap_or_default();
600    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
601        if schema_link_keys.contains(&key) {
602            continue;
603        }
604        check_wiki_link(
605            store,
606            rel,
607            &link,
608            Some(link.line),
609            Some(&key),
610            basenames,
611            issues,
612        );
613    }
614
615    // ── policies: ignored types ──────────────────────────────────────────────
616    if let Some(t) = &type_ {
617        if store.config.ignored_types.iter().any(|it| it == t) {
618            push(
619                issues,
620                Severity::Info,
621                codes::POLICY_IGNORED_TYPE_PRESENT,
622                rel,
623                fm_key_line(fm_yaml, "type"),
624                Some("type".into()),
625                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
626                None,
627                // The policy source: `DB.md` declares the ignored type.
628                vec![PathBuf::from("DB.md")],
629            );
630        }
631        // A wiki-page deriving from an ignored-type record → warning. The
632        // decision lives in the shared `derived_from_ignored_type` entry point;
633        // this side only supplies the `derived_from` targets (with their line,
634        // which the issue carries) and renders the finding.
635        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
636            if let Some(hit) =
637                derived_from_ignored_type(store, t, std::iter::once(link.target.as_str()))
638            {
639                push(
640                    issues,
641                    Severity::Warning,
642                    codes::POLICY_IGNORED_TYPE_DERIVED,
643                    rel,
644                    Some(link.line),
645                    Some("derived_from".into()),
646                    format!(
647                        "wiki-page derives from ignored-type record `{}` (type `{}`)",
648                        hit.target, hit.target_type
649                    ),
650                    None,
651                    // The ignored-type source record, plus `DB.md` (the policy
652                    // source that lists the ignored type).
653                    vec![
654                        PathBuf::from(format!("{}.md", hit.target)),
655                        PathBuf::from("DB.md"),
656                    ],
657                );
658            }
659        }
660    }
661
662    // ── schema enforcement: implicit canonical + explicit DB.md ## Schemas ───
663    if let Some(t) = &type_ {
664        if let Some(schema) = effective_schema(store, t) {
665            check_schema(store, rel, fm, fm_yaml, &schema, issues);
666        }
667    }
668}
669
670/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
671fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
672    let line = fm_key_line(fm_yaml, "summary");
673    match fm.get("summary") {
674        None => push(
675            issues,
676            Severity::Error,
677            codes::SUMMARY_MISSING,
678            rel,
679            // A missing `summary` key has no line of its own → anchor to the
680            // frontmatter block top (line 1), the EXPECTED field-absence rule.
681            fm_key_line_or_top(fm_yaml, "summary"),
682            Some("summary".into()),
683            "content file has no `summary`".into(),
684            Some("run `dbmd fm init`".into()),
685            vec![],
686        ),
687        Some(v) => {
688            let s = scalar_string(v).unwrap_or_default();
689            if s.trim().is_empty() {
690                push(
691                    issues,
692                    Severity::Error,
693                    codes::SUMMARY_EMPTY,
694                    rel,
695                    line,
696                    Some("summary".into()),
697                    "`summary` is present but empty".into(),
698                    Some("write a one-line summary, or run `dbmd fm init`".into()),
699                    vec![],
700                );
701            } else if s.contains('\n') {
702                push(
703                    issues,
704                    Severity::Error,
705                    codes::SUMMARY_MULTILINE,
706                    rel,
707                    line,
708                    Some("summary".into()),
709                    "`summary` must be one line (contains a newline)".into(),
710                    Some("collapse the summary to a single line".into()),
711                    vec![],
712                );
713            } else if s.chars().count() > MAX_SUMMARY_LEN {
714                push(
715                    issues,
716                    Severity::Warning,
717                    codes::SUMMARY_TOO_LONG,
718                    rel,
719                    line,
720                    Some("summary".into()),
721                    format!(
722                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
723                        s.chars().count()
724                    ),
725                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
726                    vec![],
727                );
728            }
729        }
730    }
731}
732
733/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
734fn check_body_wiki_links(
735    store: &Store,
736    rel: &Path,
737    body: &str,
738    fm_end_line: u32,
739    basenames: Option<&BasenameIndex>,
740    issues: &mut Vec<Issue>,
741) {
742    for link in extract_wiki_links(body) {
743        // Body lines are offset past the frontmatter block. `link.line` is
744        // 1-based within `body`; the body starts at `fm_end_line + 1`.
745        let abs_line = fm_end_line + link.line;
746        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
747    }
748}
749
/// Maps a file's bare basename (its stem, without `.md`) to every
/// store-relative path that carries that basename.
///
/// The `validate --all` sweep builds it once so the short-form wiki-link check
/// can tell a merely short-form target (`WIKI_LINK_SHORT_FORM`) apart from one
/// that is *ambiguous* because the basename matches two or more files
/// (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set path passes
/// `None` instead: that loop is O(changed) and never walks the store, so it
/// reports the plain short-form error without the scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Build the [`BasenameIndex`] from an already-gathered list of swept files
/// (the list `validate_all` collected; no extra walk happens here).
///
/// Paths whose stem is missing or not valid UTF-8 are left out. Within one
/// basename, paths keep the order in which they appear in `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files.iter().fold(BasenameIndex::new(), |mut by_stem, path| {
        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
            by_stem
                .entry(stem.to_owned())
                .or_default()
                .push(path.clone());
        }
        by_stem
    })
}
770
771/// The shared per-wiki-link doctrine + integrity check used by both body links
772/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
773/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
774/// when its bare basename matches ≥2 files.
775fn check_wiki_link(
776    store: &Store,
777    rel: &Path,
778    link: &Link,
779    line: Option<u32>,
780    key: Option<&str>,
781    basenames: Option<&BasenameIndex>,
782    issues: &mut Vec<Issue>,
783) {
784    let bare = link.target.trim_end_matches(".md");
785
786    // Short-form: not a full store-relative path (no `/`, or first segment isn't
787    // a known layer).
788    if !is_full_store_path(bare) {
789        // Ambiguous (defensive) takes precedence over plain short-form when the
790        // target is a bare basename (no `/`) that matches ≥2 files in the store.
791        // Only computable in the sweep (where `basenames` is populated); the
792        // working-set path falls through to the plain short-form error.
793        if !bare.contains('/') {
794            if let Some(idx) = basenames {
795                if let Some(matches) = idx.get(bare) {
796                    if matches.len() >= 2 {
797                        let mut related = matches.clone();
798                        related.sort();
799                        push(
800                            issues,
801                            Severity::Error,
802                            codes::WIKI_LINK_AMBIGUOUS,
803                            rel,
804                            line,
805                            key.map(str::to_string),
806                            format!(
807                                "short-form wiki-link `[[{}]]` matches multiple files",
808                                link.target
809                            ),
810                            Some("use the full store-relative path to disambiguate".into()),
811                            related,
812                        );
813                        return;
814                    }
815                }
816            }
817        }
818        push(
819            issues,
820            Severity::Error,
821            codes::WIKI_LINK_SHORT_FORM,
822            rel,
823            line,
824            key.map(str::to_string),
825            format!(
826                "wiki-link `[[{}]]` is not a full store-relative path",
827                link.target
828            ),
829            short_form_suggestion(bare),
830            vec![],
831        );
832        // Don't also report broken; the agent must fix the form first.
833        return;
834    }
835
836    // `.md` extension → warning, then still check existence.
837    if link.target.ends_with(".md") {
838        push(
839            issues,
840            Severity::Warning,
841            codes::WIKI_LINK_HAS_EXTENSION,
842            rel,
843            line,
844            key.map(str::to_string),
845            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
846            Some(format!("drop the extension: [[{bare}]]")),
847            vec![],
848        );
849    }
850
851    // Broken: target file doesn't exist (O(1) stat).
852    let target_abs = store.root.join(format!("{bare}.md"));
853    if !target_abs.is_file() {
854        push(
855            issues,
856            Severity::Error,
857            codes::WIKI_LINK_BROKEN,
858            rel,
859            line,
860            key.map(str::to_string),
861            format!("wiki-link target `{bare}` doesn't exist"),
862            None,
863            vec![],
864        );
865    }
866}
867
868// ─────────────────────────────────────────────────────────────────────────────
869//  Schema enforcement (implicit canonical + explicit DB.md ## Schemas)
870// ─────────────────────────────────────────────────────────────────────────────
871
872/// The effective schema for a type: an explicit `DB.md ## Schemas` block wins;
873/// otherwise the implicit canonical schema (the `(link)` etc. annotations from
874/// SPEC's recognized-types table). `None` for unknown types with no schema.
875fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
876    if let Some(s) = store.config.schemas.get(type_) {
877        return Some(s.clone());
878    }
879    implicit_canonical_schema(type_)
880}
881
882/// The set of field names the type's effective schema declares with a `date`
883/// shape. These are owned by the schema-shape check (`SCHEMA_SHAPE_MISMATCH`),
884/// so the generic `FM_BAD_TIMESTAMP` date-field pass skips them — see precedence
885/// rule #2 in `corpus-b-edges/EXPECTED/README.md`. Empty when the type has no
886/// schema or no date-shaped field.
887fn schema_shaped_date_fields(store: &Store, type_: &str) -> BTreeSet<String> {
888    effective_schema(store, type_)
889        .map(|s| {
890            s.fields
891                .iter()
892                .filter(|f| matches!(f.shape, Some(Shape::Date)))
893                .map(|f| f.name.clone())
894                .collect()
895        })
896        .unwrap_or_default()
897}
898
899/// The implicit canonical schema for a recognized type — exactly the fields the
900/// SPEC's recognized-types table marks `(link → <prefix>/)`, and no others.
901/// These are validated exactly like explicit `link to` fields. Returns `None`
902/// for types with no canonical link-shaped field.
903///
904/// The marked set (SPEC § Recognized types table): `contact.company`,
905/// `expense.vendor`, `expense.contact`, `meeting.expense`, `invoice.vendor`.
906/// This match arm and that table must stay in lockstep — if you add or remove a
907/// field here, change the table's `(link)` annotations too.
908///
909/// `wiki-page.derived_from` is intentionally absent: its links may target either
910/// `records/` or `sources/`, so it has no single canonical prefix to enforce
911/// (see SPEC § Reading rules). An operator who wants a prefix on it declares an
912/// explicit `### wiki-page` schema in `DB.md ## Schemas`.
913fn implicit_canonical_schema(type_: &str) -> Option<Schema> {
914    // We model each marked field as a `link to <prefix>/` FieldSpec so it hits
915    // the same code path as explicit schemas (SCHEMA_LINK_PREFIX_MISMATCH).
916    let link_field = |name: &str, prefix: &str| FieldSpec {
917        name: name.to_string(),
918        required: false,
919        shape: None,
920        link_prefix: Some(PathBuf::from(prefix)),
921        default: None,
922        enum_values: None,
923        unknown_modifiers: vec![],
924    };
925    let fields: Vec<FieldSpec> = match type_ {
926        "contact" => vec![link_field("company", "records/companies/")],
927        "expense" => vec![
928            link_field("vendor", "records/companies/"),
929            link_field("contact", "records/contacts/"),
930        ],
931        "meeting" => vec![link_field("expense", "records/expenses/")],
932        "invoice" => vec![link_field("vendor", "records/companies/")],
933        _ => return None,
934    };
935    Some(Schema { fields })
936}
937
938/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
939fn check_schema(
940    store: &Store,
941    rel: &Path,
942    fm: &BTreeMap<String, Value>,
943    fm_yaml: &str,
944    schema: &Schema,
945    issues: &mut Vec<Issue>,
946) {
947    for spec in &schema.fields {
948        let present = fm.get(&spec.name);
949        let line = fm_key_line(fm_yaml, &spec.name);
950
951        // Required.
952        let is_empty = match present {
953            None => true,
954            Some(v) => scalar_string(v)
955                .map(|s| s.trim().is_empty())
956                .unwrap_or(false),
957        };
958        if spec.required && is_empty {
959            push(
960                issues,
961                Severity::Error,
962                codes::SCHEMA_MISSING_REQUIRED,
963                rel,
964                // Absent key → anchor to the frontmatter top (line 1); a
965                // present-but-empty value keeps its own line.
966                fm_key_line_or_top(fm_yaml, &spec.name),
967                Some(spec.name.clone()),
968                format!("required field `{}` is absent or empty", spec.name),
969                Some(format!("set `{}` to a non-empty value", spec.name)),
970                vec![],
971            );
972            continue;
973        }
974        let Some(value) = present else { continue };
975
976        // An OPTIONAL field that is `null` or empty is simply unset — there is
977        // no value to shape/enum/link-check. (The required+empty case already
978        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
979        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
980        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
981        let value_empty = value.is_null()
982            || scalar_string(value)
983                .map(|s| s.trim().is_empty())
984                .unwrap_or(false);
985        if !spec.required && value_empty {
986            continue;
987        }
988
989        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
990        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
991        if let Some(prefix) = &spec.link_prefix {
992            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
993            continue; // a link field is never also shape/enum-checked
994        }
995
996        // enum
997        if let Some(allowed) = &spec.enum_values {
998            if let Some(s) = scalar_string(value) {
999                if !allowed.iter().any(|a| a == &s) {
1000                    push(
1001                        issues,
1002                        Severity::Error,
1003                        codes::SCHEMA_ENUM_VIOLATION,
1004                        rel,
1005                        line,
1006                        Some(spec.name.clone()),
1007                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1008                        Some(format!("use one of: {}", allowed.join(", "))),
1009                        vec![],
1010                    );
1011                }
1012            }
1013            continue;
1014        }
1015
1016        // shape
1017        if let Some(shape) = spec.shape {
1018            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1019        }
1020    }
1021}
1022
1023/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1024/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1025/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1026/// recognized exactly like the quoted form.
1027fn check_schema_link(
1028    store: &Store,
1029    rel: &Path,
1030    field: &str,
1031    fm_yaml: &str,
1032    prefix: &Path,
1033    line: Option<u32>,
1034    issues: &mut Vec<Issue>,
1035) {
1036    let prefix_str = prefix.to_string_lossy();
1037    let prefix_str = prefix_str.trim_end_matches('/');
1038    let suggestion = |target_leaf: &str| {
1039        Some(format!(
1040            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1041        ))
1042    };
1043
1044    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1045    if links.is_empty() {
1046        // No wiki-link in the field's value → it's a plain string.
1047        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1048        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1049        let leaf = slugish(raw);
1050        push(
1051            issues,
1052            Severity::Error,
1053            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1054            rel,
1055            line,
1056            Some(field.to_string()),
1057            format!(
1058                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1059            ),
1060            suggestion(&leaf),
1061            vec![],
1062        );
1063        return;
1064    }
1065
1066    for link in links {
1067        let bare = link.target.trim_end_matches(".md");
1068        if !path_under_prefix(bare, prefix_str) {
1069            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1070            push(
1071                issues,
1072                Severity::Error,
1073                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1074                rel,
1075                line,
1076                Some(field.to_string()),
1077                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1078                suggestion(leaf),
1079                vec![],
1080            );
1081        } else {
1082            // Correct prefix — still surface a broken target so the agent sees
1083            // one consistent vocabulary.
1084            let target_abs = store.root.join(format!("{bare}.md"));
1085            if !target_abs.is_file() {
1086                push(
1087                    issues,
1088                    Severity::Error,
1089                    codes::WIKI_LINK_BROKEN,
1090                    rel,
1091                    line,
1092                    Some(field.to_string()),
1093                    format!("wiki-link target `{bare}` doesn't exist"),
1094                    None,
1095                    vec![],
1096                );
1097            }
1098        }
1099    }
1100}
1101
1102/// Shape enforcement for a non-link, non-enum schema field.
1103fn check_schema_shape(
1104    rel: &Path,
1105    field: &str,
1106    value: &Value,
1107    shape: Shape,
1108    line: Option<u32>,
1109    issues: &mut Vec<Issue>,
1110) {
1111    let s = scalar_string(value).unwrap_or_default();
1112    let ok = match shape {
1113        Shape::String => true, // any scalar string
1114        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1115        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1116        Shape::Date => is_iso8601_date_or_datetime(&s),
1117        Shape::Email => is_email(&s),
1118        Shape::Currency => is_currency(&s),
1119        Shape::Url => is_url(&s),
1120    };
1121    if !ok {
1122        push(
1123            issues,
1124            Severity::Error,
1125            codes::SCHEMA_SHAPE_MISMATCH,
1126            rel,
1127            line,
1128            Some(field.to_string()),
1129            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1130            Some(shape_suggestion(shape)),
1131            vec![],
1132        );
1133    }
1134}
1135
1136// ─────────────────────────────────────────────────────────────────────────────
1137//  Cross-file: entity-dedup collisions (validate_all only)
1138// ─────────────────────────────────────────────────────────────────────────────
1139
1140/// Hard `DUP_ID` + the six soft `DUP_*` entity-dedup collisions.
1141///
1142/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1143/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1144/// lexicographically smallest store-relative path in the group (a total order →
1145/// deterministic); `related` is the rest, sorted. A single-field collision
1146/// (`id`/`email`/`domain`) anchors to that field's line on the reported file and
1147/// carries it as `key`; a multi-field tuple collision anchors to line 1 with a
1148/// null key.
1149fn check_duplicates(parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1150    // Path → frontmatter YAML, for resolving the anchor field's line on the
1151    // reported (smallest-path) member.
1152    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1153        .iter()
1154        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1155        .collect();
1156
1157    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1158    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1159    for (rel, p) in parsed {
1160        if let Some(map) = &p.fm {
1161            if let Some(id) = map.get("id").and_then(scalar_string) {
1162                if !id.trim().is_empty() {
1163                    by_id.entry(id).or_default().push(rel.clone());
1164                }
1165            }
1166        }
1167    }
1168    for (id, files) in &by_id {
1169        if files.len() > 1 {
1170            let (reported, related) = canonical_and_related(files);
1171            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1172            push(
1173                issues,
1174                Severity::Error,
1175                codes::DUP_ID,
1176                &reported,
1177                line,
1178                Some("id".into()),
1179                format!("id {id:?} is declared by more than one file"),
1180                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1181                related,
1182            );
1183        }
1184    }
1185
1186    // ── Soft, type-aware tuple dedup (all → warning). ────────────────────────
1187    // Build (type → field-tuple → files) maps.
1188    let field = |p: &Parsed, k: &str| -> Option<String> {
1189        p.fm.as_ref()
1190            .and_then(|m| m.get(k))
1191            .and_then(scalar_string)
1192            .map(|s| s.trim().to_lowercase())
1193    };
1194    // A field that may be a wiki-link (e.g. `vendor`): prefer its bare link
1195    // target (handles the unquoted YAML-sequence form), else a scalar string.
1196    let link_or_scalar = |p: &Parsed, k: &str| -> Option<String> {
1197        if let Some(link) = frontmatter_links_for_key(&p.fm_yaml, k, 2)
1198            .into_iter()
1199            .next()
1200        {
1201            return Some(link.target.trim_end_matches(".md").to_lowercase());
1202        }
1203        field(p, k)
1204    };
1205
1206    // contact.email — single-field collision, anchors to the `email` line.
1207    soft_dup(
1208        parsed,
1209        issues,
1210        "contact",
1211        codes::DUP_CONTACT_EMAIL,
1212        Some("email"),
1213        &fm_yaml_of,
1214        |p| field(p, "email").map(|e| vec![e]),
1215    );
1216    // company.domain — single-field collision, anchors to the `domain` line.
1217    soft_dup(
1218        parsed,
1219        issues,
1220        "company",
1221        codes::DUP_COMPANY_DOMAIN,
1222        Some("domain"),
1223        &fm_yaml_of,
1224        |p| field(p, "domain").map(|d| vec![d]),
1225    );
1226    // expense (date, amount, vendor) — tuple, anchors to line 1.
1227    soft_dup(
1228        parsed,
1229        issues,
1230        "expense",
1231        codes::DUP_EXPENSE_TUPLE,
1232        None,
1233        &fm_yaml_of,
1234        |p| {
1235            Some(vec![
1236                field(p, "date")?,
1237                field(p, "amount")?,
1238                link_or_scalar(p, "vendor")?,
1239            ])
1240        },
1241    );
1242    // invoice (vendor, date, amount) — tuple, anchors to line 1.
1243    soft_dup(
1244        parsed,
1245        issues,
1246        "invoice",
1247        codes::DUP_INVOICE_TUPLE,
1248        None,
1249        &fm_yaml_of,
1250        |p| {
1251            Some(vec![
1252                link_or_scalar(p, "vendor")?,
1253                field(p, "date")?,
1254                field(p, "amount")?,
1255            ])
1256        },
1257    );
1258    // email (from, subject, date) — tuple, anchors to line 1.
1259    soft_dup(
1260        parsed,
1261        issues,
1262        "email",
1263        codes::DUP_EMAIL_REINGEST,
1264        None,
1265        &fm_yaml_of,
1266        |p| {
1267            Some(vec![
1268                field(p, "from")?,
1269                field(p, "subject")?,
1270                field(p, "date")?,
1271            ])
1272        },
1273    );
1274    // meeting (date, sorted-attendees-set) — tuple, anchors to line 1.
1275    soft_dup(
1276        parsed,
1277        issues,
1278        "meeting",
1279        codes::DUP_MEETING_TUPLE,
1280        None,
1281        &fm_yaml_of,
1282        |p| {
1283            let date = field(p, "date")?;
1284            let attendees = meeting_attendees_key(p)?;
1285            Some(vec![date, attendees])
1286        },
1287    );
1288}
1289
1290/// Emit ONE soft-dedup warning per group of ≥2 files of `type_` that share the
1291/// tuple `key_of` returns. Files for which `key_of` is `None` (missing a field)
1292/// are skipped — an incomplete tuple is never a collision.
1293///
1294/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1295/// store-relative path; `related` is the rest. `anchor_field` is `Some(name)`
1296/// for a single-field collision (`email`/`domain`) — the issue then anchors to
1297/// that field's line on the reported file and carries it as `key`; `None` for a
1298/// multi-field tuple, which anchors to line 1 with a null key. `fm_yaml_of`
1299/// resolves the field line on the reported member.
1300#[allow(clippy::too_many_arguments)]
1301fn soft_dup(
1302    parsed: &[(PathBuf, Parsed)],
1303    issues: &mut Vec<Issue>,
1304    type_: &str,
1305    code: &'static str,
1306    anchor_field: Option<&str>,
1307    fm_yaml_of: &HashMap<&PathBuf, &str>,
1308    key_of: impl Fn(&Parsed) -> Option<Vec<String>>,
1309) {
1310    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1311    for (rel, p) in parsed {
1312        let is_type =
1313            p.fm.as_ref()
1314                .and_then(|m| m.get("type"))
1315                .and_then(scalar_string)
1316                .map(|t| t == type_)
1317                .unwrap_or(false);
1318        if !is_type {
1319            continue;
1320        }
1321        if let Some(key) = key_of(p) {
1322            groups.entry(key).or_default().push(rel.clone());
1323        }
1324    }
1325    for files in groups.values() {
1326        if files.len() > 1 {
1327            let (reported, related) = canonical_and_related(files);
1328            // Single-field collisions anchor to the field's line + carry the key;
1329            // tuple collisions anchor to line 1 with a null key.
1330            let (line, key) = match anchor_field {
1331                Some(f) => (
1332                    fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, f)),
1333                    Some(f.to_string()),
1334                ),
1335                None => (Some(1), None),
1336            };
1337            push(
1338                issues,
1339                Severity::Warning,
1340                code,
1341                &reported,
1342                line,
1343                key,
1344                format!(
1345                    "{type_} record shares its dedup key with {} other record(s)",
1346                    related.len()
1347                ),
1348                Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1349                related,
1350            );
1351        }
1352    }
1353}
1354
/// Split a collision group into `(reported, related)`.
///
/// The group is sorted ascending; the smallest store-relative path becomes the
/// reported member and everything after it, still in sorted order, is
/// `related`. Because paths are totally ordered the split is deterministic —
/// the property reporting rule #1 depends on.
///
/// # Panics
///
/// Panics when `files` is empty; callers only pass groups of two or more.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered = files.to_vec();
    ordered.sort();

    let mut members = ordered.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1366
1367// ─────────────────────────────────────────────────────────────────────────────
1368//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1369// ─────────────────────────────────────────────────────────────────────────────
1370
1371/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1372fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1373    // Group content files by their immediate parent folder (the type-folder,
1374    // *across date shards* — a sharded file's "type folder" is the folder right
1375    // under the layer). We key on the type-folder so shards roll up correctly.
1376    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1377    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1378    for rel in files {
1379        // The layer is the first path component — recorded independently of the
1380        // type-folder so a layer containing only loose files still requires an
1381        // `index.md`.
1382        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1383            match layer {
1384                "sources" => layers_present.insert("sources"),
1385                "records" => layers_present.insert("records"),
1386                "wiki" => layers_present.insert("wiki"),
1387                _ => false,
1388            };
1389        }
1390        if let Some(tf) = type_folder_of(rel) {
1391            type_folders.entry(tf).or_default().push(rel.clone());
1392        }
1393    }
1394
1395    // ── Root index.md ─────────────────────────────────────────────────────────
1396    if !files.is_empty() {
1397        let root_index = store.root.join("index.md");
1398        if !root_index.is_file() {
1399            push(
1400                issues,
1401                Severity::Error,
1402                codes::INDEX_MISSING,
1403                Path::new("index.md"),
1404                None,
1405                None,
1406                "store has files but no root `index.md`".into(),
1407                Some("run `dbmd index rebuild`".into()),
1408                vec![],
1409            );
1410        } else {
1411            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1412        }
1413    }
1414
1415    // ── Layer index.md ────────────────────────────────────────────────────────
1416    for layer in &layers_present {
1417        let layer_index_rel = PathBuf::from(layer).join("index.md");
1418        let abs = store.root.join(&layer_index_rel);
1419        if !abs.is_file() {
1420            push(
1421                issues,
1422                Severity::Error,
1423                codes::INDEX_MISSING,
1424                &layer_index_rel,
1425                None,
1426                None,
1427                format!("layer `{layer}/` has files but no `index.md`"),
1428                Some("run `dbmd index rebuild`".into()),
1429                vec![],
1430            );
1431        } else {
1432            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1433        }
1434    }
1435
1436    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1437    for (tf, members) in &type_folders {
1438        let index_md_rel = tf.join("index.md");
1439        let index_md_abs = store.root.join(&index_md_rel);
1440        let index_md_present = index_md_abs.is_file();
1441        if !index_md_present {
1442            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1443            // on the FOLDER (not the would-be `index.md` path). When the index is
1444            // entirely missing we do NOT additionally evaluate per-entry
1445            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1446            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1447            push(
1448                issues,
1449                Severity::Error,
1450                codes::INDEX_MISSING,
1451                tf,
1452                None,
1453                None,
1454                format!("non-empty folder `{}` has no index.md", tf.display()),
1455                Some(format!(
1456                    "run `dbmd index rebuild --folder {}`",
1457                    tf.display()
1458                )),
1459                vec![],
1460            );
1461            continue;
1462        }
1463
1464        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1465        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1466
1467        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1468        // when the `index.md` is present (above): a folder whose entire index is
1469        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1470        let jsonl_rel = tf.join("index.jsonl");
1471        let jsonl_abs = store.root.join(&jsonl_rel);
1472        if !jsonl_abs.is_file() {
1473            push(
1474                issues,
1475                Severity::Error,
1476                codes::INDEX_JSONL_MISSING,
1477                &jsonl_rel,
1478                None,
1479                None,
1480                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1481                Some("run `dbmd index rebuild`".into()),
1482                vec![],
1483            );
1484        } else {
1485            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1486        }
1487    }
1488
1489    // ── Orphan index.md: an index file in a folder with no content. ──────────
1490    for rel in walk_index_files(&store.root) {
1491        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1492        let parent_str = parent.to_string_lossy().to_string();
1493        let is_canonical = parent_str.is_empty() // root
1494            || matches!(parent_str.as_str(), "sources" | "records" | "wiki")
1495            || type_folders.contains_key(&parent);
1496        if !is_canonical {
1497            push(
1498                issues,
1499                Severity::Warning,
1500                codes::INDEX_ORPHAN,
1501                &rel,
1502                None,
1503                None,
1504                format!(
1505                    "`{}` sits in an empty or non-canonical folder",
1506                    rel.display()
1507                ),
1508                Some("remove it, or run `dbmd index rebuild`".into()),
1509                vec![],
1510            );
1511        }
1512    }
1513}
1514
1515/// Check a type-folder `index.md`'s entries against the folder's actual files:
1516/// stale entries (target gone), missing entries (file not listed), and
1517/// summary mismatches.
1518fn check_type_folder_index_md(
1519    store: &Store,
1520    tf: &Path,
1521    index_rel: &Path,
1522    members: &[PathBuf],
1523    issues: &mut Vec<Issue>,
1524) {
1525    let abs = store.root.join(index_rel);
1526    let Ok(text) = std::fs::read_to_string(&abs) else {
1527        return;
1528    };
1529    let entries = parse_index_entries(&text);
1530
1531    let listed: BTreeSet<PathBuf> = entries
1532        .iter()
1533        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1534        .collect();
1535
1536    // Stale entries + summary mismatch.
1537    for entry in &entries {
1538        let bare = entry.target.trim_end_matches(".md");
1539        let target_abs = store.root.join(format!("{bare}.md"));
1540        if !target_abs.is_file() {
1541            push(
1542                issues,
1543                Severity::Error,
1544                codes::INDEX_STALE_ENTRY,
1545                index_rel,
1546                Some(entry.line),
1547                None,
1548                format!("index entry `[[{bare}]]` points at a missing file"),
1549                Some("run `dbmd index rebuild`".into()),
1550                // The stale target the entry names (the file that no longer
1551                // exists) — so the agent can locate the dangling reference.
1552                vec![PathBuf::from(format!("{bare}.md"))],
1553            );
1554            continue;
1555        }
1556        // Summary mismatch: the entry text must equal the file's `summary`.
1557        if let Some(expected) = read_summary(&target_abs) {
1558            if let Some(text_part) = &entry.summary_text {
1559                if text_part.trim() != expected.trim() {
1560                    push(
1561                        issues,
1562                        Severity::Error,
1563                        codes::INDEX_SUMMARY_MISMATCH,
1564                        index_rel,
1565                        Some(entry.line),
1566                        None,
1567                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1568                        Some("run `dbmd index rebuild`".into()),
1569                        vec![PathBuf::from(format!("{bare}.md"))],
1570                    );
1571                }
1572            }
1573        }
1574    }
1575
1576    // Missing entries: a member file not listed. Skip the index/log meta files.
1577    // The browse view caps at 500; only flag a missing entry when the folder is
1578    // under the cap (a capped folder legitimately omits older files).
1579    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1580    if content_members.len() <= 500 {
1581        for m in content_members {
1582            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1583            if !listed.contains(&bare) {
1584                push(
1585                    issues,
1586                    Severity::Error,
1587                    codes::INDEX_MISSING_ENTRY,
1588                    index_rel,
1589                    None,
1590                    None,
1591                    format!(
1592                        "file `{}` is not listed in its folder's `index.md`",
1593                        m.display()
1594                    ),
1595                    Some("run `dbmd index rebuild`".into()),
1596                    vec![(*m).clone()],
1597                );
1598            }
1599        }
1600    }
1601    let _ = tf;
1602}
1603
1604/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1605/// folder (uncapped), every record must point at a real file, and each record's
1606/// fields must match the file's frontmatter.
1607fn check_type_folder_index_jsonl(
1608    store: &Store,
1609    tf: &Path,
1610    jsonl_rel: &Path,
1611    members: &[PathBuf],
1612    issues: &mut Vec<Issue>,
1613) {
1614    let abs = store.root.join(jsonl_rel);
1615    let Ok(text) = std::fs::read_to_string(&abs) else {
1616        return;
1617    };
1618
1619    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1620    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1621    for (i, line) in text.lines().enumerate() {
1622        let line = line.trim();
1623        if line.is_empty() {
1624            continue;
1625        }
1626        let rec: serde_json::Value = match serde_json::from_str(line) {
1627            Ok(v) => v,
1628            Err(e) => {
1629                push(
1630                    issues,
1631                    Severity::Error,
1632                    codes::INDEX_JSONL_DESYNC,
1633                    jsonl_rel,
1634                    Some((i + 1) as u32),
1635                    None,
1636                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1637                    Some("run `dbmd index rebuild`".into()),
1638                    vec![],
1639                );
1640                continue;
1641            }
1642        };
1643        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1644            records.insert(PathBuf::from(path), rec);
1645        }
1646    }
1647
1648    let member_set: BTreeSet<PathBuf> = members
1649        .iter()
1650        .filter(|m| is_content_file(m))
1651        .cloned()
1652        .collect();
1653
1654    // jsonl record → missing file = desync.
1655    for path in records.keys() {
1656        let target_abs = store.root.join(path);
1657        if !target_abs.is_file() {
1658            push(
1659                issues,
1660                Severity::Error,
1661                codes::INDEX_JSONL_DESYNC,
1662                jsonl_rel,
1663                None,
1664                None,
1665                format!(
1666                    "`index.jsonl` record points at missing file `{}`",
1667                    path.display()
1668                ),
1669                Some("run `dbmd index rebuild`".into()),
1670                vec![],
1671            );
1672        }
1673    }
1674
1675    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1676    for m in &member_set {
1677        if !records.contains_key(m) {
1678            push(
1679                issues,
1680                Severity::Error,
1681                codes::INDEX_JSONL_DESYNC,
1682                jsonl_rel,
1683                None,
1684                None,
1685                format!(
1686                    "file `{}` is missing from the complete `index.jsonl`",
1687                    m.display()
1688                ),
1689                Some("run `dbmd index rebuild`".into()),
1690                vec![m.clone()],
1691            );
1692        }
1693    }
1694
1695    // Record fields stale vs. frontmatter. SPEC § Validation defines
1696    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1697    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1698    // search paths read every field straight from these sidecars (`tags`,
1699    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1700    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1701    // a stale value answer queries with data that exists in no `.md` file.
1702    //
1703    // Rather than re-list (and drift from) every projected key, rebuild the
1704    // record the canonical projection would write for this file
1705    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1706    // and diff the two as flat JSON maps. Every key the projection emits is
1707    // covered automatically; `path` is the join key and is skipped.
1708    for (path, rec) in &records {
1709        let target_abs = store.root.join(path);
1710        if !target_abs.is_file() {
1711            continue;
1712        }
1713        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1714        else {
1715            continue; // unreadable / unparseable frontmatter is reported elsewhere
1716        };
1717        let Ok(expected_json) = serde_json::to_value(&expected) else {
1718            continue;
1719        };
1720        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1721            continue;
1722        };
1723
1724        // Compare the union of keys present on either side; a key the file
1725        // projects but the sidecar omits is just as stale as a wrong value.
1726        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1727        for key in have.keys().chain(want.keys()) {
1728            if key == "path" {
1729                continue;
1730            }
1731            if have.get(key) != want.get(key) {
1732                mismatched_keys.insert(key);
1733            }
1734        }
1735
1736        if !mismatched_keys.is_empty() {
1737            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1738            push(
1739                issues,
1740                Severity::Error,
1741                codes::INDEX_JSONL_STALE,
1742                jsonl_rel,
1743                None,
1744                Some(keys.join(",")),
1745                format!(
1746                    "`index.jsonl` record for `{}` is stale ({})",
1747                    path.display(),
1748                    keys.join(", ")
1749                ),
1750                Some("run `dbmd index rebuild`".into()),
1751                vec![path.clone()],
1752            );
1753        }
1754    }
1755    let _ = tf;
1756}
1757
1758/// Check an index's `scope:` frontmatter against its filesystem location.
1759fn check_index_scope(
1760    store: &Store,
1761    index_rel: &Path,
1762    expected_scope: &str,
1763    expected_folder: Option<&str>,
1764    issues: &mut Vec<Issue>,
1765) {
1766    let abs = store.root.join(index_rel);
1767    let Ok(text) = std::fs::read_to_string(&abs) else {
1768        return;
1769    };
1770    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1771        return;
1772    };
1773    let Ok(Value::Mapping(map)) = serde_yml::from_str::<Value>(&yaml) else {
1774        return;
1775    };
1776    let fm = yaml_map_to_btree(&map);
1777
1778    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1779        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1780        let scope_ok =
1781            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1782        if !scope_ok {
1783            push(
1784                issues,
1785                Severity::Warning,
1786                codes::INDEX_WRONG_SCOPE,
1787                index_rel,
1788                fm_key_line(&yaml, "scope"),
1789                Some("scope".into()),
1790                format!(
1791                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1792                ),
1793                Some(format!("set `scope: {expected_scope}`")),
1794                vec![],
1795            );
1796        }
1797    }
1798    // folder: must match for layer/type-folder indexes.
1799    if let Some(expected) = expected_folder {
1800        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1801            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1802                push(
1803                    issues,
1804                    Severity::Warning,
1805                    codes::INDEX_WRONG_SCOPE,
1806                    index_rel,
1807                    fm_key_line(&yaml, "folder"),
1808                    Some("folder".into()),
1809                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1810                    Some(format!("set `folder: {expected}`")),
1811                    vec![],
1812                );
1813            }
1814        }
1815    }
1816}
1817
1818// ─────────────────────────────────────────────────────────────────────────────
1819//  Cross-file: log.md well-formedness + ordering (validate_all only)
1820// ─────────────────────────────────────────────────────────────────────────────
1821
1822/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries.
1823fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1824    let log_rel = Path::new("log.md");
1825    let abs = store.root.join(log_rel);
1826    let Ok(text) = std::fs::read_to_string(&abs) else {
1827        return;
1828    };
1829
1830    let mut prev: Option<DateTime<FixedOffset>> = None;
1831    for (i, line) in text.lines().enumerate() {
1832        if !line.starts_with("## [") {
1833            continue;
1834        }
1835        let line_no = (i + 1) as u32;
1836        match parse_log_header(line) {
1837            None => push(
1838                issues,
1839                Severity::Error,
1840                codes::LOG_BAD_TIMESTAMP,
1841                log_rel,
1842                Some(line_no),
1843                None,
1844                format!("log entry header has an unparseable timestamp: {line:?}"),
1845                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
1846                vec![],
1847            ),
1848            Some((ts, kind, _object)) => {
1849                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
1850                    push(
1851                        issues,
1852                        Severity::Warning,
1853                        codes::LOG_UNKNOWN_KIND,
1854                        log_rel,
1855                        Some(line_no),
1856                        None,
1857                        format!("log entry kind `{kind}` is not recognized"),
1858                        None,
1859                        vec![],
1860                    );
1861                }
1862                if let Some(p) = prev {
1863                    if ts < p {
1864                        push(
1865                            issues,
1866                            Severity::Warning,
1867                            codes::LOG_OUT_OF_ORDER,
1868                            log_rel,
1869                            Some(line_no),
1870                            None,
1871                            "log entry is older than the entry above it (possible rewrite)".into(),
1872                            Some("append corrective entries; never reorder past ones".into()),
1873                            vec![],
1874                        );
1875                    }
1876                }
1877                prev = Some(ts);
1878            }
1879        }
1880    }
1881}
1882
1883// ─────────────────────────────────────────────────────────────────────────────
1884//  Self-contained primitives (collapse onto sibling modules once they land)
1885// ─────────────────────────────────────────────────────────────────────────────
1886
/// A minimal wiki-link occurrence: the link's target and the 1-based line it
/// was found on. No display text is stored.
#[derive(Debug, Clone)]
struct Link {
    /// The link's target.
    target: String,
    /// 1-based line number of the occurrence.
    line: u32,
}
1892
1893/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
1894/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
1895/// exact-cased directory entry to be present.
1896fn store_marker_present(store: &Store) -> bool {
1897    let want = store.root.join("DB.md");
1898    if !want.is_file() {
1899        return false;
1900    }
1901    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
1902    match std::fs::read_dir(&store.root) {
1903        Ok(entries) => entries
1904            .flatten()
1905            .any(|e| e.file_name().to_str() == Some("DB.md")),
1906        Err(_) => true, // can't enumerate; trust the is_file() above
1907    }
1908}
1909
1910/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
1911/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
1912/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
1913/// `Schemas`).
1914///
1915/// `DB.md` is not a content file (no `summary`), so it is checked here rather
1916/// than through `check_content_file`. The marker presence is established by the
1917/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
1918/// as a store (the marker is the filename), so we report its shape rather than
1919/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
1920fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
1921    let rel = Path::new("DB.md");
1922    let abs = store.root.join("DB.md");
1923    let Ok(text) = std::fs::read_to_string(&abs) else {
1924        return; // marker present but unreadable: nothing more to say.
1925    };
1926
1927    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
1928        // No frontmatter block at all → it cannot declare `type: db-md` and has
1929        // neither required field. Report the type and both missing fields,
1930        // anchored to line 1 (the would-be opening fence).
1931        push(
1932            issues,
1933            Severity::Error,
1934            codes::DB_MD_BAD_TYPE,
1935            rel,
1936            Some(1),
1937            Some("type".into()),
1938            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
1939            Some("add a `---` frontmatter block with `type: db-md`".into()),
1940            vec![],
1941        );
1942        for field in ["scope", "owner"] {
1943            push(
1944                issues,
1945                Severity::Error,
1946                codes::DB_MD_MISSING_FIELD,
1947                rel,
1948                Some(1),
1949                Some(field.into()),
1950                format!("DB.md frontmatter is missing required field `{field}`"),
1951                Some(format!("add `{field}:` to the DB.md frontmatter")),
1952                vec![],
1953            );
1954        }
1955        return;
1956    };
1957
1958    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
1959    // identity contract is unmet (no provable `type: db-md`, no provable fields).
1960    let fm: Option<BTreeMap<String, Value>> = match serde_yml::from_str::<Value>(&fm_yaml) {
1961        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
1962        Ok(Value::Null) => Some(BTreeMap::new()),
1963        _ => None,
1964    };
1965
1966    match &fm {
1967        Some(map) => {
1968            // ── type: db-md ──────────────────────────────────────────────────
1969            let type_ = map.get("type").and_then(scalar_string);
1970            if type_.as_deref() != Some("db-md") {
1971                let (line, msg) = match &type_ {
1972                    Some(t) => (
1973                        fm_key_line(&fm_yaml, "type"),
1974                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
1975                    ),
1976                    None => (
1977                        Some(1),
1978                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
1979                    ),
1980                };
1981                push(
1982                    issues,
1983                    Severity::Error,
1984                    codes::DB_MD_BAD_TYPE,
1985                    rel,
1986                    line,
1987                    Some("type".into()),
1988                    msg,
1989                    Some("set `type: db-md` in the DB.md frontmatter".into()),
1990                    vec![],
1991                );
1992            }
1993
1994            // ── required fields: scope + owner ───────────────────────────────
1995            for field in ["scope", "owner"] {
1996                let present = map
1997                    .get(field)
1998                    .and_then(scalar_string)
1999                    .map(|s| !s.trim().is_empty())
2000                    .unwrap_or(false);
2001                if !present {
2002                    push(
2003                        issues,
2004                        Severity::Error,
2005                        codes::DB_MD_MISSING_FIELD,
2006                        rel,
2007                        // A present-but-empty field anchors to its line; a fully
2008                        // absent one to the block top.
2009                        fm_key_line_or_top(&fm_yaml, field),
2010                        Some(field.into()),
2011                        format!("DB.md frontmatter is missing required field `{field}`"),
2012                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2013                        vec![],
2014                    );
2015                }
2016            }
2017        }
2018        None => {
2019            // Unparseable frontmatter: the identity contract is unprovable. Emit
2020            // the type error and both field errors, anchored to the block top.
2021            push(
2022                issues,
2023                Severity::Error,
2024                codes::DB_MD_BAD_TYPE,
2025                rel,
2026                Some(1),
2027                Some("type".into()),
2028                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2029                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2030                vec![],
2031            );
2032            for field in ["scope", "owner"] {
2033                push(
2034                    issues,
2035                    Severity::Error,
2036                    codes::DB_MD_MISSING_FIELD,
2037                    rel,
2038                    Some(1),
2039                    Some(field.into()),
2040                    format!("DB.md frontmatter is missing required field `{field}`"),
2041                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2042                    vec![],
2043                );
2044            }
2045        }
2046    }
2047
2048    // ── recognized `##` section headers only ─────────────────────────────────
2049    // The body's H2 headings must be one of the three the toolkit reads; any
2050    // other is a likely typo / misplacement (warning — the parser ignores it,
2051    // so the config is not corrupted, but the operator wrote a section that will
2052    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2053    // schema blocks) live under their H2 and are not flagged here.
2054    for section in crate::parser::extract_sections(&body) {
2055        if section.level != 2 {
2056            continue;
2057        }
2058        let name = section.heading.trim().to_ascii_lowercase();
2059        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2060            continue;
2061        }
2062        // `Section::line` is 1-based within the body; the body begins at file
2063        // line `fm_end_line + 1`.
2064        let file_line = fm_end_line + section.line;
2065        push(
2066            issues,
2067            Severity::Warning,
2068            codes::DB_MD_UNKNOWN_SECTION,
2069            rel,
2070            Some(file_line),
2071            None,
2072            format!(
2073                "DB.md has an unrecognized `## {}` section",
2074                section.heading.trim()
2075            ),
2076            Some(
2077                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2078                 remove or rename this heading"
2079                    .into(),
2080            ),
2081            vec![],
2082        );
2083    }
2084}
2085
2086/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2087fn not_a_store_issue(store: &Store) -> Issue {
2088    Issue {
2089        severity: Severity::Error,
2090        code: codes::NOT_A_STORE,
2091        file: store.root.clone(),
2092        line: None,
2093        key: None,
2094        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2095        suggestion: Some("create a `DB.md` at the store root".into()),
2096        related: vec![],
2097    }
2098}
2099
/// Map a **recognized** content type to its canonical home layer, per SPEC §
/// Recognized types (the `Layer` column).
///
/// Returns `None` for any type not in the table — custom / unrecognized types
/// (which carry no layer expectation) and the meta types `db-md` / `index` /
/// `log` (which are not content files). This is the single source the
/// `LAYER_TYPE_MISMATCH` check consults.
fn canonical_layer_for_type(type_: &str) -> Option<&'static str> {
    // (content type, home layer)
    const HOME_LAYERS: &[(&str, &str)] = &[
        ("email", "sources"),
        ("transcript", "sources"),
        ("pdf-source", "sources"),
        ("contact", "records"),
        ("company", "records"),
        ("expense", "records"),
        ("meeting", "records"),
        ("decision", "records"),
        ("invoice", "records"),
        ("wiki-page", "wiki"),
    ];

    HOME_LAYERS
        .iter()
        .find(|(known, _)| *known == type_)
        .map(|&(_, layer)| layer)
}
2113
/// The canonical layer a store-relative path sits under: its first path
/// component, when that component is exactly `sources`, `records`, or `wiki`.
/// `None` for anything else (including an empty path or a non-UTF-8 first
/// component).
fn layer_of(rel: &Path) -> Option<&'static str> {
    const LAYERS: [&str; 3] = ["sources", "records", "wiki"];

    let head = rel.iter().next().and_then(|part| part.to_str())?;
    LAYERS.iter().copied().find(|layer| *layer == head)
}
2124
/// Whether a store-relative path names a content file: a `.md` file under
/// `sources/`, `records/`, or `wiki/` that is not one of the meta files
/// `index.md` / `index.jsonl` / `log.md`.
fn is_content_file(rel: &Path) -> bool {
    let in_layer = matches!(
        rel.iter().next().and_then(|part| part.to_str()),
        Some("sources" | "records" | "wiki")
    );
    if !in_layer {
        return false;
    }

    match rel.file_name().and_then(|name| name.to_str()) {
        // No (UTF-8) file name, or a meta file: not content.
        None | Some("index.md" | "index.jsonl" | "log.md") => false,
        Some(name) => name.ends_with(".md"),
    }
}
2140
/// Split a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The very first line must be `---` (trailing whitespace allowed) and the
/// block ends at the next such line. The returned YAML is the lines between
/// the fences, each terminated with `\n`; the body is every line after the
/// closing fence, joined with `\n`; the fence line is 1-based. Returns `None`
/// when the text doesn't open with a fence or the fence is never closed.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    let mut lines = text.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    let mut yaml = String::new();
    // Line 1 is the opening fence; this tracks the line just consumed.
    let mut line_no = 1u32;
    let mut closed = false;
    for line in lines.by_ref() {
        line_no += 1;
        if line.trim_end() == "---" {
            closed = true;
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
    if !closed {
        return None;
    }

    // Whatever the iterator has left is the body.
    let body = lines.collect::<Vec<_>>().join("\n");
    Some((yaml, body, line_no))
}
2172
2173/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2174fn read_summary(abs: &Path) -> Option<String> {
2175    let text = std::fs::read_to_string(abs).ok()?;
2176    let (yaml, _, _) = split_frontmatter(&text)?;
2177    let value: Value = serde_yml::from_str(&yaml).ok()?;
2178    if let Value::Mapping(m) = value {
2179        m.get(Value::String("summary".into()))
2180            .and_then(scalar_string)
2181    } else {
2182        None
2183    }
2184}
2185
2186/// Convert a `serde_yml` mapping into a string-keyed [`BTreeMap`], dropping
2187/// non-string keys (frontmatter keys are always strings).
2188fn yaml_map_to_btree(map: &serde_yml::Mapping) -> BTreeMap<String, Value> {
2189    let mut out = BTreeMap::new();
2190    for (k, v) in map {
2191        if let Value::String(s) = k {
2192            out.insert(s.clone(), v.clone());
2193        }
2194    }
2195    out
2196}
2197
2198/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2199/// sequences/mappings/null.
2200fn scalar_string(v: &Value) -> Option<String> {
2201    match v {
2202        Value::String(s) => Some(s.clone()),
2203        Value::Number(n) => Some(n.to_string()),
2204        Value::Bool(b) => Some(b.to_string()),
2205        _ => None,
2206    }
2207}
2208
2209/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2210/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2211fn is_flat_scalar_list(v: &Value) -> bool {
2212    match v {
2213        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2214        _ => false,
2215    }
2216}
2217
2218/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2219/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2220/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2221/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2222/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2223/// both forms uniformly, the way the link textually appears — the doctrine view.
2224///
2225/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2226/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2227fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2228    let mut out = Vec::new();
2229    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2230        for link in links {
2231            out.push((key.clone(), link));
2232        }
2233    }
2234    out
2235}
2236
2237/// The wiki-link targets declared under a single top-level frontmatter key
2238/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2239/// carries no `[[...]]`.
2240fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2241    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2242        if k == key {
2243            return links;
2244        }
2245    }
2246    Vec::new()
2247}
2248
2249/// The raw value text under a single top-level frontmatter key (the remainder of
2250/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2251/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2252fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2253    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2254        if k == key {
2255            return Some(value_text);
2256        }
2257    }
2258    None
2259}
2260
2261/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2262/// each top-level key. A top-level key is a line with no leading indentation in
2263/// `name:` form; its value spans the rest of that line plus any deeper-indented
2264/// continuation lines (block scalars, block sequences) until the next top-level
2265/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2266/// absolute file line.
2267fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2268    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2269    let mut current: Option<(String, String, Vec<Link>)> = None;
2270
2271    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2272        let file_line = fm_start_line + idx as u32;
2273        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2274        let trimmed = raw_line.trim();
2275
2276        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2277        // comment. (Indented or dash lines belong to the current key's value.)
2278        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2279            top_level_key(raw_line)
2280        } else {
2281            None
2282        };
2283
2284        if let Some((key, after)) = new_key {
2285            if let Some(done) = current.take() {
2286                blocks.push(done);
2287            }
2288            let mut links = Vec::new();
2289            collect_line_links(after, file_line, &mut links);
2290            current = Some((key, after.trim().to_string(), links));
2291        } else if let Some((_k, value_text, links)) = current.as_mut() {
2292            // Continuation of the current key's value (indented or dash line).
2293            if !value_text.is_empty() {
2294                value_text.push('\n');
2295            }
2296            value_text.push_str(trimmed);
2297            collect_line_links(raw_line, file_line, links);
2298        }
2299    }
2300    if let Some(done) = current.take() {
2301        blocks.push(done);
2302    }
2303    blocks
2304}
2305
/// Split a frontmatter line at its first `:` into `(key, value_after_colon)`.
///
/// The key is the trimmed text before the colon and must be non-empty and made
/// only of alphanumerics, `_`, or `-`; otherwise the line is not treated as a
/// `name:` mapping entry and `None` is returned. The value slice is returned
/// untrimmed, exactly as it follows the colon.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let value = &line[colon + 1..];

    let is_key_char = |c: char| c.is_alphanumeric() || matches!(c, '_' | '-');
    if name.is_empty() || name.chars().any(|c| !is_key_char(c)) {
        return None;
    }
    Some((name.to_string(), value))
}
2320
2321/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2322/// each tagged with `file_line`.
2323fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2324    let bytes = s.as_bytes();
2325    let mut i = 0;
2326    while i + 1 < bytes.len() {
2327        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2328            if let Some(close) = s[i + 2..].find("]]") {
2329                let inner = &s[i + 2..i + 2 + close];
2330                // Guard against `[[[` (nested) double-counting: the inner must
2331                // not itself open another `[[`.
2332                let target = inner
2333                    .trim_start_matches('[')
2334                    .split('|')
2335                    .next()
2336                    .unwrap_or(inner)
2337                    .trim()
2338                    .to_string();
2339                if !target.is_empty() {
2340                    links.push(Link {
2341                        target,
2342                        line: file_line,
2343                    });
2344                }
2345                i = i + 2 + close + 2;
2346                continue;
2347            }
2348        }
2349        i += 1;
2350    }
2351}
2352
2353/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2354/// Skips fenced code blocks (```), so example links in docs don't trip the
2355/// validator.
2356fn extract_wiki_links(body: &str) -> Vec<Link> {
2357    let mut out = Vec::new();
2358    let mut in_fence = false;
2359    for (idx, line) in body.lines().enumerate() {
2360        let trimmed = line.trim_start();
2361        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2362            in_fence = !in_fence;
2363            continue;
2364        }
2365        if in_fence {
2366            continue;
2367        }
2368        let line_no = (idx + 1) as u32;
2369        let bytes = line.as_bytes();
2370        let mut i = 0;
2371        while i + 1 < bytes.len() {
2372            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2373                if let Some(close) = line[i + 2..].find("]]") {
2374                    let inner = &line[i + 2..i + 2 + close];
2375                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2376                    // Skip a triple-bracket `[[[…` opening: the inner content
2377                    // starts with `[`, so this is the rejected flow-form list
2378                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2379                    // legitimate target never starts with `[`. The frontmatter
2380                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2381                    // extracting a bogus body link here would double-report it as
2382                    // a spurious `WIKI_LINK_SHORT_FORM`.
2383                    if !target.is_empty() && !target.starts_with('[') {
2384                        out.push(Link {
2385                            target,
2386                            line: line_no,
2387                        });
2388                    }
2389                    i = i + 2 + close + 2;
2390                    continue;
2391                }
2392            }
2393            i += 1;
2394        }
2395    }
2396    out
2397}
2398
/// Find frontmatter keys whose value is the wiki-link-list mis-encoding: a
/// YAML flow sequence whose first element is itself a flow sequence, i.e. a
/// value that begins with `[[[` (`attendees: [[[a]], [[b]]]`).
///
/// Each line is split at its first `:`; the trimmed left side is the key and
/// the trimmed right side is the value. Lines with an empty key, or a key that
/// starts with `#` or `-`, are ignored. Returns the offending keys in line
/// order. The canonical block-sequence form never matches because its key line
/// has no `[[[` value.
fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
    fm_yaml
        .lines()
        .filter_map(|line| line.split_once(':'))
        .map(|(key, value)| (key.trim(), value.trim()))
        .filter(|(key, _)| !(key.is_empty() || key.starts_with('#') || key.starts_with('-')))
        .filter(|(_, value)| value.starts_with("[[["))
        .map(|(key, _)| key.to_string())
        .collect()
}
2421
/// Reports whether a bare target (no `.md`) is a full store-relative path:
/// its first `/`-separated segment is one of the layers `sources`, `records`,
/// or `wiki`, and something non-empty follows that first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, remainder)) => {
            !remainder.is_empty() && matches!(layer, "sources" | "records" | "wiki")
        }
        None => false,
    }
}
2430
/// Reports whether a bare target path equals `prefix` or lies beneath it (both
/// `.md`-stripped). Trailing `/` on `prefix` is ignored, and the match is
/// segment-aware: `records/contacts-old/x` is not under `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let root = prefix.trim_end_matches('/');
    match bare.strip_prefix(root) {
        // Either an exact match, or the next character starts a new segment.
        Some(remainder) => remainder.is_empty() || remainder.starts_with('/'),
        None => false,
    }
}
2436
/// The type-folder of a store-relative content path: `<layer>/<type-folder>`,
/// i.e. the folder directly under the layer (deeper date-shards roll up to
/// it).
///
/// Returns `None` when the path has fewer than three components (a file
/// sitting directly in a layer folder, or shorter) or when the first component
/// is not one of `sources`, `records`, `wiki`. Components that are not valid
/// UTF-8 are left out before counting.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut segments = rel.iter().filter_map(|part| part.to_str());
    let layer = segments.next()?;
    let folder = segments.next()?;
    // A third component must exist: layer/type-folder/file at minimum.
    segments.next()?;

    if !matches!(layer, "sources" | "records" | "wiki") {
        return None;
    }
    Some(Path::new(layer).join(folder))
}
2450
2451/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`/`wiki/`,
2452/// returning store-relative paths to be parsed in full. Skips hidden dirs,
2453/// `log/`, and the index twin (`index.jsonl`). Used only by `validate_all`; the
2454/// working-set incoming-linker scan rides the embedded-ripgrep
2455/// `Store::find_links_to_any` (a single presence-only pass), so the loop default
2456/// never walks-and-*parses* the whole content tree.
2457fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2458    let mut out = Vec::new();
2459    for layer in ["sources", "records", "wiki"] {
2460        let base = root.join(layer);
2461        if !base.is_dir() {
2462            continue;
2463        }
2464        for entry in walkdir::WalkDir::new(&base)
2465            .into_iter()
2466            .filter_entry(|e| {
2467                let name = e.file_name().to_str().unwrap_or("");
2468                !name.starts_with('.') && name != "log"
2469            })
2470            .flatten()
2471        {
2472            if !entry.file_type().is_file() {
2473                continue;
2474            }
2475            let name = entry.file_name().to_str().unwrap_or("");
2476            if name.ends_with(".md") && name != "index.md" {
2477                if let Ok(rel) = entry.path().strip_prefix(root) {
2478                    out.push(rel.to_path_buf());
2479                }
2480            }
2481        }
2482    }
2483    out.sort();
2484    out
2485}
2486
2487/// Every `index.md` under the store (root + layers + type-folders), as
2488/// store-relative paths. Used to detect orphan indexes.
2489fn walk_index_files(root: &Path) -> Vec<PathBuf> {
2490    let mut out = Vec::new();
2491    if root.join("index.md").is_file() {
2492        out.push(PathBuf::from("index.md"));
2493    }
2494    for layer in ["sources", "records", "wiki"] {
2495        let base = root.join(layer);
2496        if !base.is_dir() {
2497            continue;
2498        }
2499        for entry in walkdir::WalkDir::new(&base)
2500            .into_iter()
2501            .filter_entry(|e| {
2502                let name = e.file_name().to_str().unwrap_or("");
2503                !name.starts_with('.') && name != "log"
2504            })
2505            .flatten()
2506        {
2507            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
2508                if let Ok(rel) = entry.path().strip_prefix(root) {
2509                    out.push(rel.to_path_buf());
2510                }
2511            }
2512        }
2513    }
2514    out.sort();
2515    out
2516}
2517
/// One parsed entry line of an `index.md`.
struct IndexEntry {
    /// The wiki-link target: the text before any `|display` segment, trimmed.
    target: String,
    /// The summary text that follows the `—` separator, when the line has one.
    summary_text: Option<String>,
    /// The 1-based line number of the entry within the index file.
    line: u32,
}
2525
2526/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
2527/// `## More` footer (those lines aren't file entries). Root/layer entries with a
2528/// `|display` segment and a `(N)` count are parsed too — the target is the bare
2529/// path, the summary text is whatever follows the em dash.
2530fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
2531    let mut out = Vec::new();
2532    let mut in_more = false;
2533    for (idx, line) in text.lines().enumerate() {
2534        let trimmed = line.trim_start();
2535        if trimmed.starts_with("## More") {
2536            in_more = true;
2537            continue;
2538        }
2539        if in_more {
2540            continue;
2541        }
2542        if !trimmed.starts_with("- ") {
2543            continue;
2544        }
2545        // Find the first `[[...]]`.
2546        let Some(open) = trimmed.find("[[") else {
2547            continue;
2548        };
2549        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
2550            continue;
2551        };
2552        let inner = &trimmed[open + 2..open + 2 + close_rel];
2553        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2554
2555        // Summary text: whatever follows the first em dash (`—`) or ` - `.
2556        let after = &trimmed[open + 2 + close_rel + 2..];
2557        let summary_text = extract_index_entry_summary(after);
2558
2559        out.push(IndexEntry {
2560            target,
2561            summary_text,
2562            line: (idx + 1) as u32,
2563        });
2564    }
2565    out
2566}
2567
/// Pull the summary out of the text that trails an index entry's wiki-link.
///
/// Steps, in order: trim; drop a leading `(...)` count segment if the text
/// starts with `(` and has a closing `)`; require a leading `—` or `-`
/// separator (otherwise `None`); trim again and return `None` if nothing is
/// left; finally cut at the first ` · ` so a trailing `· #tag` suffix is not
/// part of the summary.
fn extract_index_entry_summary(after: &str) -> Option<String> {
    let mut tail = after.trim();
    if tail.starts_with('(') {
        if let Some(close) = tail.find(')') {
            tail = tail[close + 1..].trim_start();
        }
    }

    // The summary only exists behind an em dash or hyphen separator.
    let body = tail
        .strip_prefix('—')
        .or_else(|| tail.strip_prefix('-'))?
        .trim();
    if body.is_empty() {
        return None;
    }

    let summary = body
        .split_once(" · ")
        .map_or(body, |(head, _tags)| head.trim());
    Some(summary.to_string())
}
2597
2598/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
2599/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
2600/// or the header isn't well-formed.
2601fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
2602    let rest = line.strip_prefix("## [")?;
2603    let close = rest.find(']')?;
2604    let ts_str = &rest[..close];
2605    let tail = rest[close + 1..].trim();
2606
2607    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
2608    // attach a zero offset — the log header carries minute precision, no zone.
2609    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
2610    let offset = FixedOffset::east_opt(0)?;
2611    let ts = naive.and_local_timezone(offset).single()?;
2612
2613    // kind | object
2614    let (kind, object) = match tail.split_once('|') {
2615        Some((k, o)) => {
2616            let o = o.trim();
2617            (
2618                k.trim().to_string(),
2619                if o.is_empty() {
2620                    None
2621                } else {
2622                    Some(o.to_string())
2623                },
2624            )
2625        }
2626        None => (tail.to_string(), None),
2627    };
2628    if kind.is_empty() {
2629        return None;
2630    }
2631    Some((ts, kind, object))
2632}
2633
2634/// The timestamp of the most recent `validate` entry across `log.md` (active)
2635/// — the default working-set cutoff. Reads only headers; never the whole store.
2636fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
2637    let text = std::fs::read_to_string(store.root.join("log.md")).ok()?;
2638    let mut latest: Option<DateTime<FixedOffset>> = None;
2639    for line in text.lines() {
2640        if !line.starts_with("## [") {
2641            continue;
2642        }
2643        if let Some((ts, kind, _)) = parse_log_header(line) {
2644            if kind == "validate" {
2645                latest = Some(match latest {
2646                    Some(p) if p >= ts => p,
2647                    _ => ts,
2648                });
2649            }
2650        }
2651    }
2652    latest
2653}
2654
2655/// The set of content objects changed since `cutoff`, read from `log.md`
2656/// entries whose kind mutates a file. When `cutoff` is `None`, every mutating
2657/// entry counts (no prior validate window). Returns store-relative `.md` paths.
2658fn changed_objects_since(
2659    store: &Store,
2660    cutoff: Option<DateTime<FixedOffset>>,
2661) -> BTreeSet<PathBuf> {
2662    let mut out = BTreeSet::new();
2663    let Ok(text) = std::fs::read_to_string(store.root.join("log.md")) else {
2664        return out;
2665    };
2666    for line in text.lines() {
2667        if !line.starts_with("## [") {
2668            continue;
2669        }
2670        let Some((ts, kind, object)) = parse_log_header(line) else {
2671            continue;
2672        };
2673        if let Some(c) = cutoff {
2674            if ts < c {
2675                continue;
2676            }
2677        }
2678        if !matches!(
2679            kind.as_str(),
2680            "create" | "update" | "ingest" | "rename" | "delete" | "link"
2681        ) {
2682            continue;
2683        }
2684        if let Some(obj) = object {
2685            // The object slot is a store-relative path (or a wiki-link target).
2686            let bare = obj
2687                .trim()
2688                .trim_start_matches("[[")
2689                .trim_end_matches("]]")
2690                .split('|')
2691                .next()
2692                .unwrap_or("")
2693                .trim()
2694                .trim_end_matches(".md")
2695                .to_string();
2696            if bare.is_empty() {
2697                continue;
2698            }
2699            out.insert(PathBuf::from(format!("{bare}.md")));
2700        }
2701    }
2702    out
2703}
2704
/// What [`derived_from_ignored_type`] reports when the policy check fires: the
/// offending `derived_from` target together with the type it resolved to.
/// These two values are all that the validate finding and the write-time
/// warning need to render their message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target exactly as written (a bare
    /// store-relative path, no `.md`).
    pub target: String,
    /// The `type` that target resolved to — a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
2718
2719/// **The single authoritative `### Ignored types` derivation check.** Decides
2720/// whether a `wiki-page` derives from an ignored-type record: the type must be
2721/// `wiki-page`, `### Ignored types` must be non-empty, and some `derived_from`
2722/// target must resolve to a record whose `type` is in `ignored_types`. Returns
2723/// the first such target (and its type), or `None`.
2724///
2725/// Both surfaces call this so the policy lives in exactly one place:
2726/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
2727/// `derived_from` targets it scanned from the raw frontmatter, and the write
2728/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
2729/// The link *extraction* differs per surface (text-scan with line numbers vs.
2730/// the parsed `Frontmatter`); the *decision* — type gate, target-type
2731/// resolution, and `ignored_types` membership — does not.
2732pub fn derived_from_ignored_type<I, S>(
2733    store: &Store,
2734    type_: &str,
2735    derived_from_targets: I,
2736) -> Option<DerivedFromIgnored>
2737where
2738    I: IntoIterator<Item = S>,
2739    S: AsRef<str>,
2740{
2741    if type_ != "wiki-page" || store.config.ignored_types.is_empty() {
2742        return None;
2743    }
2744    for target in derived_from_targets {
2745        let target = target.as_ref();
2746        if let Some(target_type) = link_target_type(store, target) {
2747            if store.config.ignored_types.contains(&target_type) {
2748                return Some(DerivedFromIgnored {
2749                    target: target.to_string(),
2750                    target_type,
2751                });
2752            }
2753        }
2754    }
2755    None
2756}
2757
2758/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
2759fn link_target_type(store: &Store, target: &str) -> Option<String> {
2760    let bare = target.trim_end_matches(".md");
2761    let abs = store.root.join(format!("{bare}.md"));
2762    let text = std::fs::read_to_string(&abs).ok()?;
2763    let (yaml, _, _) = split_frontmatter(&text)?;
2764    let value: Value = serde_yml::from_str(&yaml).ok()?;
2765    if let Value::Mapping(m) = value {
2766        m.get(Value::String("type".into())).and_then(scalar_string)
2767    } else {
2768        None
2769    }
2770}
2771
/// The type-specific, date-shaped fields of a recognized type. These are
/// validated as ISO-8601 dates on top of the universal `created`/`updated`.
/// Types without such fields (and unrecognized types) get an empty slice.
fn canonical_date_fields(type_: &str) -> &'static [&'static str] {
    const DATE_ONLY: &[&str] = &["date"];
    match type_ {
        "email" | "expense" | "meeting" => DATE_ONLY,
        "invoice" => &["date", "paid_at"],
        "contact" => &["first_touch", "last_touch"],
        "transcript" => &["recorded_at"],
        "pdf-source" => &["received_at"],
        _ => &[],
    }
}
2786
2787/// The meeting dedup key: `date` is handled by the caller; this returns the
2788/// sorted attendee set joined into a stable string. Attendees are wiki-links
2789/// (block-sequence), extracted from the raw frontmatter text so the unquoted
2790/// form is handled. `None` if no attendees.
2791fn meeting_attendees_key(p: &Parsed) -> Option<String> {
2792    let mut set = BTreeSet::new();
2793    for link in frontmatter_links_for_key(&p.fm_yaml, "attendees", 2) {
2794        let norm = link.target.trim_end_matches(".md").to_lowercase();
2795        if !norm.is_empty() {
2796            set.insert(norm);
2797        }
2798    }
2799    if set.is_empty() {
2800        return None;
2801    }
2802    Some(set.into_iter().collect::<Vec<_>>().join(","))
2803}
2804
2805// ── Shape validators ─────────────────────────────────────────────────────────
2806
2807/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
2808/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
2809fn is_iso8601(s: &str) -> bool {
2810    DateTime::parse_from_rfc3339(s.trim()).is_ok()
2811}
2812
2813/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
2814/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
2815/// accept the date-only form per the SPEC's worked example.
2816fn is_iso8601_date_or_datetime(s: &str) -> bool {
2817    let s = s.trim();
2818    if DateTime::parse_from_rfc3339(s).is_ok() {
2819        return true;
2820    }
2821    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
2822}
2823
/// True for `<local>@<domain>` with a non-empty local part and a dotted
/// domain. Surrounding whitespace is ignored.
///
/// This is a shape check, not full address validation. After trimming, the
/// string must:
/// - contain no whitespace of any kind (spaces, tabs, …);
/// - contain exactly one `@`, with something before it;
/// - have a domain with at least one `.` and no empty label — so no leading
///   or trailing dot and no `..`.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    if s.chars().any(char::is_whitespace) {
        return false;
    }
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    !local.is_empty()
        // A second `@` would otherwise hide inside the domain half.
        && !domain.contains('@')
        && domain.contains('.')
        && domain.split('.').all(|label| !label.is_empty())
}
2837
2838/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
2839/// plain decimal number with optional thousands separators and ≤ 2 decimals.
2840///
2841/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
2842/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
2843/// rejected, and the ≤ 2-decimal rule is actually enforced.
2844fn is_currency(s: &str) -> bool {
2845    let mut t = s.trim();
2846    // Strip a leading currency symbol …
2847    for sym in ["$", "€", "£", "¥"] {
2848        if let Some(rest) = t.strip_prefix(sym) {
2849            t = rest.trim_start();
2850            break;
2851        }
2852    }
2853    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
2854    // code must be exactly three ASCII letters and separated from the number by
2855    // whitespace, so a bare `USD` with no amount still fails.
2856    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
2857        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
2858            t = rest.trim_start();
2859        }
2860    }
2861
2862    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
2863    is_plain_amount(cleaned.trim())
2864}
2865
/// True for a bare decimal amount: one optional leading `+` or `-`, at least
/// one ASCII digit, then optionally a `.` followed by one or two ASCII digits.
/// Exponents, `inf`/`NaN`, a missing integer part, and the empty string are
/// all rejected.
fn is_plain_amount(s: &str) -> bool {
    let unsigned = s
        .strip_prefix('+')
        .or_else(|| s.strip_prefix('-'))
        .unwrap_or(s);
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    // Split at the first `.` only; a second dot lands in the fraction and fails.
    let (whole, fraction) = match unsigned.find('.') {
        Some(dot) => (&unsigned[..dot], Some(&unsigned[dot + 1..])),
        None => (unsigned, None),
    };

    if whole.is_empty() || !all_digits(whole) {
        return false;
    }
    fraction.map_or(true, |digits| {
        matches!(digits.len(), 1 | 2) && all_digits(digits)
    })
}
2882
/// True for an http(s) URL: after trimming, the string starts with `http://`
/// or `https://` and has at least one character after the scheme.
///
/// The remainder is measured against whichever scheme actually matched, so a
/// one-character host is accepted under both (`http://a`, `https://a`) and a
/// bare scheme is rejected under both. This is a shape check only; the part
/// after the scheme is not validated further.
fn is_url(s: &str) -> bool {
    let s = s.trim();
    ["http://", "https://"].iter().any(|scheme| {
        s.strip_prefix(*scheme)
            .map_or(false, |rest| !rest.is_empty())
    })
}
2888
2889/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
2890fn shape_suggestion(shape: Shape) -> String {
2891    match shape {
2892        Shape::String => "use a scalar string".into(),
2893        Shape::Int => "use an integer".into(),
2894        Shape::Bool => "use `true` or `false`".into(),
2895        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
2896        Shape::Email => "use a `<local>@<domain>` address".into(),
2897        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
2898        Shape::Url => "use an http(s) URL".into(),
2899    }
2900}
2901
2902/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
2903/// can't know the folder, so the suggestion is generic but actionable.
2904fn short_form_suggestion(bare: &str) -> Option<String> {
2905    Some(format!(
2906        "use a full store-relative path, e.g. [[records/contacts/{}]]",
2907        slugish(bare)
2908    ))
2909}
2910
/// Turn a plain string into a filesystem-ish leaf: trim, lowercase, replace
/// each whitespace character with `-`, then keep only alphanumerics, `-`,
/// `/`, and `_` (everything else is dropped).
fn slugish(s: &str) -> String {
    let mut slug = String::new();
    for original in s.trim().to_lowercase().chars() {
        let c = if original.is_whitespace() { '-' } else { original };
        if c.is_alphanumeric() || matches!(c, '-' | '/' | '_') {
            slug.push(c);
        }
    }
    slug
}
2920
2921/// Push a fully-formed [`Issue`].
2922#[allow(clippy::too_many_arguments)]
2923fn push(
2924    issues: &mut Vec<Issue>,
2925    severity: Severity,
2926    code: &'static str,
2927    file: &Path,
2928    line: Option<u32>,
2929    key: Option<String>,
2930    message: String,
2931    suggestion: Option<String>,
2932    related: Vec<PathBuf>,
2933) {
2934    issues.push(Issue {
2935        severity,
2936        code,
2937        file: file.to_path_buf(),
2938        line,
2939        key,
2940        message,
2941        suggestion,
2942        related,
2943    });
2944}
2945
/// The file line of a top-level frontmatter key, or `None` if no line matches.
///
/// A line matches when it begins — with no leading indentation or list dash —
/// with `key` immediately followed by `:`. The first match wins. The result is
/// the YAML line index plus 2: file line 1 is the opening `---`, so YAML line
/// 0 is file line 2.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    fm_yaml
        .lines()
        .position(|line| {
            line.starts_with(key)
                && line
                    .trim_start()
                    .strip_prefix(key)
                    .map_or(false, |rest| rest.starts_with(':'))
        })
        .map(|yaml_idx| (yaml_idx as u32) + 2)
}
2961
2962/// The line a *field-absence* issue (a required key that is missing entirely)
2963/// anchors to: the key's line when present, else line `1` — the frontmatter
2964/// block's opening `---`. A missing key has no line of its own; anchoring it to
2965/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
2966/// line to point at instead of an unhelpful `null`.
2967fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
2968    fm_key_line(fm_yaml, key).or(Some(1))
2969}
2970
2971/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
2972/// output deterministic across runs.
2973fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
2974    a.file
2975        .cmp(&b.file)
2976        .then(a.line.cmp(&b.line))
2977        .then(a.code.cmp(b.code))
2978        .then(a.key.cmp(&b.key))
2979}
2980
2981// ═════════════════════════════════════════════════════════════════════════════
2982//  Tests
2983// ═════════════════════════════════════════════════════════════════════════════
2984
2985#[cfg(test)]
2986mod tests {
2987    use super::*;
2988    use crate::parser::Config;
2989    use std::fs;
2990    use tempfile::TempDir;
2991
2992    /// A test store builder over a real tempdir. Every helper writes real files
2993    /// so the assertions exercise real behavior, not mocks.
2994    struct Fixture {
2995        dir: TempDir,
2996        config: Config,
2997    }
2998
2999    impl Fixture {
3000        /// A fresh store with a **valid** `DB.md` (the identity contract:
3001        /// `type: db-md` + `scope` + `owner`) and the three layer dirs. A valid
3002        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3003        /// clean; tests that want a broken DB.md write their own via `write`.
3004        fn new() -> Self {
3005            let dir = TempDir::new().unwrap();
3006            fs::write(
3007                dir.path().join("DB.md"),
3008                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3009            )
3010            .unwrap();
3011            for layer in ["sources", "records", "wiki"] {
3012                fs::create_dir_all(dir.path().join(layer)).unwrap();
3013            }
3014            Fixture {
3015                dir,
3016                config: Config::default(),
3017            }
3018        }
3019
3020        /// A store with no `DB.md` marker.
3021        fn bare() -> Self {
3022            let dir = TempDir::new().unwrap();
3023            Fixture {
3024                dir,
3025                config: Config::default(),
3026            }
3027        }
3028
3029        /// Write a file at a store-relative path, creating parent dirs.
3030        fn write(&self, rel: &str, contents: &str) {
3031            let abs = self.dir.path().join(rel);
3032            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3033            fs::write(abs, contents).unwrap();
3034        }
3035
3036        fn store(&self) -> Store {
3037            Store {
3038                root: self.dir.path().to_path_buf(),
3039                config: self.config.clone(),
3040            }
3041        }
3042
3043        fn store_all(&self) -> Vec<Issue> {
3044            validate_all(&self.store()).unwrap()
3045        }
3046
3047        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3048        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3049        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3050        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3051        /// the sidecar carries the COMPLETE per-field projection and the fixture
3052        /// can't silently drift from what the index writer emits.
3053        fn rebuild_indexes(&self) {
3054            crate::index::Index::rebuild_all(&self.store()).unwrap();
3055        }
3056    }
3057
3058    /// True if any issue has this code.
3059    fn has(issues: &[Issue], code: &str) -> bool {
3060        issues.iter().any(|i| i.code == code)
3061    }
3062
3063    /// Count issues with a code.
3064    fn count(issues: &[Issue], code: &str) -> usize {
3065        issues.iter().filter(|i| i.code == code).count()
3066    }
3067
3068    /// The first issue with a code, or panic.
3069    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3070        issues
3071            .iter()
3072            .find(|i| i.code == code)
3073            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3074    }
3075
3076    /// A minimal valid `contact` body for reuse.
3077    fn valid_contact(summary: &str) -> String {
3078        format!(
3079            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3080        )
3081    }
3082
3083    // ── store marker ──────────────────────────────────────────────────────────
3084
3085    #[test]
3086    fn not_a_store_when_db_md_absent() {
3087        let fx = Fixture::bare();
3088        let issues = fx.store_all();
3089        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3090        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3091        assert!(issues[0].is_error());
3092    }
3093
3094    #[test]
3095    fn working_set_also_reports_not_a_store() {
3096        let fx = Fixture::bare();
3097        let issues = validate_working_set(&fx.store(), None).unwrap();
3098        assert!(has(&issues, codes::NOT_A_STORE));
3099    }
3100
3101    #[test]
3102    fn clean_store_has_no_issues() {
3103        let fx = Fixture::new();
3104        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3105        // Build the canonical indexes (complete per-field jsonl included) the
3106        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3107        // proven clean across every projected field, not just summary/type.
3108        fx.rebuild_indexes();
3109        let issues = fx.store_all();
3110        assert!(
3111            issues.is_empty(),
3112            "expected a clean store, got: {issues:#?}"
3113        );
3114    }
3115
3116    // ── DB.md structure ───────────────────────────────────────────────────────
3117
3118    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3119    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3120    /// would fail here).
3121    #[test]
3122    fn valid_db_md_emits_no_structure_issue() {
3123        let fx = Fixture::new();
3124        let issues = fx.store_all();
3125        assert!(
3126            !has(&issues, codes::DB_MD_BAD_TYPE)
3127                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3128                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3129            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3130        );
3131    }
3132
3133    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3134    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3135    /// accepting a non-`db-md` type, breaks this.
3136    #[test]
3137    fn db_md_wrong_type_is_error() {
3138        let fx = Fixture::new();
3139        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3140        let issues = fx.store_all();
3141        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3142        assert!(i.is_error());
3143        assert_eq!(i.file, PathBuf::from("DB.md"));
3144        assert_eq!(i.key.as_deref(), Some("type"));
3145        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3146    }
3147
3148    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3149    /// absent field, each keyed on its field name, anchored to the block top.
3150    #[test]
3151    fn db_md_missing_scope_and_owner_each_report() {
3152        let fx = Fixture::new();
3153        fx.write("DB.md", "---\ntype: db-md\n---\n");
3154        let issues = fx.store_all();
3155        assert_eq!(
3156            count(&issues, codes::DB_MD_MISSING_FIELD),
3157            2,
3158            "both scope and owner absent → two issues: {issues:#?}"
3159        );
3160        let keys: BTreeSet<Option<String>> = issues
3161            .iter()
3162            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3163            .map(|i| i.key.clone())
3164            .collect();
3165        assert_eq!(
3166            keys,
3167            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3168            "one issue keyed on each missing field"
3169        );
3170        for i in issues
3171            .iter()
3172            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3173        {
3174            assert!(i.is_error());
3175            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3176        }
3177    }
3178
3179    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3180    /// anchored to its own line — guarding against an "is the key textually
3181    /// present?" shortcut that would miss `owner:` with an empty value.
3182    #[test]
3183    fn db_md_blank_required_field_is_missing() {
3184        let fx = Fixture::new();
3185        fx.write(
3186            "DB.md",
3187            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3188        );
3189        let issues = fx.store_all();
3190        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3191        assert_eq!(i.key.as_deref(), Some("owner"));
3192        assert_eq!(
3193            i.line,
3194            Some(4),
3195            "a present-but-empty field anchors to its line"
3196        );
3197        assert!(
3198            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3199            "scope is present and non-empty → only owner reported"
3200        );
3201    }
3202
3203    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
3204    /// to the heading's file line; the three recognized sections stay silent.
3205    #[test]
3206    fn db_md_unknown_section_is_warning() {
3207        let fx = Fixture::new();
3208        fx.write(
3209            "DB.md",
3210            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
3211            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
3212            // 11 `## Glossary`.
3213            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
3214        );
3215        let issues = fx.store_all();
3216        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
3217        assert!(!i.is_error(), "unknown section is a warning, not an error");
3218        assert_eq!(i.severity, Severity::Warning);
3219        assert_eq!(
3220            i.line,
3221            Some(11),
3222            "anchors to the `## Glossary` heading line"
3223        );
3224        assert!(
3225            i.message.contains("Glossary"),
3226            "the message names the offending section: {}",
3227            i.message
3228        );
3229        // The recognized `## Agent instructions` section did NOT fire.
3230        assert_eq!(
3231            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
3232            1,
3233            "only the unrecognized section is flagged: {issues:#?}"
3234        );
3235    }
3236
3237    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
3238    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
3239    #[test]
3240    fn db_md_no_frontmatter_reports_type_and_both_fields() {
3241        let fx = Fixture::new();
3242        fx.write("DB.md", "# just a heading, no frontmatter\n");
3243        let issues = fx.store_all();
3244        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
3245        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
3246    }
3247
3248    // ── layer-appropriate type ──────────────────────────────────────────────────
3249
3250    /// A `contact` (records-layer type) under `sources/` → `LAYER_TYPE_MISMATCH`
3251    /// (warning), keyed on `type`. The check must compare the type's canonical
3252    /// layer against the file's actual layer.
3253    #[test]
3254    fn contact_under_sources_is_layer_mismatch() {
3255        let fx = Fixture::new();
3256        fx.write(
3257            "sources/misc/c.md",
3258            &valid_contact("a contact in the wrong layer"),
3259        );
3260        let issues = fx.store_all();
3261        let i = find(&issues, codes::LAYER_TYPE_MISMATCH);
3262        assert!(!i.is_error(), "layer mismatch is a warning, not an error");
3263        assert_eq!(i.severity, Severity::Warning);
3264        assert_eq!(i.file, PathBuf::from("sources/misc/c.md"));
3265        assert_eq!(i.key.as_deref(), Some("type"));
3266        assert!(
3267            i.message.contains("records") && i.message.contains("sources"),
3268            "message names both the expected and actual layer: {}",
3269            i.message
3270        );
3271    }
3272
3273    /// An `email` (sources-layer type) under `wiki/` → `LAYER_TYPE_MISMATCH`.
3274    #[test]
3275    fn email_under_wiki_is_layer_mismatch() {
3276        let fx = Fixture::new();
3277        fx.write(
3278            "wiki/notes/e.md",
3279            "---\ntype: email\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: misfiled email\n---\n\n# E\n",
3280        );
3281        let issues = fx.store_all();
3282        let i = find(&issues, codes::LAYER_TYPE_MISMATCH);
3283        assert_eq!(i.file, PathBuf::from("wiki/notes/e.md"));
3284    }
3285
3286    /// A `contact` under `records/` (its canonical layer) → NO layer issue.
3287    /// Pins the no-false-positive half: a correctly-placed recognized type is
3288    /// silent, so a bug that flagged every typed file would fail here.
3289    #[test]
3290    fn contact_under_records_is_not_flagged() {
3291        let fx = Fixture::new();
3292        fx.write("records/contacts/a.md", &valid_contact("correctly placed"));
3293        let issues = fx.store_all();
3294        assert!(
3295            !has(&issues, codes::LAYER_TYPE_MISMATCH),
3296            "a contact under records/ is correctly placed: {issues:#?}"
3297        );
3298    }
3299
3300    /// A CUSTOM (unrecognized) type carries no layer expectation → never flagged,
3301    /// in any layer. Guards against treating "no canonical layer" as a mismatch.
3302    #[test]
3303    fn custom_type_has_no_layer_expectation() {
3304        let fx = Fixture::new();
3305        fx.write(
3306            "wiki/notes/p.md",
3307            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a custom-typed note\n---\n\n# P\n",
3308        );
3309        let issues = fx.store_all();
3310        assert!(
3311            !has(&issues, codes::LAYER_TYPE_MISMATCH),
3312            "a custom type is ambient context with no layer rule: {issues:#?}"
3313        );
3314    }
3315
3316    /// `wiki-page` is the wiki-layer type → silent under `wiki/`, flagged under
3317    /// `records/`. Covers the third layer of the mapping in both directions.
3318    #[test]
3319    fn wiki_page_layer_rule_both_directions() {
3320        let fx = Fixture::new();
3321        fx.write(
3322            "wiki/topics/ok.md",
3323            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: properly placed synthesis\n---\n\n# OK\n",
3324        );
3325        fx.write(
3326            "records/topics/bad.md",
3327            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: synthesis misfiled into records\n---\n\n# BAD\n",
3328        );
3329        let issues = fx.store_all();
3330        let hits: Vec<&Issue> = issues
3331            .iter()
3332            .filter(|i| i.code == codes::LAYER_TYPE_MISMATCH)
3333            .collect();
3334        assert_eq!(hits.len(), 1, "only the misplaced one fires: {hits:#?}");
3335        assert_eq!(hits[0].file, PathBuf::from("records/topics/bad.md"));
3336    }
3337
3338    /// The layer check is a per-file check, so it must also fire in the
3339    /// O(changed) working-set scope (not only `--all`) — for a file the log
3340    /// names as changed. A bug that placed it solely in the sweep would fail
3341    /// here. (The working set is log-driven, so the file must have a log entry.)
3342    #[test]
3343    fn layer_mismatch_fires_in_working_set_scope() {
3344        let fx = Fixture::new();
3345        fx.write(
3346            "sources/misc/c.md",
3347            &valid_contact("wrong layer, working set"),
3348        );
3349        fx.write(
3350            "log.md",
3351            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | sources/misc/c\nadded\n",
3352        );
3353        let issues = validate_working_set(&fx.store(), None).unwrap();
3354        assert!(
3355            has(&issues, codes::LAYER_TYPE_MISMATCH),
3356            "the per-file layer check runs in the working-set scope too: {issues:#?}"
3357        );
3358    }
3359
3360    // ── frontmatter ─────────────────────────────────────────────────────────
3361
3362    #[test]
3363    fn missing_type_is_error() {
3364        let fx = Fixture::new();
3365        fx.write(
3366            "records/contacts/a.md",
3367            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
3368        );
3369        let issues = fx.store_all();
3370        assert!(has(&issues, codes::FM_MISSING_TYPE));
3371        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
3372    }
3373
3374    #[test]
3375    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
3376        let fx = Fixture::new();
3377        fx.write(
3378            "wiki/people/a.md",
3379            "# Just a heading\n\nNo frontmatter here.\n",
3380        );
3381        let issues = fx.store_all();
3382        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3383        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3384    }
3385
3386    #[test]
3387    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
3388        let fx = Fixture::new();
3389        fx.write("wiki/people/a.md", "---\n---\n\nbody\n");
3390        let issues = fx.store_all();
3391        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3392        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3393    }
3394
3395    #[test]
3396    fn malformed_yaml_is_error_and_suppresses_field_checks() {
3397        let fx = Fixture::new();
3398        // A tab inside a mapping value is invalid YAML.
3399        fx.write(
3400            "records/contacts/a.md",
3401            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
3402        );
3403        let issues = fx.store_all();
3404        assert!(has(&issues, codes::FM_MALFORMED_YAML));
3405        // When YAML doesn't parse we don't *also* claim the summary is missing;
3406        // the agent fixes the YAML first.
3407        assert!(
3408            !has(&issues, codes::SUMMARY_MISSING),
3409            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
3410        );
3411    }
3412
3413    #[test]
3414    fn bad_created_timestamp_is_error() {
3415        let fx = Fixture::new();
3416        fx.write(
3417            "records/contacts/a.md",
3418            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
3419        );
3420        let issues = fx.store_all();
3421        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
3422        assert_eq!(issue.key.as_deref(), Some("created"));
3423        assert!(issue.is_error());
3424    }
3425
3426    #[test]
3427    fn date_only_created_is_rejected_but_type_date_field_accepted() {
3428        let fx = Fixture::new();
3429        // `created` must be a full RFC3339 datetime → a date-only value is bad.
3430        // `last_touch` is a type-specific date field → date-only is fine.
3431        fx.write(
3432            "records/contacts/a.md",
3433            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
3434        );
3435        let issues = fx.store_all();
3436        let created_issues: Vec<_> = issues
3437            .iter()
3438            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
3439            .collect();
3440        assert_eq!(
3441            created_issues.len(),
3442            1,
3443            "date-only `created` must fail: {issues:#?}"
3444        );
3445        assert!(
3446            !issues.iter().any(
3447                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
3448            ),
3449            "date-only `last_touch` is valid: {issues:#?}"
3450        );
3451    }
3452
3453    // ── summary ─────────────────────────────────────────────────────────────
3454
3455    #[test]
3456    fn summary_missing_empty_multiline_toolong() {
3457        let fx = Fixture::new();
3458        fx.write(
3459            "wiki/people/missing.md",
3460            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
3461        );
3462        fx.write(
3463            "wiki/people/empty.md",
3464            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
3465        );
3466        let long = "x".repeat(201);
3467        fx.write(
3468            "wiki/people/long.md",
3469            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
3470        );
3471        let issues = fx.store_all();
3472        assert!(has(&issues, codes::SUMMARY_MISSING));
3473        assert_eq!(
3474            find(&issues, codes::SUMMARY_MISSING).file,
3475            PathBuf::from("wiki/people/missing.md")
3476        );
3477        assert!(has(&issues, codes::SUMMARY_EMPTY));
3478        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
3479        assert_eq!(
3480            find(&issues, codes::SUMMARY_TOO_LONG).severity,
3481            Severity::Warning
3482        );
3483    }
3484
3485    #[test]
3486    fn summary_multiline_via_yaml_block_scalar() {
3487        let fx = Fixture::new();
3488        // A literal block scalar produces a value with a newline.
3489        fx.write(
3490            "wiki/people/a.md",
3491            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
3492        );
3493        let issues = fx.store_all();
3494        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
3495    }
3496
3497    #[test]
3498    fn summary_exactly_200_chars_is_ok() {
3499        let fx = Fixture::new();
3500        let s = "y".repeat(200);
3501        fx.write(
3502            "wiki/people/a.md",
3503            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
3504        );
3505        let issues = fx.store_all();
3506        assert!(
3507            !has(&issues, codes::SUMMARY_TOO_LONG),
3508            "200 is the bound, inclusive: {issues:#?}"
3509        );
3510    }
3511
3512    #[test]
3513    fn meta_files_need_no_summary() {
3514        let fx = Fixture::new();
3515        // The root/layer/type indexes + log carry no summary and must not be
3516        // flagged. (A lone DB.md store with one contact and full indexes.)
3517        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3518        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
3519        fx.write(
3520            "records/index.md",
3521            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
3522        );
3523        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
3524        fx.write(
3525            "records/contacts/index.jsonl",
3526            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
3527        );
3528        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
3529        let issues = fx.store_all();
3530        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3531    }
3532
3533    // ── tags ────────────────────────────────────────────────────────────────
3534
3535    #[test]
3536    fn nested_tags_warns_flat_tags_ok() {
3537        let fx = Fixture::new();
3538        fx.write(
3539            "records/contacts/nested.md",
3540            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
3541        );
3542        fx.write(
3543            "records/contacts/flat.md",
3544            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
3545        );
3546        let issues = fx.store_all();
3547        let tag_issues: Vec<_> = issues
3548            .iter()
3549            .filter(|i| i.code == codes::TAGS_MALFORMED)
3550            .collect();
3551        assert_eq!(
3552            tag_issues.len(),
3553            1,
3554            "only the nested-tags file should warn: {issues:#?}"
3555        );
3556        assert_eq!(
3557            tag_issues[0].file,
3558            PathBuf::from("records/contacts/nested.md")
3559        );
3560        assert_eq!(tag_issues[0].severity, Severity::Warning);
3561    }
3562
3563    // ── wiki-links ────────────────────────────────────────────────────────────
3564
3565    #[test]
3566    fn short_form_wiki_link_is_error() {
3567        let fx = Fixture::new();
3568        let mut body = valid_contact("links to a short form");
3569        body.push_str("\nSee [[sarah-chen]] for details.\n");
3570        fx.write("wiki/people/a.md", &body);
3571        let issues = fx.store_all();
3572        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3573        assert!(issue.is_error());
3574        assert!(issue.message.contains("sarah-chen"));
3575        // A short-form link must NOT also be reported broken — fix the form first.
3576        assert!(
3577            !issues
3578                .iter()
3579                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
3580            "short-form should suppress broken: {issues:#?}"
3581        );
3582    }
3583
3584    #[test]
3585    fn broken_full_path_wiki_link_is_error() {
3586        let fx = Fixture::new();
3587        let mut body = valid_contact("links to a missing file");
3588        body.push_str("\nSee [[records/contacts/ghost]].\n");
3589        fx.write("wiki/people/a.md", &body);
3590        let issues = fx.store_all();
3591        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
3592        assert!(issue.is_error());
3593        assert!(issue.message.contains("records/contacts/ghost"));
3594    }
3595
3596    #[test]
3597    fn valid_full_path_wiki_link_passes() {
3598        let fx = Fixture::new();
3599        fx.write("records/contacts/target.md", &valid_contact("target"));
3600        let mut body = valid_contact("links to target");
3601        body.push_str("\nSee [[records/contacts/target]].\n");
3602        fx.write("wiki/people/a.md", &body);
3603        let issues = fx.store_all();
3604        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3605        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
3606    }
3607
3608    #[test]
3609    fn md_extension_wiki_link_warns_and_resolves() {
3610        let fx = Fixture::new();
3611        fx.write("records/contacts/target.md", &valid_contact("target"));
3612        let mut body = valid_contact("links with extension");
3613        body.push_str("\nSee [[records/contacts/target.md]].\n");
3614        fx.write("wiki/people/a.md", &body);
3615        let issues = fx.store_all();
3616        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
3617        assert_eq!(issue.severity, Severity::Warning);
3618        assert_eq!(
3619            issue.suggestion.as_deref(),
3620            Some("drop the extension: [[records/contacts/target]]")
3621        );
3622        // The target exists once `.md` is stripped → not broken.
3623        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3624    }
3625
3626    #[test]
3627    fn wiki_links_in_code_fences_are_ignored() {
3628        let fx = Fixture::new();
3629        let mut body = valid_contact("has a fenced example");
3630        body.push_str("\n```\n[[sarah-chen]]\n```\n");
3631        fx.write("wiki/people/a.md", &body);
3632        let issues = fx.store_all();
3633        assert!(
3634            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
3635            "fenced wiki-links must be ignored: {issues:#?}"
3636        );
3637    }
3638
3639    #[test]
3640    fn flow_form_link_list_in_frontmatter_is_error() {
3641        let fx = Fixture::new();
3642        fx.write(
3643            "records/meetings/m.md",
3644            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
3645        );
3646        let issues = fx.store_all();
3647        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
3648        assert!(issue.is_error());
3649        assert_eq!(issue.key.as_deref(), Some("attendees"));
3650    }
3651
3652    #[test]
3653    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
3654        let fx = Fixture::new();
3655        fx.write("records/contacts/a.md", &valid_contact("a"));
3656        fx.write("records/contacts/b.md", &valid_contact("b"));
3657        fx.write(
3658            "records/meetings/m.md",
3659            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
3660        );
3661        let issues = fx.store_all();
3662        assert!(
3663            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
3664            "{issues:#?}"
3665        );
3666        // Block-form link targets are still integrity-checked (both exist here).
3667        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3668    }
3669
3670    #[test]
3671    fn frontmatter_short_form_link_field_is_error() {
3672        let fx = Fixture::new();
3673        // `related` is a *custom* (non-schema) wiki-link field, so it goes
3674        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
3675        fx.write(
3676            "wiki/people/a.md",
3677            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
3678        );
3679        let issues = fx.store_all();
3680        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3681        assert!(issue.is_error());
3682        assert_eq!(issue.key.as_deref(), Some("related"));
3683    }
3684
3685    #[test]
3686    fn unquoted_frontmatter_link_is_recognized() {
3687        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
3688        // string. The validator must still see it as a wiki-link (text-based
3689        // extraction). A short-form custom field must report SHORT_FORM, and a
3690        // full-path one with a missing target must report BROKEN.
3691        let fx = Fixture::new();
3692        fx.write(
3693            "wiki/people/short.md",
3694            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
3695        );
3696        fx.write(
3697            "wiki/people/broken.md",
3698            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
3699        );
3700        let issues = fx.store_all();
3701        assert!(
3702            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
3703                && i.file == *"wiki/people/short.md"
3704                && i.key.as_deref() == Some("related")),
3705            "unquoted short-form frontmatter link must be caught: {issues:#?}"
3706        );
3707        assert!(
3708            issues
3709                .iter()
3710                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.file == *"wiki/people/broken.md"),
3711            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
3712        );
3713    }
3714
3715    #[test]
3716    fn short_form_canonical_link_field_is_prefix_mismatch() {
3717        // A short-form value in a *canonical* link field (`contact.company`) is
3718        // a SCHEMA_LINK_PREFIX_MISMATCH (the target isn't under the prefix), not
3719        // a bare SHORT_FORM — the schema path owns that field's vocabulary.
3720        let fx = Fixture::new();
3721        fx.write(
3722            "records/contacts/a.md",
3723            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
3724        );
3725        let issues = fx.store_all();
3726        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
3727        assert_eq!(issue.key.as_deref(), Some("company"));
3728        // The same link must NOT also be double-reported via the generic path.
3729        assert!(
3730            !issues
3731                .iter()
3732                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
3733                    && i.key.as_deref() == Some("company")),
3734            "schema link fields are checked once, by the schema path: {issues:#?}"
3735        );
3736    }
3737
3738    // ── schema: implicit canonical link fields ───────────────────────────────
3739
3740    #[test]
3741    fn contact_company_plain_string_is_link_prefix_mismatch() {
3742        let fx = Fixture::new();
3743        fx.write(
3744            "records/contacts/a.md",
3745            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
3746        );
3747        let issues = fx.store_all();
3748        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
3749        assert!(issue.is_error());
3750        assert_eq!(issue.key.as_deref(), Some("company"));
3751        let sugg = issue.suggestion.as_deref().unwrap();
3752        assert!(
3753            sugg.contains("records/companies/"),
3754            "suggestion should name the prefix: {sugg}"
3755        );
3756    }
3757
3758    #[test]
3759    fn contact_company_wrong_prefix_is_link_prefix_mismatch() {
3760        let fx = Fixture::new();
3761        // Points under records/people/ but the canonical prefix is companies/.
3762        fx.write(
3763            "records/people/acme.md",
3764            &valid_contact("acme as a person? wrong"),
3765        );
3766        fx.write(
3767            "records/contacts/a.md",
3768            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"[[records/people/acme]]\"\n---\n\n# Sarah\n",
3769        );
3770        let issues = fx.store_all();
3771        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
3772        assert_eq!(issue.key.as_deref(), Some("company"));
3773    }
3774
3775    #[test]
3776    fn contact_company_correct_link_passes_schema() {
3777        let fx = Fixture::new();
3778        fx.write(
3779            "records/companies/acme.md",
3780            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a company\nname: Acme\n---\n\n# Acme\n",
3781        );
3782        fx.write(
3783            "records/contacts/a.md",
3784            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Sarah\n",
3785        );
3786        let issues = fx.store_all();
3787        assert!(
3788            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
3789            "{issues:#?}"
3790        );
3791    }
3792
3793    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
3794
3795    #[test]
3796    fn explicit_schema_required_shape_enum() {
3797        let fx = {
3798            let mut fx = Fixture::new();
3799            // contact schema: name required, email required+email shape,
3800            // status enum: active|inactive
3801            let schema = Schema {
3802                fields: vec![
3803                    FieldSpec {
3804                        name: "name".into(),
3805                        required: true,
3806                        ..Default::default()
3807                    },
3808                    FieldSpec {
3809                        name: "email".into(),
3810                        required: true,
3811                        shape: Some(Shape::Email),
3812                        ..Default::default()
3813                    },
3814                    FieldSpec {
3815                        name: "status".into(),
3816                        enum_values: Some(vec!["active".into(), "inactive".into()]),
3817                        ..Default::default()
3818                    },
3819                ],
3820            };
3821            fx.config.schemas.insert("contact".into(), schema);
3822            fx
3823        };
3824        fx.write(
3825            "records/contacts/a.md",
3826            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
3827        );
3828        let issues = fx.store_all();
3829        // name absent → MISSING_REQUIRED
3830        assert!(
3831            issues
3832                .iter()
3833                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
3834                    && i.key.as_deref() == Some("name")),
3835            "{issues:#?}"
3836        );
3837        // email malformed → SHAPE_MISMATCH
3838        assert!(
3839            issues.iter().any(
3840                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
3841            ),
3842            "{issues:#?}"
3843        );
3844        // status archived not in enum → ENUM_VIOLATION
3845        assert!(
3846            issues
3847                .iter()
3848                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
3849                    && i.key.as_deref() == Some("status")),
3850            "{issues:#?}"
3851        );
3852    }
3853
3854    #[test]
3855    fn explicit_schema_overrides_implicit_canonical() {
3856        // An explicit `contact` schema with NO company link field means a plain
3857        // `company` string is fine (the implicit canonical link is overridden).
3858        let mut fx = Fixture::new();
3859        fx.config.schemas.insert(
3860            "contact".into(),
3861            Schema {
3862                fields: vec![FieldSpec {
3863                    name: "name".into(),
3864                    required: true,
3865                    ..Default::default()
3866                }],
3867            },
3868        );
3869        fx.write(
3870            "records/contacts/a.md",
3871            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
3872        );
3873        let issues = fx.store_all();
3874        assert!(
3875            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
3876            "explicit schema with no company link should override the implicit canonical one: {issues:#?}"
3877        );
3878    }
3879
3880    #[test]
3881    fn schema_shape_int_and_url_and_currency() {
3882        let mut fx = Fixture::new();
3883        fx.config.schemas.insert(
3884            "widget".into(),
3885            Schema {
3886                fields: vec![
3887                    FieldSpec {
3888                        name: "qty".into(),
3889                        shape: Some(Shape::Int),
3890                        ..Default::default()
3891                    },
3892                    FieldSpec {
3893                        name: "site".into(),
3894                        shape: Some(Shape::Url),
3895                        ..Default::default()
3896                    },
3897                    FieldSpec {
3898                        name: "price".into(),
3899                        shape: Some(Shape::Currency),
3900                        ..Default::default()
3901                    },
3902                ],
3903            },
3904        );
3905        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
3906        // ISO code + amount). It must pass — it used to spuriously fail.
3907        fx.write(
3908            "records/widgets/ok.md",
3909            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
3910        );
3911        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
3912        // because the old impl leaned on `f64::parse`. `price: inf` here guards
3913        // the under-rejection half of the finding.
3914        fx.write(
3915            "records/widgets/bad.md",
3916            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
3917        );
3918        let issues = fx.store_all();
3919        let bad_shape: Vec<_> = issues
3920            .iter()
3921            .filter(|i| {
3922                i.code == codes::SCHEMA_SHAPE_MISMATCH && i.file == *"records/widgets/bad.md"
3923            })
3924            .map(|i| i.key.clone().unwrap_or_default())
3925            .collect();
3926        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
3927        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
3928        assert!(
3929            bad_shape.contains(&"price".to_string()),
3930            "inf must be rejected as currency: {issues:#?}"
3931        );
3932        assert!(
3933            !issues
3934                .iter()
3935                .any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
3936                    && i.file == *"records/widgets/ok.md"),
3937            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
3938        );
3939    }
3940
3941    #[test]
3942    fn is_currency_accepts_codes_and_rejects_non_numeric() {
3943        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
3944        for ok in [
3945            "100",
3946            "1234.56",
3947            "$1,234.50",
3948            "USD 100", // the finding's headline probe — used to be false
3949            "usd 100", // case-insensitive code
3950            "EUR 9.50",
3951            "£12",
3952            "¥1000",
3953            "-5.00", // signed amounts are real (refunds)
3954            "+5",
3955            "1,000,000",
3956        ] {
3957            assert!(is_currency(ok), "expected currency: {ok:?}");
3958        }
3959        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
3960        // bare-code / exponent cases the docstring forbids.
3961        for bad in [
3962            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
3963            "12.999", // 3 decimals
3964            "1.2345", // 4 decimals
3965            "USD",    // bare code, no amount
3966            "$",      // bare symbol
3967            "free", "", " ", "1e3",      // exponent form
3968            "1.",       // trailing dot, no fractional digits
3969            ".5",       // leading dot, no integer digits
3970            "1 000",    // space as separator is not a thousands separator
3971            "USDD 100", // 4-letter "code" must not strip
3972        ] {
3973            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
3974        }
3975    }
3976
3977    // ── policies ───────────────────────────────────────────────────────────
3978
3979    #[test]
3980    fn ignored_type_present_is_info() {
3981        let mut fx = Fixture::new();
3982        fx.config.ignored_types.push("temp".into());
3983        fx.write(
3984            "records/temps/x.md",
3985            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
3986        );
3987        let issues = fx.store_all();
3988        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
3989        assert_eq!(issue.severity, Severity::Info);
3990        assert!(!issue.is_error());
3991    }
3992
3993    #[test]
3994    fn wiki_page_derived_from_ignored_type_warns() {
3995        let mut fx = Fixture::new();
3996        fx.config.ignored_types.push("temp".into());
3997        fx.write(
3998            "records/temps/x.md",
3999            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4000        );
4001        fx.write(
4002            "wiki/themes/t.md",
4003            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4004        );
4005        let issues = fx.store_all();
4006        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4007        assert_eq!(issue.severity, Severity::Warning);
4008        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4009    }
4010
4011    /// The shared `derived_from_ignored_type` entry point — the single
4012    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
4013    /// warning) now route through, so they cannot diverge. This pins its
4014    /// contract directly: the type gate, the empty-ignored-types gate, a
4015    /// positive match carrying the resolved target type, and a non-ignored
4016    /// target rejected.
4017    #[test]
4018    fn derived_from_ignored_type_is_the_shared_policy_decision() {
4019        let mut fx = Fixture::new();
4020        fx.config.ignored_types.push("secret".into());
4021        // An ignored-type record …
4022        fx.write(
4023            "records/secrets/s.md",
4024            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4025        );
4026        // … and a non-ignored record.
4027        fx.write(
4028            "records/contacts/c.md",
4029            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4030        );
4031        let store = fx.store();
4032
4033        // Positive: a wiki-page deriving from the ignored-type record matches,
4034        // and the hit carries both the target (as written) and its resolved type.
4035        let hit =
4036            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s"))
4037                .expect("wiki-page → ignored-type record must match");
4038        assert_eq!(hit.target, "records/secrets/s");
4039        assert_eq!(hit.target_type, "secret");
4040
4041        // Type gate: a non-`wiki-page` type never triggers, even with the same
4042        // ignored-type target.
4043        assert_eq!(
4044            derived_from_ignored_type(&store, "contact", std::iter::once("records/secrets/s")),
4045            None,
4046            "only wiki-page derivation is policed"
4047        );
4048
4049        // Target gate: a wiki-page deriving from a non-ignored record is fine.
4050        assert_eq!(
4051            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/contacts/c")),
4052            None,
4053            "deriving from a non-ignored type is allowed"
4054        );
4055
4056        // First match wins across multiple targets (here the second is the hit).
4057        let hit = derived_from_ignored_type(
4058            &store,
4059            "wiki-page",
4060            ["records/contacts/c", "records/secrets/s"],
4061        )
4062        .expect("a later ignored-type target must still be found");
4063        assert_eq!(hit.target, "records/secrets/s");
4064
4065        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
4066        fx.config.ignored_types.clear();
4067        let store = fx.store();
4068        assert_eq!(
4069            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s")),
4070            None,
4071            "an empty ignored-types policy short-circuits"
4072        );
4073    }
4074
4075    // ── duplicates ───────────────────────────────────────────────────────────
4076
4077    #[test]
4078    fn dup_id_is_hard_error_with_related() {
4079        let fx = Fixture::new();
4080        fx.write(
4081            "records/contacts/a.md",
4082            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4083        );
4084        fx.write(
4085            "records/contacts/b.md",
4086            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4087        );
4088        let issues = fx.store_all();
4089        // Reporting rule #1: ONE issue per collision group, keyed on the
4090        // lexicographically smallest path (`a.md`), partner in `related`.
4091        assert_eq!(
4092            count(&issues, codes::DUP_ID),
4093            1,
4094            "one issue per group: {issues:#?}"
4095        );
4096        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
4097        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
4098        assert!(a.is_error());
4099        assert_eq!(a.key.as_deref(), Some("id"));
4100        assert_eq!(
4101            a.line,
4102            Some(3),
4103            "anchors to the `id` line on the reported file"
4104        );
4105        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
4106    }
4107
4108    #[test]
4109    fn dup_id_not_fired_in_working_set() {
4110        // DUP_* is an --all-only cross-file check; the working set must not run it.
4111        let fx = Fixture::new();
4112        fx.write(
4113            "records/contacts/a.md",
4114            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4115        );
4116        fx.write(
4117            "records/contacts/b.md",
4118            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4119        );
4120        // Log says both changed since epoch, so they're in the working set.
4121        fx.write(
4122            "log.md",
4123            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
4124        );
4125        let issues = validate_working_set(&fx.store(), None).unwrap();
4126        assert!(
4127            !has(&issues, codes::DUP_ID),
4128            "DUP_ID is --all only: {issues:#?}"
4129        );
4130    }
4131
4132    #[test]
4133    fn dup_contact_email_is_warning() {
4134        let fx = Fixture::new();
4135        for (f, name) in [("a", "A"), ("b", "B")] {
4136            fx.write(
4137                &format!("records/contacts/{f}.md"),
4138                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
4139            );
4140        }
4141        let issues = fx.store_all();
4142        // One issue per group (rule #1), keyed on the smallest path, anchored to
4143        // the `email` field.
4144        assert_eq!(count(&issues, codes::DUP_CONTACT_EMAIL), 1);
4145        let dup = find(&issues, codes::DUP_CONTACT_EMAIL);
4146        assert_eq!(dup.severity, Severity::Warning);
4147        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
4148        assert_eq!(dup.key.as_deref(), Some("email"));
4149        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
4150    }
4151
4152    #[test]
4153    fn dup_expense_tuple_and_clean_when_one_field_differs() {
4154        let fx = Fixture::new();
4155        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
4156        let exp = |f: &str, amount: &str| {
4157            format!(
4158            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
4159        )
4160        };
4161        fx.write("records/expenses/e1.md", &exp("e1", "100"));
4162        fx.write("records/expenses/e2.md", &exp("e2", "100"));
4163        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
4164        let issues = fx.store_all();
4165        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
4166        // (e1) with e2 in `related`; e3 differs on amount and never appears.
4167        assert_eq!(
4168            count(&issues, codes::DUP_EXPENSE_TUPLE),
4169            1,
4170            "only e1+e2 collide, one issue: {issues:#?}"
4171        );
4172        let dup = find(&issues, codes::DUP_EXPENSE_TUPLE);
4173        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
4174        assert_eq!(dup.line, Some(1), "tuple collision anchors to line 1");
4175        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
4176        assert!(
4177            !issues.iter().any(|i| i.code == codes::DUP_EXPENSE_TUPLE
4178                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
4179            "e3 differs on amount and must not collide: {issues:#?}"
4180        );
4181    }
4182
4183    #[test]
4184    fn dup_meeting_tuple_is_attendee_set_order_independent() {
4185        let fx = Fixture::new();
4186        fx.write("records/contacts/a.md", &valid_contact("a"));
4187        fx.write("records/contacts/b.md", &valid_contact("b"));
4188        let m = |f: &str, order: &str| {
4189            let attendees = if order == "ab" {
4190                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
4191            } else {
4192                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
4193            };
4194            format!(
4195                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
4196            )
4197        };
4198        fx.write("records/meetings/m1.md", &m("m1", "ab"));
4199        fx.write("records/meetings/m2.md", &m("m2", "ba"));
4200        let issues = fx.store_all();
4201        // One issue per group (rule #1): the attendee SET is order-independent,
4202        // so m1 (ab) and m2 (ba) collide → a single issue on the smaller path.
4203        assert_eq!(
4204            count(&issues, codes::DUP_MEETING_TUPLE),
4205            1,
4206            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
4207        );
4208        let dup = find(&issues, codes::DUP_MEETING_TUPLE);
4209        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
4210        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
4211    }
4212
4213    // ── indexes ───────────────────────────────────────────────────────────────
4214
4215    #[test]
4216    fn missing_indexes_at_all_three_levels() {
4217        let fx = Fixture::new();
4218        fx.write("records/contacts/a.md", &valid_contact("a"));
4219        let issues = fx.store_all();
4220        // root, layer (records), and type-folder (records/contacts) all missing.
4221        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
4222        // would-be index.md), per the field convention `EXPECTED` pins.
4223        let missing_files: BTreeSet<PathBuf> = issues
4224            .iter()
4225            .filter(|i| i.code == codes::INDEX_MISSING)
4226            .map(|i| i.file.clone())
4227            .collect();
4228        assert!(
4229            missing_files.contains(&PathBuf::from("index.md")),
4230            "{issues:#?}"
4231        );
4232        assert!(
4233            missing_files.contains(&PathBuf::from("records/index.md")),
4234            "{issues:#?}"
4235        );
4236        assert!(
4237            missing_files.contains(&PathBuf::from("records/contacts")),
4238            "{issues:#?}"
4239        );
4240        // When the index.md is entirely absent we do NOT additionally fire
4241        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
4242        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
4243    }
4244
4245    #[test]
4246    fn index_stale_entry_and_missing_entry() {
4247        let fx = Fixture::new();
4248        fx.write(
4249            "records/contacts/present.md",
4250            &valid_contact("present contact"),
4251        );
4252        // Indexes for the parents (root/layer) present so we isolate type-folder.
4253        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4254        fx.write(
4255            "records/index.md",
4256            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4257        );
4258        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
4259        fx.write(
4260            "records/contacts/index.md",
4261            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
4262        );
4263        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
4264        let issues = fx.store_all();
4265        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
4266        assert!(stale.message.contains("ghost"));
4267        assert!(stale.is_error());
4268        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
4269        assert!(
4270            missing.message.contains("present.md"),
4271            "{}",
4272            missing.message
4273        );
4274    }
4275
4276    #[test]
4277    fn index_summary_mismatch() {
4278        let fx = Fixture::new();
4279        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
4280        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4281        fx.write(
4282            "records/index.md",
4283            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4284        );
4285        fx.write(
4286            "records/contacts/index.md",
4287            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
4288        );
4289        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
4290        let issues = fx.store_all();
4291        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
4292        assert!(issue.is_error());
4293        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
4294    }
4295
4296    #[test]
4297    fn index_summary_match_passes() {
4298        let fx = Fixture::new();
4299        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
4300        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4301        fx.write(
4302            "records/index.md",
4303            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4304        );
4305        fx.write(
4306            "records/contacts/index.md",
4307            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
4308        );
4309        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
4310        let issues = fx.store_all();
4311        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
4312    }
4313
4314    #[test]
4315    fn index_entry_with_tag_suffix_matches_summary() {
4316        let fx = Fixture::new();
4317        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
4318        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4319        fx.write(
4320            "records/index.md",
4321            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4322        );
4323        // Entry carries a ` · #tag` suffix which must be stripped before compare.
4324        fx.write(
4325            "records/contacts/index.md",
4326            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary · #customer\n",
4327        );
4328        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
4329        let issues = fx.store_all();
4330        assert!(
4331            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
4332            "tag suffix should be stripped: {issues:#?}"
4333        );
4334    }
4335
4336    #[test]
4337    fn index_jsonl_desync_missing_file_in_jsonl() {
4338        let fx = Fixture::new();
4339        fx.write("records/contacts/a.md", &valid_contact("a"));
4340        fx.write("records/contacts/b.md", &valid_contact("b"));
4341        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
4342        fx.write(
4343            "records/index.md",
4344            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4345        );
4346        fx.write(
4347            "records/contacts/index.md",
4348            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
4349        );
4350        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
4351        fx.write(
4352            "records/contacts/index.jsonl",
4353            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4354        );
4355        let issues = fx.store_all();
4356        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
4357        assert!(desync.message.contains("b.md"), "{}", desync.message);
4358    }
4359
4360    #[test]
4361    fn index_jsonl_desync_record_points_at_missing_file() {
4362        let fx = Fixture::new();
4363        fx.write("records/contacts/a.md", &valid_contact("a"));
4364        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4365        fx.write(
4366            "records/index.md",
4367            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4368        );
4369        fx.write(
4370            "records/contacts/index.md",
4371            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
4372        );
4373        fx.write(
4374            "records/contacts/index.jsonl",
4375            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
4376        );
4377        let issues = fx.store_all();
4378        assert!(
4379            issues
4380                .iter()
4381                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
4382            "{issues:#?}"
4383        );
4384    }
4385
4386    #[test]
4387    fn index_jsonl_stale_summary() {
4388        let fx = Fixture::new();
4389        fx.write("records/contacts/a.md", &valid_contact("real summary"));
4390        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4391        fx.write(
4392            "records/index.md",
4393            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4394        );
4395        fx.write(
4396            "records/contacts/index.md",
4397            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
4398        );
4399        // jsonl summary disagrees with the file frontmatter.
4400        fx.write(
4401            "records/contacts/index.jsonl",
4402            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
4403        );
4404        let issues = fx.store_all();
4405        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4406        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4407        assert!(stale.key.as_deref().unwrap().contains("summary"));
4408    }
4409
4410    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
4411    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
4412    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
4413    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
4414    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
4415    /// `email` validated clean and answered `--where email=…` with a phantom
4416    /// value present in no file. This is the direct regression guard.
4417    #[test]
4418    fn index_jsonl_stale_queryable_field_email() {
4419        let fx = Fixture::new();
4420        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
4421        fx.write("records/contacts/a.md", contact);
4422        // Start from the canonical, fully-correct sidecar set …
4423        fx.rebuild_indexes();
4424        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
4425        let good = fs::read_to_string(&jsonl_path).unwrap();
4426        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
4427        assert!(
4428            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4429            "freshly-rebuilt sidecar must not be stale"
4430        );
4431        // … then desync ONLY the email so it's the single differing field.
4432        assert!(
4433            good.contains("real@correct.com"),
4434            "sidecar projects email: {good}"
4435        );
4436        fx.write(
4437            "records/contacts/index.jsonl",
4438            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
4439        );
4440
4441        let issues = fx.store_all();
4442        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4443        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4444        // The mismatch is reported precisely on `email`, and summary/type — which
4445        // still match — are NOT named.
4446        let key = stale.key.as_deref().unwrap();
4447        assert!(
4448            key.contains("email"),
4449            "expected `email` in stale key, got {key:?}"
4450        );
4451        assert!(!key.contains("summary"), "summary still matches: {key:?}");
4452        assert!(!key.contains("type"), "type still matches: {key:?}");
4453    }
4454
4455    /// Broaden the guard across the typed/list/timestamp projections at once:
4456    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
4457    /// caught, with all three named in one issue.
4458    #[test]
4459    fn index_jsonl_stale_typed_and_list_fields() {
4460        let fx = Fixture::new();
4461        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
4462        fx.write("records/expenses/e.md", expense);
4463        fx.rebuild_indexes();
4464        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
4465        let good = fs::read_to_string(&jsonl_path).unwrap();
4466        assert!(
4467            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4468            "freshly-rebuilt sidecar must not be stale"
4469        );
4470        // Desync a list field (tags), a timestamp (updated), and a number (amount).
4471        let stale_line = good
4472            .replace("\"q2\"", "\"WRONG-TAG\"")
4473            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
4474            .replace("1299", "9999");
4475        fx.write("records/expenses/index.jsonl", &stale_line);
4476
4477        let issues = fx.store_all();
4478        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4479        let key = stale.key.as_deref().unwrap();
4480        for expected in ["amount", "tags", "updated"] {
4481            assert!(
4482                key.contains(expected),
4483                "expected `{expected}` in stale key, got {key:?}"
4484            );
4485        }
4486    }
4487
4488    #[test]
4489    fn index_orphan_in_noncanonical_folder() {
4490        let fx = Fixture::new();
4491        fx.write("records/contacts/a.md", &valid_contact("a"));
4492        // Build the canonical indexes so they aren't reported as orphans.
4493        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4494        fx.write(
4495            "records/index.md",
4496            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4497        );
4498        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4499        fx.write(
4500            "records/contacts/index.jsonl",
4501            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4502        );
4503        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
4504        fx.write(
4505            "records/contacts/subfolder/index.md",
4506            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
4507        );
4508        let issues = fx.store_all();
4509        let orphan = find(&issues, codes::INDEX_ORPHAN);
4510        assert_eq!(orphan.severity, Severity::Warning);
4511        assert_eq!(
4512            orphan.file,
4513            PathBuf::from("records/contacts/subfolder/index.md")
4514        );
4515    }
4516
4517    #[test]
4518    fn index_wrong_scope() {
4519        let fx = Fixture::new();
4520        fx.write("records/contacts/a.md", &valid_contact("a"));
4521        // Root index declares the wrong scope.
4522        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4523        fx.write(
4524            "records/index.md",
4525            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4526        );
4527        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4528        fx.write(
4529            "records/contacts/index.jsonl",
4530            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4531        );
4532        let issues = fx.store_all();
4533        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
4534        assert_eq!(issue.severity, Severity::Warning);
4535        assert_eq!(issue.file, PathBuf::from("index.md"));
4536    }
4537
4538    #[test]
4539    fn capped_type_folder_index_does_not_flag_missing_entries() {
4540        // Over the 500-entry cap, omitted entries are expected, not an error.
4541        let fx = Fixture::new();
4542        for i in 0..501 {
4543            fx.write(
4544                &format!("records/contacts/c{i:04}.md"),
4545                &valid_contact(&format!("contact {i}")),
4546            );
4547        }
4548        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
4549        fx.write(
4550            "records/index.md",
4551            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4552        );
4553        // Type-folder index lists only ONE entry + a More footer.
4554        fx.write(
4555            "records/contacts/index.md",
4556            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
4557        );
4558        // jsonl must still be complete — write all 501 lines.
4559        let mut jsonl = String::new();
4560        for i in 0..501 {
4561            jsonl.push_str(&format!(
4562                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
4563            ));
4564        }
4565        fx.write("records/contacts/index.jsonl", &jsonl);
4566        let issues = fx.store_all();
4567        assert!(
4568            !has(&issues, codes::INDEX_MISSING_ENTRY),
4569            "over the cap, missing browse entries are expected: {issues:#?}"
4570        );
4571        // But the jsonl is complete → no desync.
4572        assert!(
4573            !has(&issues, codes::INDEX_JSONL_DESYNC),
4574            "{:#?}",
4575            issues
4576                .iter()
4577                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
4578                .collect::<Vec<_>>()
4579        );
4580    }
4581
4582    // ── log ────────────────────────────────────────────────────────────────
4583
4584    #[test]
4585    fn log_bad_timestamp_unknown_kind_out_of_order() {
4586        let fx = Fixture::new();
4587        fx.write(
4588            "log.md",
4589            concat!(
4590                "---\ntype: log\n---\n\n# Log\n\n",
4591                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4592                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
4593                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
4594                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
4595            ),
4596        );
4597        let issues = fx.store_all();
4598        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4599        assert_eq!(
4600            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
4601            Severity::Warning
4602        );
4603        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
4604        assert_eq!(unknown.severity, Severity::Warning);
4605        assert!(unknown.message.contains("frobnicate"));
4606        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
4607        assert!(bad.is_error());
4608    }
4609
4610    #[test]
4611    fn log_validate_entry_without_object_is_well_formed() {
4612        let fx = Fixture::new();
4613        fx.write(
4614            "log.md",
4615            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
4616        );
4617        let issues = fx.store_all();
4618        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
4619        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
4620    }
4621
4622    #[test]
4623    fn log_in_order_is_clean() {
4624        let fx = Fixture::new();
4625        fx.write(
4626            "log.md",
4627            concat!(
4628                "---\ntype: log\n---\n\n",
4629                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4630                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
4631            ),
4632        );
4633        let issues = fx.store_all();
4634        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4635    }
4636
4637    #[test]
4638    fn log_not_checked_in_working_set() {
4639        // log.md ordering is an --all-only check.
4640        let fx = Fixture::new();
4641        fx.write(
4642            "log.md",
4643            concat!(
4644                "---\ntype: log\n---\n\n",
4645                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4646                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
4647            ),
4648        );
4649        let issues = validate_working_set(&fx.store(), None).unwrap();
4650        assert!(
4651            !has(&issues, codes::LOG_OUT_OF_ORDER),
4652            "log ordering is --all only: {issues:#?}"
4653        );
4654    }
4655
4656    // ── working-set scoping ───────────────────────────────────────────────────
4657
4658    #[test]
4659    fn working_set_validates_only_changed_files() {
4660        let fx = Fixture::new();
4661        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
4662        // in the log → working set must skip it.
4663        fx.write(
4664            "records/contacts/dirty.md",
4665            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4666        );
4667        fx.write(
4668            "records/contacts/unlogged.md",
4669            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4670        );
4671        fx.write(
4672            "log.md",
4673            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
4674        );
4675        let issues = validate_working_set(&fx.store(), None).unwrap();
4676        assert!(
4677            issues.iter().any(
4678                |i| i.code == codes::FM_BAD_TIMESTAMP && i.file == *"records/contacts/dirty.md"
4679            ),
4680            "{issues:#?}"
4681        );
4682        assert!(
4683            !issues
4684                .iter()
4685                .any(|i| i.file == *"records/contacts/unlogged.md"),
4686            "unlogged file must not be in the working set: {issues:#?}"
4687        );
4688    }
4689
4690    #[test]
4691    fn working_set_includes_incoming_linkers_to_changed_path() {
4692        let fx = Fixture::new();
4693        // `changed` was renamed/removed (logged). `linker` points at it with a
4694        // now-broken link and was NOT itself logged — but must be pulled in.
4695        fx.write(
4696            "wiki/people/linker.md",
4697            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
4698        );
4699        // `changed.md` does NOT exist on disk (removed).
4700        fx.write(
4701            "log.md",
4702            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
4703        );
4704        let issues = validate_working_set(&fx.store(), None).unwrap();
4705        assert!(
4706            issues
4707                .iter()
4708                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.file == *"wiki/people/linker.md"),
4709            "incoming linker to a removed path must be validated: {issues:#?}"
4710        );
4711    }
4712
4713    #[test]
4714    fn working_set_respects_explicit_since_cutoff() {
4715        let fx = Fixture::new();
4716        fx.write(
4717            "records/contacts/old.md",
4718            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4719        );
4720        fx.write(
4721            "records/contacts/new.md",
4722            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4723        );
4724        fx.write(
4725            "log.md",
4726            concat!(
4727                "---\ntype: log\n---\n\n",
4728                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
4729                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
4730            ),
4731        );
4732        // Cutoff after `old` but before `new`.
4733        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
4734        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
4735        assert!(
4736            issues.iter().any(|i| i.file == *"records/contacts/new.md"),
4737            "{issues:#?}"
4738        );
4739        assert!(
4740            !issues.iter().any(|i| i.file == *"records/contacts/old.md"),
4741            "old change is before the cutoff: {issues:#?}"
4742        );
4743    }
4744
4745    #[test]
4746    fn working_set_default_since_is_last_validate_entry() {
4747        let fx = Fixture::new();
4748        // `before` changed before the last validate; `after` changed after.
4749        fx.write(
4750            "records/contacts/before.md",
4751            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4752        );
4753        fx.write(
4754            "records/contacts/after.md",
4755            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4756        );
4757        fx.write(
4758            "log.md",
4759            concat!(
4760                "---\ntype: log\n---\n\n",
4761                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
4762                "## [2026-05-21 10:00] validate\nPASS\n\n",
4763                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
4764            ),
4765        );
4766        let issues = validate_working_set(&fx.store(), None).unwrap();
4767        assert!(
4768            issues
4769                .iter()
4770                .any(|i| i.file == *"records/contacts/after.md"),
4771            "{issues:#?}"
4772        );
4773        assert!(
4774            !issues
4775                .iter()
4776                .any(|i| i.file == *"records/contacts/before.md"),
4777            "change before the last validate entry is outside the default window: {issues:#?}"
4778        );
4779    }
4780
4781    // ── ordering / determinism ────────────────────────────────────────────────
4782
4783    #[test]
4784    fn issues_are_sorted_by_file_then_line() {
4785        let fx = Fixture::new();
4786        fx.write("wiki/people/z.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4787        fx.write("wiki/people/a.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4788        let issues = fx.store_all();
4789        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
4790        let mut sorted = files.clone();
4791        sorted.sort();
4792        assert_eq!(
4793            files, sorted,
4794            "issues must be emitted in a stable file order"
4795        );
4796    }
4797
4798    // ── boundaries: codes validate must NOT emit ──────────────────────────────
4799
4800    #[test]
4801    fn frozen_page_is_not_a_validate_error() {
4802        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
4803        // A clean file listed in `### Frozen pages` must validate clean.
4804        let mut fx = Fixture::new();
4805        fx.config
4806            .frozen_pages
4807            .push(PathBuf::from("records/decisions/d.md"));
4808        fx.write(
4809            "records/decisions/d.md",
4810            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
4811        );
4812        let issues = fx.store_all();
4813        assert!(
4814            !has(&issues, codes::POLICY_FROZEN_PAGE),
4815            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
4816        );
4817    }
4818
4819    #[test]
4820    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
4821        // The full-path doctrine makes ambiguity impossible; the defensive code
4822        // must never fire on a normal store.
4823        let fx = Fixture::new();
4824        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
4825        let mut body = valid_contact("links to sarah");
4826        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
4827        fx.write("wiki/people/p.md", &body);
4828        let issues = fx.store_all();
4829        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
4830    }
4831
4832    // ── unknown-type / unknown-field passthrough ──────────────────────────────
4833
4834    #[test]
4835    fn unknown_type_passes_through() {
4836        // A custom type is ambient context: it has a `type`, so no
4837        // FM_MISSING_TYPE, and with no matching schema there are no schema
4838        // errors. Only the universal contract (summary, timestamps) applies.
4839        let fx = Fixture::new();
4840        fx.write(
4841            "records/proposals/x.md",
4842            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
4843        );
4844        let issues = fx.store_all();
4845        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4846        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
4847        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
4848        // The unknown fields don't trip anything.
4849        assert!(
4850            !issues
4851                .iter()
4852                .any(|i| i.key.as_deref() == Some("custom_field")
4853                    || i.key.as_deref() == Some("budget")),
4854            "unknown fields are ambient context: {issues:#?}"
4855        );
4856    }
4857
4858    // ── implicit canonical schema across the four link-bearing types ──────────
4859
4860    #[test]
4861    fn expense_vendor_plain_string_is_link_prefix_mismatch() {
4862        // Exercises the `expense` branch of the implicit canonical schema.
4863        let fx = Fixture::new();
4864        fx.write(
4865            "records/expenses/e.md",
4866            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: an expense\ndate: 2026-05-01\namount: 100\nvendor: \"Acme Co\"\n---\n\n# E\n",
4867        );
4868        let issues = fx.store_all();
4869        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4870        assert_eq!(issue.key.as_deref(), Some("vendor"));
4871        assert!(issue
4872            .suggestion
4873            .as_deref()
4874            .unwrap()
4875            .contains("records/companies/"));
4876    }
4877
4878    #[test]
4879    fn invoice_vendor_correct_unquoted_link_passes() {
4880        // The unquoted canonical link form must satisfy the implicit schema.
4881        let fx = Fixture::new();
4882        fx.write(
4883            "records/companies/acme.md",
4884            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a company\nname: Acme\n---\n\n# Acme\n",
4885        );
4886        fx.write(
4887            "records/invoices/i.md",
4888            "---\ntype: invoice\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: an invoice\ndate: 2026-05-01\namount: 100\nvendor: [[records/companies/acme]]\n---\n\n# I\n",
4889        );
4890        let issues = fx.store_all();
4891        assert!(
4892            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4893            "a correct unquoted vendor link must pass: {issues:#?}"
4894        );
4895        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4896    }
4897
4898    #[test]
4899    fn implicit_canonical_schema_matches_spec_link_set_exactly() {
4900        // Lockstep guard: the implicit canonical schema must enforce EXACTLY the
4901        // fields the SPEC recognized-types table marks `(link → <prefix>/)`, and
4902        // no others. This pins both directions of the prior code↔SPEC drift:
4903        //   * the four record fields the table now marks are enforced with their
4904        //     stated prefixes, and
4905        //   * types/fields the table does NOT mark (notably wiki-page, whose
4906        //     `derived_from` spans records/ AND sources/) carry NO implicit
4907        //     link schema.
4908        // If you change either side, change the SPEC § Recognized types table to
4909        // match — they are one source of truth.
4910        let prefix_of = |type_: &str, field: &str| -> Option<String> {
4911            implicit_canonical_schema(type_)?
4912                .fields
4913                .into_iter()
4914                .find(|f| f.name == field)
4915                .and_then(|f| f.link_prefix)
4916                .map(|p| p.to_string_lossy().into_owned())
4917        };
4918
4919        // The complete enforced set, field-for-field with its prefix.
4920        let expected: &[(&str, &str, &str)] = &[
4921            ("contact", "company", "records/companies/"),
4922            ("expense", "vendor", "records/companies/"),
4923            ("expense", "contact", "records/contacts/"),
4924            ("meeting", "expense", "records/expenses/"),
4925            ("invoice", "vendor", "records/companies/"),
4926        ];
4927        for (type_, field, prefix) in expected {
4928            assert_eq!(
4929                prefix_of(type_, field).as_deref(),
4930                Some(*prefix),
4931                "{type_}.{field} must be an implicit link to {prefix}"
4932            );
4933        }
4934
4935        // The total number of implicit link fields across all types is exactly
4936        // the size of the expected set — no extra, unmarked field has crept in.
4937        let total: usize = ["contact", "expense", "meeting", "invoice"]
4938            .iter()
4939            .filter_map(|t| implicit_canonical_schema(t))
4940            .map(|s| s.fields.len())
4941            .sum();
4942        assert_eq!(total, expected.len(), "no unmarked field may be enforced");
4943
4944        // wiki-page is NOT in the table's `(link)` set: it must have no implicit
4945        // schema at all (derived_from is left to ordinary wiki-link validation).
4946        assert!(
4947            implicit_canonical_schema("wiki-page").is_none(),
4948            "wiki-page.derived_from has no single canonical prefix; it must not be implicit-schema enforced"
4949        );
4950        // A type with no marked link field at all also returns None.
4951        assert!(implicit_canonical_schema("company").is_none());
4952        assert!(implicit_canonical_schema("decision").is_none());
4953    }
4954
4955    #[test]
4956    fn wiki_page_derived_from_plain_string_is_not_prefix_mismatch() {
4957        // The user-visible half of the finding, running the other way: a
4958        // `wiki-page` written per the SPEC table (derived_from spans records/
4959        // AND sources/) must NOT raise SCHEMA_LINK_PREFIX_MISMATCH, because the
4960        // implicit schema deliberately omits the field. A plain-string value is
4961        // therefore not a hard schema error.
4962        let fx = Fixture::new();
4963        fx.write(
4964            "wiki/themes/t.md",
4965            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a theme\ntopic: renewals\nderived_from: \"some notes\"\n---\n\n# T\n",
4966        );
4967        let issues = fx.store_all();
4968        assert!(
4969            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4970            "wiki-page.derived_from is not implicit-schema enforced: {issues:#?}"
4971        );
4972    }
4973
4974    #[test]
4975    fn expense_contact_and_meeting_expense_enforce_their_prefixes() {
4976        // The two implicit link fields not previously exercised end-to-end:
4977        // expense.contact (→ records/contacts/) and meeting.expense
4978        // (→ records/expenses/). A plain string in each is a prefix mismatch
4979        // naming the correct prefix.
4980        let fx = Fixture::new();
4981        fx.write(
4982            "records/expenses/e.md",
4983            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: an expense\ndate: 2026-05-01\namount: 100\nvendor: [[records/companies/acme]]\ncontact: \"Jane Doe\"\n---\n\n# E\n",
4984        );
4985        fx.write(
4986            "records/meetings/m.md",
4987            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-01\nexpense: \"2026-05 lunch\"\n---\n\n# M\n",
4988        );
4989        let issues = fx.store_all();
4990
4991        let contact_issue = issues.iter().find(|i| {
4992            i.code == codes::SCHEMA_LINK_PREFIX_MISMATCH
4993                && i.file == *"records/expenses/e.md"
4994                && i.key.as_deref() == Some("contact")
4995        });
4996        let contact_issue = contact_issue.unwrap_or_else(|| {
4997            panic!("expense.contact plain string must be a prefix mismatch: {issues:#?}")
4998        });
4999        assert!(contact_issue
5000            .suggestion
5001            .as_deref()
5002            .unwrap()
5003            .contains("records/contacts/"));
5004
5005        let expense_issue = issues.iter().find(|i| {
5006            i.code == codes::SCHEMA_LINK_PREFIX_MISMATCH
5007                && i.file == *"records/meetings/m.md"
5008                && i.key.as_deref() == Some("expense")
5009        });
5010        let expense_issue = expense_issue.unwrap_or_else(|| {
5011            panic!("meeting.expense plain string must be a prefix mismatch: {issues:#?}")
5012        });
5013        assert!(expense_issue
5014            .suggestion
5015            .as_deref()
5016            .unwrap()
5017            .contains("records/expenses/"));
5018    }
5019
5020    // ── find_links_to prefix-collision safety (working set) ───────────────────
5021
5022    #[test]
5023    fn incoming_linker_scan_does_not_prefix_match() {
5024        // A changed `records/contacts/sarah` must NOT pull in a file that only
5025        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5026        let fx = Fixture::new();
5027        fx.write(
5028            "wiki/people/only-sarah-chen.md",
5029            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5030        );
5031        // The log says `records/contacts/sarah` (the shorter path) changed.
5032        fx.write(
5033            "log.md",
5034            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5035        );
5036        let issues = validate_working_set(&fx.store(), None).unwrap();
5037        assert!(
5038            !issues
5039                .iter()
5040                .any(|i| i.file == *"wiki/people/only-sarah-chen.md"),
5041            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5042        );
5043    }
5044
5045    #[test]
5046    fn incoming_linker_scan_pulls_in_catalog_index_md() {
5047        // CONTRACT: the working-set incoming-linker scan rides the embedded-
5048        // ripgrep `Store::find_links_to`, which scans EVERY `.md` (including
5049        // `index.md` catalogs) — NOT the walk-and-read over `walk_content_files`,
5050        // which excludes `index.md`. A type-folder `index.md` that lists a now-
5051        // deleted target must be pulled into the working set so its dangling
5052        // catalog entry is flagged `WIKI_LINK_BROKEN`. The old walk-and-read
5053        // implementation skipped `index.md` and let this broken link survive the
5054        // loop silently; this test fails if anyone reverts to that path.
5055        let fx = Fixture::new();
5056        // A catalog that still lists the deleted contact (a real, common stale
5057        // state after a `delete`). No other file references the target, so the
5058        // catalog is the ONLY incoming linker — if it isn't scanned, nothing is.
5059        fx.write(
5060            "records/contacts/index.md",
5061            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5062        );
5063        // The log says `records/contacts/sarah-chen` was deleted.
5064        fx.write(
5065            "log.md",
5066            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5067        );
5068        let issues = validate_working_set(&fx.store(), None).unwrap();
5069        assert!(
5070            issues.iter().any(
5071                |i| i.file == *"records/contacts/index.md" && i.code == codes::WIKI_LINK_BROKEN
5072            ),
5073            "the catalog `index.md` linking to the deleted target must be pulled \
5074             into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
5075             uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
5076             walk-and-read): {issues:#?}"
5077        );
5078    }
5079
5080    #[test]
5081    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
5082        // CONTRACT (the O(changed × store) fix): the working-set scan finds
5083        // incoming linkers for EVERY changed object, and does so via the single
5084        // batch pass `Store::find_links_to_any` — not one full store read per
5085        // changed object. This test pins the behavior that makes the single-pass
5086        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
5087        // into the working set and flagged. A regression that scanned for only the
5088        // first/last changed object, or that dropped the batch union, would leave
5089        // one of the two broken links unreported and fail here.
5090        let fx = Fixture::new();
5091        // Linker A → deleted target #1 (in the body).
5092        fx.write(
5093            "wiki/people/refers-sarah.md",
5094            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5095        );
5096        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
5097        // sidecar `links` projection would miss, which is why this must be a
5098        // content scan, not a sidecar read).
5099        fx.write(
5100            "records/meetings/2026/05/kickoff.md",
5101            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
5102        );
5103        // The log says BOTH targets were deleted in this window.
5104        fx.write(
5105            "log.md",
5106            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
5107        );
5108
5109        let issues = validate_working_set(&fx.store(), None).unwrap();
5110        assert!(
5111            issues
5112                .iter()
5113                .any(|i| i.file == *"wiki/people/refers-sarah.md"
5114                    && i.code == codes::WIKI_LINK_BROKEN),
5115            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
5116        );
5117        assert!(
5118            issues
5119                .iter()
5120                .any(|i| i.file == *"records/meetings/2026/05/kickoff.md"
5121                    && i.code == codes::WIKI_LINK_BROKEN),
5122            "linker to the SECOND deleted target (typed-field edge) must also be \
5123             pulled in and flagged — proves the scan covers the whole changed set, \
5124             not just one object: {issues:#?}"
5125        );
5126    }
5127
5128    #[test]
5129    fn frontmatter_block_sequence_links_each_get_their_own_line() {
5130        // Each block-sequence wiki-link reports on its own source line.
5131        let fx = Fixture::new();
5132        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
5133        fx.write(
5134            "records/meetings/m.md",
5135            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
5136        );
5137        let issues = fx.store_all();
5138        let broken_lines: BTreeSet<Option<u32>> = issues
5139            .iter()
5140            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
5141            .map(|i| i.line)
5142            .collect();
5143        assert_eq!(
5144            broken_lines.len(),
5145            2,
5146            "two distinct broken-link lines: {issues:#?}"
5147        );
5148    }
5149
5150    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
5151    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
5152    /// and the module doc-comment promises this code implements "exactly those
5153    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
5154    /// new validation code is added to the engine but never documented.
5155    #[test]
5156    fn every_code_constant_is_documented_in_spec() {
5157        // Parse the canonical constant *values* straight out of this module's
5158        // source, so a future `pub const X: &str = "X";` is covered with no test
5159        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
5160        let this_src = include_str!("validate.rs");
5161        let mut codes_in_module: Vec<String> = Vec::new();
5162        let mut in_codes_mod = false;
5163        for line in this_src.lines() {
5164            let t = line.trim();
5165            if t.starts_with("pub mod codes") {
5166                in_codes_mod = true;
5167                continue;
5168            }
5169            // The `mod codes` block ends at its closing brace at column 0.
5170            if in_codes_mod && line == "}" {
5171                break;
5172            }
5173            if in_codes_mod {
5174                if let Some(rest) = t.strip_prefix("pub const ") {
5175                    // rest = `NAME: &str = "VALUE";`
5176                    let value = rest
5177                        .split_once('=')
5178                        .map(|(_, v)| v.trim())
5179                        .and_then(|v| v.strip_prefix('"'))
5180                        .and_then(|v| v.strip_suffix("\";"))
5181                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
5182                    codes_in_module.push(value.to_string());
5183                }
5184            }
5185        }
5186        assert!(
5187            codes_in_module.len() >= 36,
5188            "parsed only {} code constants from `mod codes`; the parser likely \
5189             broke against a source-format change",
5190            codes_in_module.len()
5191        );
5192
5193        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
5194        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
5195        let spec = fs::read_to_string(&spec_path)
5196            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
5197
5198        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
5199        let missing: Vec<&String> = codes_in_module
5200            .iter()
5201            .filter(|code| !spec.contains(&format!("| `{code}` |")))
5202            .collect();
5203        assert!(
5204            missing.is_empty(),
5205            "validation codes emitted by the engine but absent from SPEC.md \
5206             § Validation (the declared complete vocabulary): {missing:?}"
5207        );
5208    }
5209}