Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
31//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
32//! log-header parse, and file walk here, reading only the two public,
33//! caller-populated fields of a [`Store`]: [`Store::root`] and
34//! [`Store::config`] — rather than routing through the sibling modules
35//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
36//! Keeping the checks local lets the validator report precise, per-issue
37//! diagnostics (exact codes, file, and context) without coupling its output to
38//! incidental behavior of the shared readers; the public surface and the
39//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is.
///
/// Only [`Severity::Error`] makes validation fail (non-zero exit); warnings
/// and info are reported without affecting the outcome.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point the agent resolves at its own discretion.
    Warning,
    /// Purely for visibility; never affects exit status.
    Info,
}
61
62/// A single structured validation finding. Agent-primary and machine-parseable
63/// via `--json`; `suggestion` is a deterministic remediation hint the agent
64/// applies without guessing.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Issue {
67    /// The severity; only [`Severity::Error`] fails validation.
68    pub severity: Severity,
69    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
70    pub code: &'static str,
71    /// The file the issue is about.
72    pub file: PathBuf,
73    /// The 1-based line, when applicable.
74    pub line: Option<u32>,
75    /// The frontmatter key, when the issue is about a specific field.
76    pub key: Option<String>,
77    /// A human-readable message.
78    pub message: String,
79    /// A deterministic remediation hint, when one exists.
80    pub suggestion: Option<String>,
81    /// Other files involved (e.g. the duplicate partner in a collision).
82    pub related: Vec<PathBuf>,
83}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes, one constant per row of the SPEC.md
/// § Validation table.
///
/// Call sites name these constants rather than spelling a code as a bare
/// string, so the code and the SPEC table cannot silently drift apart.
pub mod codes {
    // ── store identity (`DB.md`) ─────────────────────────────────────────────

    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter lacks `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` has an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A `DB.md ## Schemas` field declaration is malformed (empty or duplicate
    /// field name) or carries an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";

    // ── frontmatter ──────────────────────────────────────────────────────────

    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// A content file can't be read (not valid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// The frontmatter block isn't valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` isn't ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// `meta-type` is present but not one of fact / operational / conclusion.
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";

    // ── summary ──────────────────────────────────────────────────────────────

    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";

    // ── wiki-links ───────────────────────────────────────────────────────────

    /// A wiki-link target isn't a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file doesn't exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension — drop it.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses inline `[[[a]], [[b]]]` — use block form.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";

    // ── entity dedup ─────────────────────────────────────────────────────────

    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of a type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";

    // ── schema ───────────────────────────────────────────────────────────────

    /// A `DB.md` schema requires a field that's absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value doesn't match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field has a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value isn't in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";

    // ── policy ───────────────────────────────────────────────────────────────

    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file with an `### Ignored types` type exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `meta-type: conclusion` record derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";

    // ── `log.md` ─────────────────────────────────────────────────────────────

    /// A `log.md` entry header timestamp is unparseable.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind isn't recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries aren't in non-decreasing time order (possible rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";

    // ── indexes (`index.md` / `index.jsonl`) ─────────────────────────────────

    /// A non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file isn't listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` doesn't match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text doesn't match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file isn't in the `index.jsonl`, or a jsonl record points at a
    /// missing file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields don't match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";

    // ── tags ─────────────────────────────────────────────────────────────────

    /// `tags` isn't a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";

    // ── assets (`assets.jsonl`) ──────────────────────────────────────────────

    /// A line in `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// A content file references an `asset`/`assets` path with no record in
    /// `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// An `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// An `assets.jsonl` record's path is referenced by no wrapper.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// An `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
197
/// Upper bound on `summary` length, in chars, per the SPEC. A longer summary
/// is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;

/// The `log.md` entry kinds the SPEC (§ `log.md`) recognizes. Any other kind
/// is reported as `LOG_UNKNOWN_KIND` — a warning, not an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    // Object lifecycle.
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    // Maintenance and bookkeeping.
    "validate",
    "index-rebuild",
    "contradiction",
];
214
215// ─────────────────────────────────────────────────────────────────────────────
216//  Public entrypoints
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// **Loop default.** Validate the working set: content files changed since
220/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
221/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
222/// none of the cross-file global passes (entity-dedup, every-index sync,
223/// `log.md` ordering) that `--all` adds. If the default call finds no logged
224/// changed objects, it falls back to a per-file content sweep so an externally
225/// edited or freshly copied store cannot pass vacuously.
226///
227/// **Cost.** The changed set is read from `log.md` — O(changed): every
228/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
229/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
230/// over that set plus its incoming linkers — also O(changed). The one part that
231/// is *not* O(changed) is discovering those incoming linkers: a link to a
232/// changed path can live in the body or a typed frontmatter field of any file,
233/// so it is found by a **single** embedded-ripgrep pass over the store
234/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
235/// scan, flat in the changed-set size. (It was previously a full store read
236/// *per* changed object — `O(changed × store)`; that is the blow-up this path
237/// no longer pays.) The unavoidable single content scan is the same shape as
238/// free-text `dbmd search`; the sidecar `links` projection can't replace it
239/// because it omits body/typed-field edges.
240pub fn validate_working_set(
241    store: &Store,
242    since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244    if !store_marker_present(store) {
245        return Ok(vec![not_a_store_issue(store)]);
246    }
247
248    let cutoff = match since {
249        Some(ts) => Some(ts),
250        None => last_validate_at(store),
251    };
252
253    // 1. Changed objects, straight from the log (O(changed) — never a walk).
254    let changed = changed_objects_since(store, cutoff);
255    if changed.is_empty() && since.is_none() {
256        return validate_content_sweep(store);
257    }
258
259    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
260    //    path (the linker may now be stale even though it didn't change). The
261    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
262    //    pass over the store for the WHOLE changed set (one `.md` walk, one
263    //    presence-only/early-exit scan per file), not one walk per object. This
264    //    is the fix for the `O(changed × store)` blow-up that calling
265    //    `find_links_to` in a loop produced (a full store read per changed
266    //    object); the cost is now a single store scan regardless of how many
267    //    objects changed. A returned self-link is harmlessly deduped by the set
268    //    (the object is already inserted below).
269    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270    let mut working: BTreeSet<PathBuf> = changed;
271    for linker in store.find_links_to_any(&changed_targets)? {
272        working.insert(linker);
273    }
274
275    let mut issues = Vec::new();
276    for rel in &working {
277        let abs = store.root.join(rel);
278        // A changed path can be a *deletion* — skip files that no longer exist;
279        // the incoming-linker scan above already flagged links into them.
280        if !abs.is_file() {
281            continue;
282        }
283        // `None` basename index: the working-set pass does not build the
284        // store-wide basename map (that is a `--all`-only structure), so a bare
285        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
286        // `--all` sweep does the ambiguity upgrade.
287        check_content_file(store, rel, &abs, None, &mut issues);
288    }
289    issues.sort_by(issue_order);
290    Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294    let mut issues = Vec::new();
295    for rel in store.walk()? {
296        let abs = store.root.join(&rel);
297        check_content_file(store, &rel, &abs, None, &mut issues);
298    }
299    issues.sort_by(issue_order);
300    Ok(issues)
301}
302
303/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
304/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
305/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
306/// loop.
307pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308    if !store_marker_present(store) {
309        return Ok(vec![not_a_store_issue(store)]);
310    }
311
312    let mut issues = Vec::new();
313
314    // Store-identity file: `DB.md` shape (type / required fields / section
315    // headers). A single root file, checked once in the sweep — not a content
316    // file (it carries no `summary`), so it is not part of `walk_content_files`.
317    check_db_md(store, &mut issues);
318
319    let files = walk_content_files(&store.root);
320
321    // The basename index makes the short-form wiki-link check able to upgrade a
322    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
323    // Built once from the already-gathered sweep list (no extra walk); only the
324    // `--all` path has it (the working-set path stays O(changed)).
325    let basenames = build_basename_index(&files);
326
327    // Per-file checks over the whole store.
328    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329    for rel in &files {
330        let abs = store.root.join(rel);
331        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332            parsed.push((rel.clone(), p));
333        }
334    }
335
336    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
337    check_duplicates(store, &parsed, &mut issues);
338
339    // Cross-file: hierarchical index.md + index.jsonl sync.
340    check_indexes(store, &files, &mut issues);
341
342    // Cross-file: log.md well-formedness + ordering.
343    check_log(store, &mut issues);
344
345    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
346    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
347    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
348    // a fresh clone with no restored bytes still passes here.
349    check_assets(store, &parsed, &mut issues);
350
351    issues.sort_by(issue_order);
352    Ok(issues)
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356//  Per-file content checks (shared by both scopes)
357// ─────────────────────────────────────────────────────────────────────────────
358
359/// What `validate_all`'s cross-file pass needs from a per-file parse: the
360/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
361/// text-based wiki-link extraction). The body and fence-line are consumed
362/// inline during the per-file pass and not carried here.
363struct Parsed {
364    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
365    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
366    fm: Option<BTreeMap<String, Value>>,
367    /// The raw frontmatter YAML text (between the fences) — the source for
368    /// text-based wiki-link extraction in dedup.
369    fm_yaml: String,
370}
371
372/// Run every per-file check on one content file, pushing issues. Returns the
373/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
374/// `None` only when the file is unreadable or has no frontmatter block at all
375/// (which for a content file is itself reported).
376fn check_content_file(
377    store: &Store,
378    rel: &Path,
379    abs: &Path,
380    basenames: Option<&BasenameIndex>,
381    issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383    let text = match std::fs::read_to_string(abs) {
384        Ok(t) => t,
385        Err(e) => {
386            // The file exists in the walk but can't be read as UTF-8 text
387            // (invalid bytes) or hit an I/O error. Returning `None` silently
388            // here let a store whose only content file was binary garbage pass
389            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
390            // sweep exists to prevent. Report it so the agent gets an actionable
391            // diagnostic naming the unreadable file (and `index rebuild`, which
392            // hard-fails on the same file, isn't the only signal).
393            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394                "file is not valid UTF-8 text".to_string()
395            } else {
396                format!("file could not be read: {e}")
397            };
398            push(
399                issues,
400                Severity::Error,
401                codes::FM_UNREADABLE,
402                rel,
403                None,
404                None,
405                format!("content file is unreadable: {detail}"),
406                Some(
407                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408                        .into(),
409                ),
410                vec![],
411            );
412            return None;
413        }
414    };
415
416    let is_content = is_content_file(rel);
417
418    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419        Some(split) => split,
420        None => {
421            // No frontmatter at all. For a content file that means there's no
422            // `type:` and no `summary:` — report both the way a parsed-but-empty
423            // file would, so the agent gets the same actionable codes.
424            if is_content {
425                push(
426                    issues,
427                    Severity::Error,
428                    codes::FM_MISSING_TYPE,
429                    rel,
430                    None,
431                    Some("type".into()),
432                    "content file has no frontmatter `type:`".into(),
433                    Some("add a YAML frontmatter block with `type:`".into()),
434                    vec![],
435                );
436                push(
437                    issues,
438                    Severity::Error,
439                    codes::SUMMARY_MISSING,
440                    rel,
441                    None,
442                    Some("summary".into()),
443                    "content file has no `summary`".into(),
444                    Some("run `dbmd fm init`".into()),
445                    vec![],
446                );
447            }
448            return None;
449        }
450    };
451
452    // Parse the YAML block.
453    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455        // An empty frontmatter block parses as Null; treat as an empty mapping.
456        Ok(Value::Null) => Some(BTreeMap::new()),
457        Ok(_) => {
458            // A scalar / sequence at the top level isn't a frontmatter mapping.
459            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
460            // block is opaque, so there is no single offending field line.
461            push(
462                issues,
463                Severity::Error,
464                codes::FM_MALFORMED_YAML,
465                rel,
466                Some(1),
467                None,
468                "frontmatter is not a YAML mapping".into(),
469                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470                vec![],
471            );
472            None
473        }
474        Err(e) => {
475            // Anchor to line 1 (the opening `---`): an unparseable block has no
476            // single offending field line; the agent re-reads the whole block.
477            push(
478                issues,
479                Severity::Error,
480                codes::FM_MALFORMED_YAML,
481                rel,
482                Some(1),
483                None,
484                format!("frontmatter block isn't valid YAML: {e}"),
485                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486                vec![],
487            );
488            None
489        }
490    };
491
492    if let Some(map) = &fm {
493        // The detailed frontmatter checks only run when the YAML parsed.
494        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495    }
496
497    // Wiki-link doctrine checks run on the body of content files (and on
498    // `index.md` files, whose entries are wiki-links too). They are NOT run on
499    // the root append-only meta files `log.md`/`DB.md`: those reach this
500    // function only via the working-set incoming-linker scan (`walk_all_md`
501    // includes them), and `validate --all` never link-checks their bodies
502    // (`walk_content_files` skips them; `check_log`/`check_db_md` do no body
503    // link checks). Without this guard the two scopes disagree — a historical
504    // `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]` in DB.md's
505    // `## Agent instructions`, is flagged `WIKI_LINK_BROKEN` by the default
506    // working set but is clean under `--all`. The log is append-only by spec, so
507    // the suggested "fix the link" remedy can't even be applied.
508    if !is_root_meta_file(rel) {
509        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
510    }
511
512    Some(Parsed { fm, fm_yaml })
513}
514
515/// All frontmatter-level checks for a content file with valid YAML.
516fn check_frontmatter(
517    store: &Store,
518    rel: &Path,
519    fm: &BTreeMap<String, Value>,
520    fm_yaml: &str,
521    basenames: Option<&BasenameIndex>,
522    issues: &mut Vec<Issue>,
523    is_content: bool,
524) {
525    let type_ = fm.get("type").and_then(scalar_string);
526
527    // ── type ────────────────────────────────────────────────────────────────
528    if is_content && type_.is_none() {
529        push(
530            issues,
531            Severity::Error,
532            codes::FM_MISSING_TYPE,
533            rel,
534            fm_key_line_or_top(fm_yaml, "type"),
535            Some("type".into()),
536            "content file has no `type:`".into(),
537            Some("add a `type:` field (e.g. `type: contact`)".into()),
538            vec![],
539        );
540    }
541
542    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
543    // Present-but-out-of-enum is an error; absent is fine (effective default
544    // `fact`). Sources don't normally carry one, but validating the value when
545    // present is layer-agnostic and harmless.
546    if is_content {
547        // Branch on the raw value, NOT `and_then(scalar_string)`. Pre-filtering
548        // through `scalar_string` made a list/mapping value (which returns `None`)
549        // short-circuit the whole check, so a structurally-wrong `meta-type`
550        // slipped through clean AND was silently reclassified as the default
551        // `fact` by the rest of the toolkit. Absent or explicit-`null` is fine
552        // (effective default `fact`); a present non-null value must be a scalar in
553        // the closed enum. This mirrors the sibling timestamp check below, which
554        // was already hardened against the same non-scalar escape.
555        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
556            match scalar_string(v) {
557                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
558                Some(mt) => push(
559                    issues,
560                    Severity::Error,
561                    codes::FM_BAD_META_TYPE,
562                    rel,
563                    fm_key_line_or_top(fm_yaml, "meta-type"),
564                    Some("meta-type".into()),
565                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
566                    Some(
567                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
568                            .into(),
569                    ),
570                    vec![],
571                ),
572                None => push(
573                    issues,
574                    Severity::Error,
575                    codes::FM_BAD_META_TYPE,
576                    rel,
577                    fm_key_line_or_top(fm_yaml, "meta-type"),
578                    Some("meta-type".into()),
579                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
580                     string, found a list or mapping"
581                        .to_string(),
582                    Some(
583                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
584                            .into(),
585                    ),
586                    vec![],
587                ),
588            }
589        }
590    }
591
592    // ── summary (universal on content files) ──────────────────────────────────
593    if is_content {
594        check_summary(rel, fm, fm_yaml, issues);
595    }
596
597    // ── timestamps: created / updated ─────────────────────────────────────────
598    // The `created`/`updated` contract is content-file-only; meta files
599    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
600    if is_content {
601        for (key, missing_code) in [
602            ("created", codes::FM_MISSING_CREATED),
603            ("updated", codes::FM_MISSING_UPDATED),
604        ] {
605            // A key that is absent, or present-but-`null`, has *no* timestamp →
606            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
607            // "no timestamp", so a null `created:` must read as missing, not
608            // silently pass.
609            let value = fm.get(key);
610            let missing = value.is_none() || value.is_some_and(Value::is_null);
611            if missing {
612                push(
613                    issues,
614                    Severity::Error,
615                    missing_code,
616                    rel,
617                    fm_key_line_or_top(fm_yaml, key),
618                    Some(key.into()),
619                    format!("content file has no `{key}:` timestamp"),
620                    Some(format!(
621                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
622                    )),
623                    vec![],
624                );
625            } else if let Some(v) = value {
626                // Present and non-null. A scalar is checked for ISO-8601; a
627                // sequence/mapping is not a timestamp string at all and so
628                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
629                // through the way it did when `scalar_string` returned `None`
630                // and the branch silently no-oped).
631                match scalar_string(v) {
632                    Some(s) if is_iso8601(&s) => {}
633                    Some(s) => push(
634                        issues,
635                        Severity::Error,
636                        codes::FM_BAD_TIMESTAMP,
637                        rel,
638                        fm_key_line(fm_yaml, key),
639                        Some(key.into()),
640                        format!("`{key}` is not ISO-8601: {s:?}"),
641                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
642                        vec![],
643                    ),
644                    None => push(
645                        issues,
646                        Severity::Error,
647                        codes::FM_BAD_TIMESTAMP,
648                        rel,
649                        fm_key_line(fm_yaml, key),
650                        Some(key.into()),
651                        format!(
652                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
653                        ),
654                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
655                        vec![],
656                    ),
657                }
658            }
659        }
660    }
661    // ── tags shape ────────────────────────────────────────────────────────────
662    if let Some(tags) = fm.get("tags") {
663        if !is_flat_scalar_list(tags) {
664            push(
665                issues,
666                Severity::Warning,
667                codes::TAGS_MALFORMED,
668                rel,
669                fm_key_line(fm_yaml, "tags"),
670                Some("tags".into()),
671                "`tags` must be a flat YAML list of short scalar labels".into(),
672                Some("use block form: one `- <tag>` per line".into()),
673                vec![],
674            );
675        }
676    }
677
678    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
679    for key in detect_flow_form_link_lists(fm_yaml) {
680        push(
681            issues,
682            Severity::Error,
683            codes::WIKI_LINK_FLOW_FORM_LIST,
684            rel,
685            fm_key_line(fm_yaml, &key),
686            Some(key.clone()),
687            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
688            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
689            vec![],
690        );
691    }
692
693    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
694    // Skip keys that have an explicit `link to` schema spec — those are checked
695    // (with prefix enforcement) in `check_schema`, and double-reporting the same
696    // link via two paths would be noise.
697    let schema_link_keys: BTreeSet<String> =
698        effective_schema(store, type_.as_deref().unwrap_or(""))
699            .map(|s| {
700                s.fields
701                    .iter()
702                    .filter(|f| f.link_prefix.is_some())
703                    .map(|f| f.name.clone())
704                    .collect()
705            })
706            .unwrap_or_default();
707    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
708        if schema_link_keys.contains(&key) {
709            continue;
710        }
711        check_wiki_link(
712            store,
713            rel,
714            &link,
715            Some(link.line),
716            Some(&key),
717            basenames,
718            issues,
719        );
720    }
721
722    // ── policies: ignored types ──────────────────────────────────────────────
723    if let Some(t) = &type_ {
724        if store.config.ignored_types.iter().any(|it| it == t) {
725            push(
726                issues,
727                Severity::Info,
728                codes::POLICY_IGNORED_TYPE_PRESENT,
729                rel,
730                fm_key_line(fm_yaml, "type"),
731                Some("type".into()),
732                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
733                Some(
734                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
735                        .into(),
736                ),
737                // The policy source: `DB.md` declares the ignored type.
738                vec![PathBuf::from("DB.md")],
739            );
740        }
741        // A conclusion record (`meta-type: conclusion`) deriving from an
742        // ignored-type record → warning. The decision lives in the shared
743        // `derived_from_ignored_type` entry point; this side only supplies the
744        // `derived_from` targets (with their line, which the issue carries) and
745        // renders the finding.
746        let meta_type = fm
747            .get("meta-type")
748            .and_then(scalar_string)
749            .unwrap_or_else(|| "fact".to_string());
750        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
751            if let Some(hit) =
752                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
753            {
754                push(
755                    issues,
756                    Severity::Warning,
757                    codes::POLICY_IGNORED_TYPE_DERIVED,
758                    rel,
759                    Some(link.line),
760                    Some("derived_from".into()),
761                    format!(
762                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
763                        hit.target, hit.target_type
764                    ),
765                    Some(
766                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
767                            .into(),
768                    ),
769                    // The ignored-type source record, plus `DB.md` (the policy
770                    // source that lists the ignored type).
771                    vec![
772                        PathBuf::from(format!("{}.md", hit.target)),
773                        PathBuf::from("DB.md"),
774                    ],
775                );
776            }
777        }
778    }
779
780    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
781    if let Some(t) = &type_ {
782        if let Some(schema) = effective_schema(store, t) {
783            check_schema(store, rel, fm, fm_yaml, &schema, issues);
784        }
785    }
786}
787
788/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
789fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
790    let line = fm_key_line(fm_yaml, "summary");
791    match fm.get("summary") {
792        None => push(
793            issues,
794            Severity::Error,
795            codes::SUMMARY_MISSING,
796            rel,
797            // A missing `summary` key has no line of its own → anchor to the
798            // frontmatter block top (line 1), the EXPECTED field-absence rule.
799            fm_key_line_or_top(fm_yaml, "summary"),
800            Some("summary".into()),
801            "content file has no `summary`".into(),
802            Some("run `dbmd fm init`".into()),
803            vec![],
804        ),
805        Some(v) => {
806            let s = scalar_string(v).unwrap_or_default();
807            if s.trim().is_empty() {
808                push(
809                    issues,
810                    Severity::Error,
811                    codes::SUMMARY_EMPTY,
812                    rel,
813                    line,
814                    Some("summary".into()),
815                    "`summary` is present but empty".into(),
816                    Some("write a one-line summary, or run `dbmd fm init`".into()),
817                    vec![],
818                );
819            } else if s.contains('\n') {
820                push(
821                    issues,
822                    Severity::Error,
823                    codes::SUMMARY_MULTILINE,
824                    rel,
825                    line,
826                    Some("summary".into()),
827                    "`summary` must be one line (contains a newline)".into(),
828                    Some("collapse the summary to a single line".into()),
829                    vec![],
830                );
831            } else if s.chars().count() > MAX_SUMMARY_LEN {
832                push(
833                    issues,
834                    Severity::Warning,
835                    codes::SUMMARY_TOO_LONG,
836                    rel,
837                    line,
838                    Some("summary".into()),
839                    format!(
840                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
841                        s.chars().count()
842                    ),
843                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
844                    vec![],
845                );
846            }
847        }
848    }
849}
850
851/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
852fn check_body_wiki_links(
853    store: &Store,
854    rel: &Path,
855    body: &str,
856    fm_end_line: u32,
857    basenames: Option<&BasenameIndex>,
858    issues: &mut Vec<Issue>,
859) {
860    for link in extract_wiki_links(body) {
861        // Body lines are offset past the frontmatter block. `link.line` is
862        // 1-based within `body`; the body starts at `fm_end_line + 1`.
863        let abs_line = fm_end_line + link.line;
864        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
865    }
866}
867
/// Store-wide lookup from a file's bare basename (its stem, without `.md`) to
/// every store-relative path that carries that basename.
///
/// It is built once per `validate --all` sweep so the short-form wiki-link
/// check can tell a target that is merely short-form (`WIKI_LINK_SHORT_FORM`)
/// from one that is *ambiguous* because the bare basename matches two or more
/// files (`WIKI_LINK_AMBIGUOUS`, the defensive code).
///
/// The working-set path passes `None` instead of an index: that loop is
/// O(changed) and never walks the store, so it reports the plain short-form
/// error without the ambiguity scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;
876
877/// Build the [`BasenameIndex`] from the swept file list (already gathered by
878/// `validate_all`; no extra walk).
879fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
880    let mut idx: BasenameIndex = HashMap::new();
881    for rel in files {
882        if let Some(stem) = rel.file_stem().and_then(|s| s.to_str()) {
883            idx.entry(stem.to_string()).or_default().push(rel.clone());
884        }
885    }
886    idx
887}
888
889/// The shared per-wiki-link doctrine + integrity check used by both body links
890/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
891/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
892/// when its bare basename matches ≥2 files.
893fn check_wiki_link(
894    store: &Store,
895    rel: &Path,
896    link: &Link,
897    line: Option<u32>,
898    key: Option<&str>,
899    basenames: Option<&BasenameIndex>,
900    issues: &mut Vec<Issue>,
901) {
902    let bare = link.target.trim_end_matches(".md");
903
904    // Short-form: not a full store-relative path (no `/`, or first segment isn't
905    // a known layer).
906    if !is_full_store_path(bare) {
907        // Ambiguous (defensive) takes precedence over plain short-form when the
908        // target is a bare basename (no `/`) that matches ≥2 files in the store.
909        // Only computable in the sweep (where `basenames` is populated); the
910        // working-set path falls through to the plain short-form error.
911        if !bare.contains('/') {
912            if let Some(idx) = basenames {
913                if let Some(matches) = idx.get(bare) {
914                    if matches.len() >= 2 {
915                        let mut related = matches.clone();
916                        related.sort();
917                        push(
918                            issues,
919                            Severity::Error,
920                            codes::WIKI_LINK_AMBIGUOUS,
921                            rel,
922                            line,
923                            key.map(str::to_string),
924                            format!(
925                                "short-form wiki-link `[[{}]]` matches multiple files",
926                                link.target
927                            ),
928                            Some("use the full store-relative path to disambiguate".into()),
929                            related,
930                        );
931                        return;
932                    }
933                }
934            }
935        }
936        push(
937            issues,
938            Severity::Error,
939            codes::WIKI_LINK_SHORT_FORM,
940            rel,
941            line,
942            key.map(str::to_string),
943            format!(
944                "wiki-link `[[{}]]` is not a full store-relative path",
945                link.target
946            ),
947            short_form_suggestion(bare),
948            vec![],
949        );
950        // Don't also report broken; the agent must fix the form first.
951        return;
952    }
953
954    // `.md` extension → warning, then still check existence.
955    if link.target.ends_with(".md") {
956        push(
957            issues,
958            Severity::Warning,
959            codes::WIKI_LINK_HAS_EXTENSION,
960            rel,
961            line,
962            key.map(str::to_string),
963            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
964            Some(format!("drop the extension: [[{bare}]]")),
965            vec![],
966        );
967    }
968
969    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
970    // same way the graph engine does — the literal path first (so a link to a
971    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
972    // the `.md`-appended path.
973    match resolve_wiki_target(store, bare) {
974        TargetResolution::Exists => {}
975        TargetResolution::Missing => push(
976            issues,
977            Severity::Error,
978            codes::WIKI_LINK_BROKEN,
979            rel,
980            line,
981            key.map(str::to_string),
982            format!("wiki-link target `{bare}` doesn't exist"),
983            Some(format!(
984                "create `{bare}.md`, or point the link at an existing file"
985            )),
986            vec![],
987        ),
988        TargetResolution::Unsafe => push(
989            issues,
990            Severity::Error,
991            codes::WIKI_LINK_BROKEN,
992            rel,
993            line,
994            key.map(str::to_string),
995            format!("wiki-link target `{bare}` is not a safe store-relative path"),
996            Some("use a full store-relative path under sources/ or records/".into()),
997            vec![],
998        ),
999    }
1000}
1001
1002// ─────────────────────────────────────────────────────────────────────────────
1003//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
1004// ─────────────────────────────────────────────────────────────────────────────
1005
1006/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
1007/// block, or `None`. This is the **only** source of schema enforcement — the
1008/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
1009/// store that wants its `contact` / `expense` / etc. fields enforced declares
1010/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
1011/// copy-in starting point.
1012fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1013    store.config.schemas.get(type_).cloned()
1014}
1015
1016/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
1017fn check_schema(
1018    store: &Store,
1019    rel: &Path,
1020    fm: &BTreeMap<String, Value>,
1021    fm_yaml: &str,
1022    schema: &Schema,
1023    issues: &mut Vec<Issue>,
1024) {
1025    for spec in &schema.fields {
1026        let present = fm.get(&spec.name);
1027        let line = fm_key_line(fm_yaml, &spec.name);
1028
1029        // Required. "Empty" means: the key is absent, or its value carries no
1030        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1031        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1032        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1033        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1034        // a required field with a null or empty-collection value pass silently;
1035        // route them through `is_empty_value` instead.
1036        let is_empty = match present {
1037            None => true,
1038            Some(v) => is_empty_value(v),
1039        };
1040        if spec.required && is_empty {
1041            push(
1042                issues,
1043                Severity::Error,
1044                codes::SCHEMA_MISSING_REQUIRED,
1045                rel,
1046                // Absent key → anchor to the frontmatter top (line 1); a
1047                // present-but-empty value keeps its own line.
1048                fm_key_line_or_top(fm_yaml, &spec.name),
1049                Some(spec.name.clone()),
1050                format!("required field `{}` is absent or empty", spec.name),
1051                Some(format!("set `{}` to a non-empty value", spec.name)),
1052                vec![],
1053            );
1054            continue;
1055        }
1056        let Some(value) = present else { continue };
1057
1058        // An OPTIONAL field that is `null` or empty is simply unset — there is
1059        // no value to shape/enum/link-check. (The required+empty case already
1060        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1061        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1062        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1063        let value_empty = value.is_null()
1064            || scalar_string(value)
1065                .map(|s| s.trim().is_empty())
1066                .unwrap_or(false);
1067        if !spec.required && value_empty {
1068            continue;
1069        }
1070
1071        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1072        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1073        if let Some(prefix) = &spec.link_prefix {
1074            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1075            continue; // a link field is never also shape/enum-checked
1076        }
1077
1078        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1079        // or mapping satisfies neither, and would otherwise slip through both
1080        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1081        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1082        // than let a structurally-wrong value validate clean. (Link fields,
1083        // which legitimately take block-form sequences, already `continue`d.)
1084        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1085            push(
1086                issues,
1087                Severity::Error,
1088                codes::SCHEMA_SHAPE_MISMATCH,
1089                rel,
1090                line,
1091                Some(spec.name.clone()),
1092                format!(
1093                    "`{}` must be a scalar value, found a list or mapping",
1094                    spec.name
1095                ),
1096                Some(format!("set `{}` to a single scalar value", spec.name)),
1097                vec![],
1098            );
1099            continue;
1100        }
1101
1102        // enum
1103        if let Some(allowed) = &spec.enum_values {
1104            if let Some(s) = scalar_string(value) {
1105                if !allowed.iter().any(|a| a == &s) {
1106                    push(
1107                        issues,
1108                        Severity::Error,
1109                        codes::SCHEMA_ENUM_VIOLATION,
1110                        rel,
1111                        line,
1112                        Some(spec.name.clone()),
1113                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1114                        Some(format!("use one of: {}", allowed.join(", "))),
1115                        vec![],
1116                    );
1117                }
1118            }
1119            continue;
1120        }
1121
1122        // shape
1123        if let Some(shape) = spec.shape {
1124            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1125        }
1126    }
1127}
1128
1129/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1130/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1131/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1132/// recognized exactly like the quoted form.
1133fn check_schema_link(
1134    store: &Store,
1135    rel: &Path,
1136    field: &str,
1137    fm_yaml: &str,
1138    prefix: &Path,
1139    line: Option<u32>,
1140    issues: &mut Vec<Issue>,
1141) {
1142    let prefix_str = prefix.to_string_lossy();
1143    let prefix_str = prefix_str.trim_end_matches('/');
1144    let suggestion = |target_leaf: &str| {
1145        Some(format!(
1146            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1147        ))
1148    };
1149
1150    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1151    if links.is_empty() {
1152        // No wiki-link in the field's value → it's a plain string.
1153        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1154        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1155        let leaf = slugish(raw);
1156        push(
1157            issues,
1158            Severity::Error,
1159            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1160            rel,
1161            line,
1162            Some(field.to_string()),
1163            format!(
1164                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1165            ),
1166            suggestion(&leaf),
1167            vec![],
1168        );
1169        return;
1170    }
1171
1172    for link in links {
1173        if link.target.ends_with(".md") {
1174            let bare = link.target.trim_end_matches(".md");
1175            push(
1176                issues,
1177                Severity::Warning,
1178                codes::WIKI_LINK_HAS_EXTENSION,
1179                rel,
1180                Some(link.line),
1181                Some(field.to_string()),
1182                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1183                Some(format!("drop the extension: [[{bare}]]")),
1184                vec![],
1185            );
1186        }
1187        let bare = link.target.trim_end_matches(".md");
1188        if !path_under_prefix(bare, prefix_str) {
1189            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1190            push(
1191                issues,
1192                Severity::Error,
1193                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1194                rel,
1195                line,
1196                Some(field.to_string()),
1197                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1198                suggestion(leaf),
1199                vec![],
1200            );
1201        } else {
1202            // Correct prefix — still surface a broken target so the agent sees
1203            // one consistent vocabulary. Resolve like the graph engine (literal
1204            // path first, then `.md`) so a `link to sources/` field pointing at a
1205            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1206            match resolve_wiki_target(store, bare) {
1207                TargetResolution::Exists => {}
1208                TargetResolution::Missing => push(
1209                    issues,
1210                    Severity::Error,
1211                    codes::WIKI_LINK_BROKEN,
1212                    rel,
1213                    line,
1214                    Some(field.to_string()),
1215                    format!("wiki-link target `{bare}` doesn't exist"),
1216                    Some(format!(
1217                        "create `{bare}.md`, or point the link at an existing file"
1218                    )),
1219                    vec![],
1220                ),
1221                TargetResolution::Unsafe => push(
1222                    issues,
1223                    Severity::Error,
1224                    codes::WIKI_LINK_BROKEN,
1225                    rel,
1226                    line,
1227                    Some(field.to_string()),
1228                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1229                    Some("use a full store-relative path under sources/ or records/".into()),
1230                    vec![],
1231                ),
1232            }
1233        }
1234    }
1235}
1236
1237/// Shape enforcement for a non-link, non-enum schema field.
1238fn check_schema_shape(
1239    rel: &Path,
1240    field: &str,
1241    value: &Value,
1242    shape: Shape,
1243    line: Option<u32>,
1244    issues: &mut Vec<Issue>,
1245) {
1246    let s = scalar_string(value).unwrap_or_default();
1247    let ok = match shape {
1248        Shape::String => true, // any scalar string
1249        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1250        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1251        Shape::Date => is_iso8601_date_or_datetime(&s),
1252        Shape::Email => is_email(&s),
1253        Shape::Currency => is_currency(&s),
1254        Shape::Url => is_url(&s),
1255    };
1256    if !ok {
1257        push(
1258            issues,
1259            Severity::Error,
1260            codes::SCHEMA_SHAPE_MISMATCH,
1261            rel,
1262            line,
1263            Some(field.to_string()),
1264            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1265            Some(shape_suggestion(shape)),
1266            vec![],
1267        );
1268    }
1269}
1270
1271// ─────────────────────────────────────────────────────────────────────────────
1272//  Cross-file: entity-dedup collisions (validate_all only)
1273// ─────────────────────────────────────────────────────────────────────────────
1274
1275/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1276///
1277/// `DUP_ID` is universal (two files with the same explicit `id`).
1278/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1279/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1280/// uniqueness constraint, and two records of that type whose declared values
1281/// collide warn. No type carries a built-in dedup key — the store opts in.
1282///
1283/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1284/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1285/// lexicographically smallest store-relative path in the group (a total order →
1286/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1287/// that field's line on the reported file and carries it as `key`; a multi-field
1288/// key anchors to line 1 with a null key.
1289fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1290    // Path → frontmatter YAML, for resolving the anchor field's line on the
1291    // reported (smallest-path) member.
1292    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1293        .iter()
1294        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1295        .collect();
1296
1297    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1298    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1299    for (rel, p) in parsed {
1300        if let Some(map) = &p.fm {
1301            if let Some(id) = map.get("id").and_then(scalar_string) {
1302                if !id.trim().is_empty() {
1303                    by_id.entry(id).or_default().push(rel.clone());
1304                }
1305            }
1306        }
1307    }
1308    for (id, files) in &by_id {
1309        if files.len() > 1 {
1310            let (reported, related) = canonical_and_related(files);
1311            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1312            push(
1313                issues,
1314                Severity::Error,
1315                codes::DUP_ID,
1316                &reported,
1317                line,
1318                Some("id".into()),
1319                format!("id {id:?} is declared by more than one file"),
1320                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1321                related,
1322            );
1323        }
1324    }
1325
1326    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1327    // Every constraint comes from the store's `## Schemas`; a type with no
1328    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1329    // key-ordered, so emitted issues are deterministic across runs.
1330    for (type_name, schema) in &store.config.schemas {
1331        for key_fields in &schema.unique_keys {
1332            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1333        }
1334    }
1335}
1336
1337/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1338/// declared `key_fields` render to the same token tuple. Files missing any key
1339/// field are skipped — an incomplete key is never a collision.
1340///
1341/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1342/// store-relative path; `related` is the rest. A single-field key anchors to
1343/// that field's line on the reported file and carries it as `key`; a multi-field
1344/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1345fn soft_dup(
1346    parsed: &[(PathBuf, Parsed)],
1347    issues: &mut Vec<Issue>,
1348    type_: &str,
1349    key_fields: &[String],
1350    fm_yaml_of: &HashMap<&PathBuf, &str>,
1351) {
1352    if key_fields.is_empty() {
1353        return;
1354    }
1355    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1356    for (rel, p) in parsed {
1357        let is_type =
1358            p.fm.as_ref()
1359                .and_then(|m| m.get("type"))
1360                .and_then(scalar_string)
1361                .map(|t| t == type_)
1362                .unwrap_or(false);
1363        if !is_type {
1364            continue;
1365        }
1366        if let Some(key) = dedup_key(p, key_fields) {
1367            groups.entry(key).or_default().push(rel.clone());
1368        }
1369    }
1370    // HashMap iteration is nondeterministic; sort by reported member so the
1371    // emitted issue order is stable across runs.
1372    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1373        .values()
1374        .filter(|files| files.len() > 1)
1375        .map(|files| canonical_and_related(files))
1376        .collect();
1377    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1378
1379    let fields_disp = key_fields.join(", ");
1380    for (reported, related) in collisions {
1381        // Single-field keys anchor to the field's line + carry the key; multi-
1382        // field keys anchor to line 1 with a null key.
1383        let (line, key) = if key_fields.len() == 1 {
1384            (
1385                fm_yaml_of
1386                    .get(&reported)
1387                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1388                Some(key_fields[0].clone()),
1389            )
1390        } else {
1391            (Some(1), None)
1392        };
1393        let n = related.len();
1394        push(
1395            issues,
1396            Severity::Warning,
1397            codes::DUP_UNIQUE_KEY,
1398            &reported,
1399            line,
1400            key,
1401            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1402            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1403            related,
1404        );
1405    }
1406}
1407
1408/// Render a type's `unique:` key for one file: each field's dedup token in
1409/// order, or `None` if any field is absent/empty (an incomplete key never
1410/// collides).
1411fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1412    let mut out = Vec::with_capacity(key_fields.len());
1413    for f in key_fields {
1414        out.push(dedup_token(p, f)?);
1415    }
1416    Some(out)
1417}
1418
1419/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1420/// values (single or block-sequence list) reduce to their lower-cased target
1421/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1422/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1423fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1424    // Wiki-links first — read from the raw frontmatter text so the unquoted
1425    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1426    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1427    if !links.is_empty() {
1428        let set: BTreeSet<String> = links
1429            .into_iter()
1430            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1431            .filter(|t| !t.is_empty())
1432            .collect();
1433        return if set.is_empty() {
1434            None
1435        } else {
1436            Some(set.into_iter().collect::<Vec<_>>().join(","))
1437        };
1438    }
1439    match p.fm.as_ref()?.get(field) {
1440        Some(Value::Sequence(items)) => {
1441            let set: BTreeSet<String> = items
1442                .iter()
1443                .filter_map(scalar_string)
1444                .map(|s| s.trim().to_lowercase())
1445                .filter(|t| !t.is_empty())
1446                .collect();
1447            if set.is_empty() {
1448                None
1449            } else {
1450                Some(set.into_iter().collect::<Vec<_>>().join(","))
1451            }
1452        }
1453        Some(v) => {
1454            let s = scalar_string(v)?.trim().to_lowercase();
1455            if s.is_empty() {
1456                None
1457            } else {
1458                Some(s)
1459            }
1460        }
1461        None => None,
1462    }
1463}
1464
/// Partition a collision group into `(reported, related)`.
///
/// The group is sorted ascending by store-relative path; the smallest path is
/// the `reported` member and the remaining paths, still in ascending order,
/// are `related`. Because path ordering is total, the same group always yields
/// the same split regardless of input order.
///
/// # Panics
///
/// Panics if `files` is empty — callers only pass groups that have members.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.iter().cloned().collect();
    ordered.sort();
    // Everything after the first element is `related`; `split_off(1)` panics
    // on an empty group, as indexing the first element would.
    let related = ordered.split_off(1);
    let reported = ordered.swap_remove(0);
    (reported, related)
}
1476
1477// ─────────────────────────────────────────────────────────────────────────────
1478//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1479// ─────────────────────────────────────────────────────────────────────────────
1480
1481/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1482fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1483    // Group content files by their immediate parent folder (the type-folder,
1484    // *across date shards* — a sharded file's "type folder" is the folder right
1485    // under the layer). We key on the type-folder so shards roll up correctly.
1486    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1487    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1488    for rel in files {
1489        // The layer is the first path component — recorded independently of the
1490        // type-folder so a layer containing only loose files still requires an
1491        // `index.md`.
1492        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1493            match layer {
1494                "sources" => layers_present.insert("sources"),
1495                "records" => layers_present.insert("records"),
1496                _ => false,
1497            };
1498        }
1499        if let Some(tf) = type_folder_of(rel) {
1500            type_folders.entry(tf).or_default().push(rel.clone());
1501        }
1502    }
1503
1504    // ── Root index.md ─────────────────────────────────────────────────────────
1505    if !files.is_empty() {
1506        let root_index = store.root.join("index.md");
1507        if !root_index.is_file() {
1508            push(
1509                issues,
1510                Severity::Error,
1511                codes::INDEX_MISSING,
1512                Path::new("index.md"),
1513                None,
1514                None,
1515                "store has files but no root `index.md`".into(),
1516                Some("run `dbmd index rebuild`".into()),
1517                vec![],
1518            );
1519        } else {
1520            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1521        }
1522    }
1523
1524    // ── Layer index.md ────────────────────────────────────────────────────────
1525    for layer in &layers_present {
1526        let layer_index_rel = PathBuf::from(layer).join("index.md");
1527        let abs = store.root.join(&layer_index_rel);
1528        if !abs.is_file() {
1529            push(
1530                issues,
1531                Severity::Error,
1532                codes::INDEX_MISSING,
1533                &layer_index_rel,
1534                None,
1535                None,
1536                format!("layer `{layer}/` has files but no `index.md`"),
1537                Some("run `dbmd index rebuild`".into()),
1538                vec![],
1539            );
1540        } else {
1541            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1542        }
1543    }
1544
1545    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1546    for (tf, members) in &type_folders {
1547        let index_md_rel = tf.join("index.md");
1548        let index_md_abs = store.root.join(&index_md_rel);
1549        let index_md_present = index_md_abs.is_file();
1550        if !index_md_present {
1551            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1552            // on the FOLDER (not the would-be `index.md` path). When the index is
1553            // entirely missing we do NOT additionally evaluate per-entry
1554            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1555            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1556            push(
1557                issues,
1558                Severity::Error,
1559                codes::INDEX_MISSING,
1560                tf,
1561                None,
1562                None,
1563                format!("non-empty folder `{}` has no index.md", tf.display()),
1564                Some(format!(
1565                    "run `dbmd index rebuild --folder {}`",
1566                    tf.display()
1567                )),
1568                vec![],
1569            );
1570            continue;
1571        }
1572
1573        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1574        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1575
1576        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1577        // when the `index.md` is present (above): a folder whose entire index is
1578        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1579        let jsonl_rel = tf.join("index.jsonl");
1580        let jsonl_abs = store.root.join(&jsonl_rel);
1581        if !jsonl_abs.is_file() {
1582            push(
1583                issues,
1584                Severity::Error,
1585                codes::INDEX_JSONL_MISSING,
1586                &jsonl_rel,
1587                None,
1588                None,
1589                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1590                Some("run `dbmd index rebuild`".into()),
1591                vec![],
1592            );
1593        } else {
1594            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1595        }
1596    }
1597
1598    // ── Orphan index.md: an index file in a folder with no content. ──────────
1599    for rel in walk_index_files(&store.root) {
1600        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1601        let parent_str = parent.to_string_lossy().to_string();
1602        let is_canonical = parent_str.is_empty() // root
1603            || matches!(parent_str.as_str(), "sources" | "records")
1604            || type_folders.contains_key(&parent);
1605        if !is_canonical {
1606            push(
1607                issues,
1608                Severity::Warning,
1609                codes::INDEX_ORPHAN,
1610                &rel,
1611                None,
1612                None,
1613                format!(
1614                    "`{}` sits in an empty or non-canonical folder",
1615                    rel.display()
1616                ),
1617                Some("remove it, or run `dbmd index rebuild`".into()),
1618                vec![],
1619            );
1620        }
1621    }
1622}
1623
1624/// Check a type-folder `index.md`'s entries against the folder's actual files:
1625/// stale entries (target gone), missing entries (file not listed), and
1626/// summary mismatches.
1627fn check_type_folder_index_md(
1628    store: &Store,
1629    tf: &Path,
1630    index_rel: &Path,
1631    members: &[PathBuf],
1632    issues: &mut Vec<Issue>,
1633) {
1634    let abs = store.root.join(index_rel);
1635    let Ok(text) = std::fs::read_to_string(&abs) else {
1636        return;
1637    };
1638    let entries = parse_index_entries(&text);
1639
1640    let listed: BTreeSet<PathBuf> = entries
1641        .iter()
1642        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1643        .collect();
1644
1645    // Stale entries + summary mismatch.
1646    for entry in &entries {
1647        let bare = entry.target.trim_end_matches(".md");
1648        // Resolve like the graph engine (literal path first, then `.md`) so an
1649        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1650        let target_abs = match resolved_target_abs(store, bare) {
1651            Some(abs) => abs,
1652            None => {
1653                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1654                    push(
1655                        issues,
1656                        Severity::Error,
1657                        codes::INDEX_STALE_ENTRY,
1658                        index_rel,
1659                        Some(entry.line),
1660                        None,
1661                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1662                        Some("run `dbmd index rebuild`".into()),
1663                        vec![],
1664                    );
1665                } else {
1666                    push(
1667                        issues,
1668                        Severity::Error,
1669                        codes::INDEX_STALE_ENTRY,
1670                        index_rel,
1671                        Some(entry.line),
1672                        None,
1673                        format!("index entry `[[{bare}]]` points at a missing file"),
1674                        Some("run `dbmd index rebuild`".into()),
1675                        // The stale target the entry names (the file that no
1676                        // longer exists) — so the agent can locate the dangling
1677                        // reference.
1678                        vec![PathBuf::from(format!("{bare}.md"))],
1679                    );
1680                }
1681                continue;
1682            }
1683        };
1684        // Summary mismatch: the entry text must equal the file's `summary`. A
1685        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1686        // summary is also a mismatch — the SPEC requires every type-folder index
1687        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1688        // missing quote can't validate clean just because there's nothing to
1689        // compare.
1690        if let Some(expected) = read_summary(&target_abs) {
1691            match &entry.summary_text {
1692                Some(text_part) if text_part.trim() != expected.trim() => {
1693                    push(
1694                        issues,
1695                        Severity::Error,
1696                        codes::INDEX_SUMMARY_MISMATCH,
1697                        index_rel,
1698                        Some(entry.line),
1699                        None,
1700                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1701                        Some("run `dbmd index rebuild`".into()),
1702                        vec![PathBuf::from(format!("{bare}.md"))],
1703                    );
1704                }
1705                None if !expected.trim().is_empty() => {
1706                    push(
1707                        issues,
1708                        Severity::Error,
1709                        codes::INDEX_SUMMARY_MISMATCH,
1710                        index_rel,
1711                        Some(entry.line),
1712                        None,
1713                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1714                        Some("run `dbmd index rebuild`".into()),
1715                        vec![PathBuf::from(format!("{bare}.md"))],
1716                    );
1717                }
1718                _ => {}
1719            }
1720        }
1721    }
1722
1723    // Missing entries: a member file not listed. Skip the index/log meta files.
1724    // The browse view caps at 500; only flag a missing entry when the folder is
1725    // under the cap (a capped folder legitimately omits older files).
1726    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1727    if content_members.len() <= 500 {
1728        for m in content_members {
1729            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1730            if !listed.contains(&bare) {
1731                push(
1732                    issues,
1733                    Severity::Error,
1734                    codes::INDEX_MISSING_ENTRY,
1735                    index_rel,
1736                    None,
1737                    None,
1738                    format!(
1739                        "file `{}` is not listed in its folder's `index.md`",
1740                        m.display()
1741                    ),
1742                    Some("run `dbmd index rebuild`".into()),
1743                    vec![(*m).clone()],
1744                );
1745            }
1746        }
1747    }
1748    let _ = tf;
1749}
1750
1751/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1752/// folder (uncapped), every record must point at a real file, and each record's
1753/// fields must match the file's frontmatter.
1754fn check_type_folder_index_jsonl(
1755    store: &Store,
1756    tf: &Path,
1757    jsonl_rel: &Path,
1758    members: &[PathBuf],
1759    issues: &mut Vec<Issue>,
1760) {
1761    let abs = store.root.join(jsonl_rel);
1762    let Ok(text) = std::fs::read_to_string(&abs) else {
1763        return;
1764    };
1765
1766    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1767    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1768    for (i, line) in text.lines().enumerate() {
1769        let line = line.trim();
1770        if line.is_empty() {
1771            continue;
1772        }
1773        let rec: serde_json::Value = match serde_json::from_str(line) {
1774            Ok(v) => v,
1775            Err(e) => {
1776                push(
1777                    issues,
1778                    Severity::Error,
1779                    codes::INDEX_JSONL_DESYNC,
1780                    jsonl_rel,
1781                    Some((i + 1) as u32),
1782                    None,
1783                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1784                    Some("run `dbmd index rebuild`".into()),
1785                    vec![],
1786                );
1787                continue;
1788            }
1789        };
1790        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1791            if !is_safe_store_relative_path(Path::new(path)) {
1792                push(
1793                    issues,
1794                    Severity::Error,
1795                    codes::INDEX_JSONL_DESYNC,
1796                    jsonl_rel,
1797                    Some((i + 1) as u32),
1798                    None,
1799                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1800                    Some("run `dbmd index rebuild`".into()),
1801                    vec![],
1802                );
1803                continue;
1804            }
1805            records.insert(PathBuf::from(path), rec);
1806        }
1807    }
1808
1809    let member_set: BTreeSet<PathBuf> = members
1810        .iter()
1811        .filter(|m| is_content_file(m))
1812        .cloned()
1813        .collect();
1814
1815    // jsonl record → missing file = desync.
1816    for path in records.keys() {
1817        let target_abs = store.root.join(path);
1818        if !target_abs.is_file() {
1819            push(
1820                issues,
1821                Severity::Error,
1822                codes::INDEX_JSONL_DESYNC,
1823                jsonl_rel,
1824                None,
1825                None,
1826                format!(
1827                    "`index.jsonl` record points at missing file `{}`",
1828                    path.display()
1829                ),
1830                Some("run `dbmd index rebuild`".into()),
1831                vec![],
1832            );
1833        }
1834    }
1835
1836    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1837    for m in &member_set {
1838        if !records.contains_key(m) {
1839            push(
1840                issues,
1841                Severity::Error,
1842                codes::INDEX_JSONL_DESYNC,
1843                jsonl_rel,
1844                None,
1845                None,
1846                format!(
1847                    "file `{}` is missing from the complete `index.jsonl`",
1848                    m.display()
1849                ),
1850                Some("run `dbmd index rebuild`".into()),
1851                vec![m.clone()],
1852            );
1853        }
1854    }
1855
1856    // Record fields stale vs. frontmatter. SPEC § Validation defines
1857    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1858    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1859    // search paths read every field straight from these sidecars (`tags`,
1860    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1861    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1862    // a stale value answer queries with data that exists in no `.md` file.
1863    //
1864    // Rather than re-list (and drift from) every projected key, rebuild the
1865    // record the canonical projection would write for this file
1866    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1867    // and diff the two as flat JSON maps. Every key the projection emits is
1868    // covered automatically; `path` is the join key and is skipped.
1869    for (path, rec) in &records {
1870        let target_abs = store.root.join(path);
1871        if !target_abs.is_file() {
1872            continue;
1873        }
1874        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1875        else {
1876            continue; // unreadable / unparseable frontmatter is reported elsewhere
1877        };
1878        let Ok(expected_json) = serde_json::to_value(&expected) else {
1879            continue;
1880        };
1881        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1882            continue;
1883        };
1884
1885        // Compare the union of keys present on either side; a key the file
1886        // projects but the sidecar omits is just as stale as a wrong value.
1887        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1888        for key in have.keys().chain(want.keys()) {
1889            if key == "path" {
1890                continue;
1891            }
1892            if have.get(key) != want.get(key) {
1893                mismatched_keys.insert(key);
1894            }
1895        }
1896
1897        if !mismatched_keys.is_empty() {
1898            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1899            push(
1900                issues,
1901                Severity::Error,
1902                codes::INDEX_JSONL_STALE,
1903                jsonl_rel,
1904                None,
1905                Some(keys.join(",")),
1906                format!(
1907                    "`index.jsonl` record for `{}` is stale ({})",
1908                    path.display(),
1909                    keys.join(", ")
1910                ),
1911                Some("run `dbmd index rebuild`".into()),
1912                vec![path.clone()],
1913            );
1914        }
1915    }
1916    let _ = tf;
1917}
1918
1919/// Check an index's `scope:` frontmatter against its filesystem location.
1920fn check_index_scope(
1921    store: &Store,
1922    index_rel: &Path,
1923    expected_scope: &str,
1924    expected_folder: Option<&str>,
1925    issues: &mut Vec<Issue>,
1926) {
1927    let abs = store.root.join(index_rel);
1928    let Ok(text) = std::fs::read_to_string(&abs) else {
1929        return;
1930    };
1931    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1932        return;
1933    };
1934    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1935        return;
1936    };
1937    let fm = yaml_map_to_btree(&map);
1938
1939    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1940        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1941        let scope_ok =
1942            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1943        if !scope_ok {
1944            push(
1945                issues,
1946                Severity::Warning,
1947                codes::INDEX_WRONG_SCOPE,
1948                index_rel,
1949                fm_key_line(&yaml, "scope"),
1950                Some("scope".into()),
1951                format!(
1952                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1953                ),
1954                Some(format!("set `scope: {expected_scope}`")),
1955                vec![],
1956            );
1957        }
1958    }
1959    // folder: must match for layer/type-folder indexes.
1960    if let Some(expected) = expected_folder {
1961        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1962            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1963                push(
1964                    issues,
1965                    Severity::Warning,
1966                    codes::INDEX_WRONG_SCOPE,
1967                    index_rel,
1968                    fm_key_line(&yaml, "folder"),
1969                    Some("folder".into()),
1970                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1971                    Some(format!("set `folder: {expected}`")),
1972                    vec![],
1973                );
1974            }
1975        }
1976    }
1977}
1978
1979// ─────────────────────────────────────────────────────────────────────────────
1980//  Cross-file: log.md well-formedness + ordering (validate_all only)
1981// ─────────────────────────────────────────────────────────────────────────────
1982
1983/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
1984/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
1985///
1986/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
1987/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
1988/// LOG_* checks read only the active file, an entry `validate --all` flagged
1989/// while it lived in `log.md` would stop being flagged the moment a newer-month
1990/// append rotated it into an archive — even though the log readers still surface
1991/// that exact entry to the curator. Scanning the archives too keeps validate and
1992/// the readers in agreement after a rotation.
1993///
1994/// Order: archives oldest-month first, then the active `log.md` last — the true
1995/// chronological timeline — so the out-of-order check threads `prev` across the
1996/// rotation boundary the same way it does within a single file.
1997fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1998    let mut prev: Option<DateTime<FixedOffset>> = None;
1999    for rel in log_files_chronological(store) {
2000        check_log_file(store, &rel, &mut prev, issues);
2001    }
2002}
2003
2004/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
2005/// archive oldest-month first, then the active `log.md` last. Missing files are
2006/// simply absent from the list.
2007fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2008    let mut files: Vec<PathBuf> = Vec::new();
2009    let archive_dir = store.root.join("log");
2010    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2011        let mut archives: Vec<PathBuf> = entries
2012            .flatten()
2013            .map(|e| e.path())
2014            .filter(|p| {
2015                p.is_file()
2016                    && p.file_name()
2017                        .and_then(|s| s.to_str())
2018                        .and_then(|n| n.strip_suffix(".md"))
2019                        .is_some_and(is_year_month_archive)
2020            })
2021            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2022            .collect();
2023        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
2024        archives.sort();
2025        files.extend(archives);
2026    }
2027    // The active file holds the current month — newest, so it comes last.
2028    if store.root.join("log.md").is_file() {
2029        files.push(PathBuf::from("log.md"));
2030    }
2031    files
2032}
2033
2034/// Scan one log file's entry headers, threading the running `prev` timestamp so
2035/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2036/// given store-relative path so an archived entry points at its archive file.
2037fn check_log_file(
2038    store: &Store,
2039    log_rel: &Path,
2040    prev: &mut Option<DateTime<FixedOffset>>,
2041    issues: &mut Vec<Issue>,
2042) {
2043    let abs = store.root.join(log_rel);
2044    let Ok(text) = std::fs::read_to_string(&abs) else {
2045        return;
2046    };
2047
2048    for (i, line) in text.lines().enumerate() {
2049        if !line.starts_with("## [") {
2050            continue;
2051        }
2052        let line_no = (i + 1) as u32;
2053        match parse_log_header(line) {
2054            None => push(
2055                issues,
2056                Severity::Error,
2057                codes::LOG_BAD_TIMESTAMP,
2058                log_rel,
2059                Some(line_no),
2060                None,
2061                format!("log entry header has an unparseable timestamp: {line:?}"),
2062                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2063                vec![],
2064            ),
2065            Some((ts, kind, _object)) => {
2066                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2067                    push(
2068                        issues,
2069                        Severity::Warning,
2070                        codes::LOG_UNKNOWN_KIND,
2071                        log_rel,
2072                        Some(line_no),
2073                        None,
2074                        format!("log entry kind `{kind}` is not recognized"),
2075                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2076                        vec![],
2077                    );
2078                }
2079                if let Some(p) = *prev {
2080                    if ts < p {
2081                        push(
2082                            issues,
2083                            Severity::Warning,
2084                            codes::LOG_OUT_OF_ORDER,
2085                            log_rel,
2086                            Some(line_no),
2087                            None,
2088                            "log entry is older than the entry above it (possible rewrite)".into(),
2089                            Some("append corrective entries; never reorder past ones".into()),
2090                            vec![],
2091                        );
2092                    }
2093                }
2094                *prev = Some(ts);
2095            }
2096        }
2097    }
2098}
2099
2100// ─────────────────────────────────────────────────────────────────────────────
2101//  Self-contained primitives (collapse onto sibling modules once they land)
2102// ─────────────────────────────────────────────────────────────────────────────
2103
/// A minimal wiki-link occurrence: the link's target and the 1-based line it
/// was found on. No display text is kept — the struct carries only these two
/// fields.
#[derive(Debug)]
struct Link {
    // The link's target. NOTE(review): callers in this file trim a trailing
    // `.md` from targets before comparing, so this presumably holds the target
    // as written — confirm against the scanner that builds `Link`.
    target: String,
    // Line the link was found on (1-based, per the type's doc comment).
    line: u32,
}
2110
2111/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2112/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2113/// exact-cased directory entry to be present.
2114fn store_marker_present(store: &Store) -> bool {
2115    let want = store.root.join("DB.md");
2116    if !want.is_file() {
2117        return false;
2118    }
2119    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2120    match std::fs::read_dir(&store.root) {
2121        Ok(entries) => entries
2122            .flatten()
2123            .any(|e| e.file_name().to_str() == Some("DB.md")),
2124        Err(_) => true, // can't enumerate; trust the is_file() above
2125    }
2126}
2127
2128/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2129/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2130/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2131/// `Schemas`).
2132///
2133/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2134/// than through `check_content_file`. The marker presence is established by the
2135/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2136/// as a store (the marker is the filename), so we report its shape rather than
2137/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2138fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2139    let rel = Path::new("DB.md");
2140    let abs = store.root.join("DB.md");
2141    let Ok(text) = std::fs::read_to_string(&abs) else {
2142        return; // marker present but unreadable: nothing more to say.
2143    };
2144
2145    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2146        // No frontmatter block at all → it cannot declare `type: db-md` and has
2147        // neither required field. Report the type and both missing fields,
2148        // anchored to line 1 (the would-be opening fence).
2149        push(
2150            issues,
2151            Severity::Error,
2152            codes::DB_MD_BAD_TYPE,
2153            rel,
2154            Some(1),
2155            Some("type".into()),
2156            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2157            Some("add a `---` frontmatter block with `type: db-md`".into()),
2158            vec![],
2159        );
2160        for field in ["scope", "owner"] {
2161            push(
2162                issues,
2163                Severity::Error,
2164                codes::DB_MD_MISSING_FIELD,
2165                rel,
2166                Some(1),
2167                Some(field.into()),
2168                format!("DB.md frontmatter is missing required field `{field}`"),
2169                Some(format!("add `{field}:` to the DB.md frontmatter")),
2170                vec![],
2171            );
2172        }
2173        return;
2174    };
2175
2176    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2177    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2178    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2179        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2180        Ok(Value::Null) => Some(BTreeMap::new()),
2181        _ => None,
2182    };
2183
2184    match &fm {
2185        Some(map) => {
2186            // ── type: db-md ──────────────────────────────────────────────────
2187            let type_ = map.get("type").and_then(scalar_string);
2188            if type_.as_deref() != Some("db-md") {
2189                let (line, msg) = match &type_ {
2190                    Some(t) => (
2191                        fm_key_line(&fm_yaml, "type"),
2192                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2193                    ),
2194                    None => (
2195                        Some(1),
2196                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2197                    ),
2198                };
2199                push(
2200                    issues,
2201                    Severity::Error,
2202                    codes::DB_MD_BAD_TYPE,
2203                    rel,
2204                    line,
2205                    Some("type".into()),
2206                    msg,
2207                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2208                    vec![],
2209                );
2210            }
2211
2212            // ── required fields: scope + owner ───────────────────────────────
2213            for field in ["scope", "owner"] {
2214                let present = map
2215                    .get(field)
2216                    .and_then(scalar_string)
2217                    .map(|s| !s.trim().is_empty())
2218                    .unwrap_or(false);
2219                if !present {
2220                    push(
2221                        issues,
2222                        Severity::Error,
2223                        codes::DB_MD_MISSING_FIELD,
2224                        rel,
2225                        // A present-but-empty field anchors to its line; a fully
2226                        // absent one to the block top.
2227                        fm_key_line_or_top(&fm_yaml, field),
2228                        Some(field.into()),
2229                        format!("DB.md frontmatter is missing required field `{field}`"),
2230                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2231                        vec![],
2232                    );
2233                }
2234            }
2235        }
2236        None => {
2237            // Unparseable frontmatter: the identity contract is unprovable. Emit
2238            // the type error and both field errors, anchored to the block top.
2239            push(
2240                issues,
2241                Severity::Error,
2242                codes::DB_MD_BAD_TYPE,
2243                rel,
2244                Some(1),
2245                Some("type".into()),
2246                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2247                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2248                vec![],
2249            );
2250            for field in ["scope", "owner"] {
2251                push(
2252                    issues,
2253                    Severity::Error,
2254                    codes::DB_MD_MISSING_FIELD,
2255                    rel,
2256                    Some(1),
2257                    Some(field.into()),
2258                    format!("DB.md frontmatter is missing required field `{field}`"),
2259                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2260                    vec![],
2261                );
2262            }
2263        }
2264    }
2265
2266    // ── recognized `##` section headers only ─────────────────────────────────
2267    // The body's H2 headings must be one of the three the toolkit reads; any
2268    // other is a likely typo / misplacement (warning — the parser ignores it,
2269    // so the config is not corrupted, but the operator wrote a section that will
2270    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2271    // schema blocks) live under their H2 and are not flagged here.
2272    for section in crate::parser::extract_sections(&body) {
2273        if section.level != 2 {
2274            continue;
2275        }
2276        let name = section.heading.trim().to_ascii_lowercase();
2277        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2278            continue;
2279        }
2280        // `Section::line` is 1-based within the body; the body begins at file
2281        // line `fm_end_line + 1`.
2282        let file_line = fm_end_line + section.line;
2283        push(
2284            issues,
2285            Severity::Warning,
2286            codes::DB_MD_UNKNOWN_SECTION,
2287            rel,
2288            Some(file_line),
2289            None,
2290            format!(
2291                "DB.md has an unrecognized `## {}` section",
2292                section.heading.trim()
2293            ),
2294            Some(
2295                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2296                 remove or rename this heading"
2297                    .into(),
2298            ),
2299            vec![],
2300        );
2301    }
2302
2303    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2304    // Without this, every schema misparse is silent: the operator/agent gets no
2305    // signal that DB.md is interpreting their schema differently from what they
2306    // wrote, and downstream records are validated against the degraded schema.
2307    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2308}
2309
2310/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2311/// duplicate field name within a type, or an unrecognized modifier all parse
2312/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2313/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2314/// already separated out); this pass reports the suspicious *field* shapes,
2315/// anchored to the `### <type>` heading line so the agent can find the block.
2316fn check_db_md_schemas(
2317    store: &Store,
2318    rel: &Path,
2319    body: &str,
2320    fm_end_line: u32,
2321    issues: &mut Vec<Issue>,
2322) {
2323    if store.config.schemas.is_empty() {
2324        return;
2325    }
2326
2327    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2328    // per-type issue can anchor to the declaration block. `extract_sections`
2329    // returns a flat list with 1-based body lines; the body starts at file line
2330    // `fm_end_line + 1`.
2331    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2332    let mut current_h2: Option<String> = None;
2333    for section in crate::parser::extract_sections(body) {
2334        match section.level {
2335            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2336            3 if current_h2.as_deref() == Some("schemas") => {
2337                // The H3 heading text (as written) is the type name — the same
2338                // key `parse_db_md` inserts into `config.schemas`.
2339                type_line
2340                    .entry(section.heading.trim().to_string())
2341                    .or_insert(fm_end_line + section.line);
2342            }
2343            _ => {}
2344        }
2345    }
2346
2347    for (type_name, schema) in &store.config.schemas {
2348        let line = type_line.get(type_name).copied();
2349        let mut seen: BTreeSet<String> = BTreeSet::new();
2350        for field in &schema.fields {
2351            let name = field.name.trim();
2352
2353            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2354            // nameless field that can never match a frontmatter key, so its
2355            // required/shape/enum constraints silently never apply.
2356            if name.is_empty() {
2357                push(
2358                    issues,
2359                    Severity::Warning,
2360                    codes::DB_MD_SCHEMA_FIELD,
2361                    rel,
2362                    line,
2363                    None,
2364                    format!("`### {type_name}` has a schema field bullet with no field name"),
2365                    Some(
2366                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2367                            .into(),
2368                    ),
2369                    vec![],
2370                );
2371                continue;
2372            }
2373
2374            // Duplicate field name within a type: the second declaration's
2375            // constraints are interpreted independently of the first, so the
2376            // author's intent is ambiguous and likely wrong.
2377            if !seen.insert(name.to_string()) {
2378                push(
2379                    issues,
2380                    Severity::Warning,
2381                    codes::DB_MD_SCHEMA_FIELD,
2382                    rel,
2383                    line,
2384                    Some(name.to_string()),
2385                    format!("`### {type_name}` declares field `{name}` more than once"),
2386                    Some(
2387                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2388                    ),
2389                    vec![],
2390                );
2391            }
2392
2393            // Unrecognized modifiers: the parser stashes anything outside the
2394            // known vocabulary (`required` / a shape / `link to …` / `default …`
2395            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2396            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2397            for modifier in &field.unknown_modifiers {
2398                let modifier = modifier.trim();
2399                if modifier.is_empty() {
2400                    continue;
2401                }
2402                push(
2403                    issues,
2404                    Severity::Info,
2405                    codes::DB_MD_SCHEMA_FIELD,
2406                    rel,
2407                    line,
2408                    Some(name.to_string()),
2409                    format!(
2410                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2411                    ),
2412                    Some(
2413                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2414                            .into(),
2415                    ),
2416                    vec![],
2417                );
2418            }
2419        }
2420    }
2421}
2422
2423/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2424fn not_a_store_issue(store: &Store) -> Issue {
2425    Issue {
2426        severity: Severity::Error,
2427        code: codes::NOT_A_STORE,
2428        file: store.root.clone(),
2429        line: None,
2430        key: None,
2431        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2432        suggestion: Some("create a `DB.md` at the store root".into()),
2433        related: vec![],
2434    }
2435}
2436
/// Decide whether a store-relative path names a content file.
///
/// A content file lives under the `sources/` or `records/` layer, has a name
/// ending in `.md`, and is not one of the derived catalog twins (`index.md`,
/// `index.jsonl`).
///
/// `DB.md` and `log.md` are reserved only at the store root, which the layer
/// test already excludes. A file with one of those names inside a layer (e.g.
/// `records/docs/log.md`) is ordinary content.
fn is_content_file(rel: &Path) -> bool {
    let in_layer = matches!(
        rel.iter().next().and_then(|segment| segment.to_str()),
        Some("sources" | "records")
    );
    if !in_layer {
        return false;
    }
    match rel.file_name().and_then(|leaf| leaf.to_str()) {
        // No usable file name, or a catalog twin: not content.
        None | Some("index.md" | "index.jsonl") => false,
        Some(leaf) => leaf.ends_with(".md"),
    }
}
2457
/// Decide whether a store-relative path is one of the root meta files,
/// `DB.md` or `log.md`.
///
/// The path must consist of exactly one normal component with one of those two
/// names. Anything with a parent directory (e.g. `records/docs/log.md`) is
/// real content, not a root meta file. Per the file's existing notes, these
/// reach `check_content_file` only via the working-set incoming-linker scan,
/// and their bodies are deliberately not link-checked there because
/// `validate --all` doesn't link-check them either.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain name.
        (Some(Component::Normal(leaf)), None) => {
            matches!(leaf.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2474
/// Split a file's text into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open on the very first line with `---` and ends
/// at the next `---` line; trailing whitespace on a fence line is ignored.
/// Returns `None` when the first line is not a fence or no closing fence
/// follows.
///
/// * `frontmatter_yaml` — the lines between the fences, each terminated by
///   `\n`.
/// * `body` — the lines after the closing fence, joined by `\n` with no
///   trailing newline.
/// * `closing_fence_line` — 1-based file line of the closing fence.
///
/// A single leading UTF-8 BOM is tolerated. Without that, a BOM-prefixed file
/// would be read as having no frontmatter and its frontmatter checks would be
/// silently skipped.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
    let all_lines: Vec<&str> = text.lines().collect();

    let (opening, after_opening) = all_lines.split_first()?;
    if opening.trim_end() != "---" {
        return None;
    }

    // Index of the closing fence within the lines that follow the opening one.
    let close_idx = after_opening
        .iter()
        .position(|line| line.trim_end() == "---")?;

    let mut yaml = String::new();
    for line in &after_opening[..close_idx] {
        yaml.push_str(line);
        yaml.push('\n');
    }
    let body = after_opening[close_idx + 1..].join("\n");

    // The opening fence is file line 1, so `after_opening[0]` is file line 2.
    let close_line = (close_idx + 2) as u32;
    Some((yaml, body, close_line))
}
2511
2512/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2513fn read_summary(abs: &Path) -> Option<String> {
2514    let text = std::fs::read_to_string(abs).ok()?;
2515    let (yaml, _, _) = split_frontmatter(&text)?;
2516    let value: Value = serde_norway::from_str(&yaml).ok()?;
2517    if let Value::Mapping(m) = value {
2518        m.get(Value::String("summary".into()))
2519            .and_then(scalar_string)
2520    } else {
2521        None
2522    }
2523}
2524
2525/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2526/// non-string keys (frontmatter keys are always strings).
2527fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2528    let mut out = BTreeMap::new();
2529    for (k, v) in map {
2530        if let Value::String(s) = k {
2531            out.insert(s.clone(), v.clone());
2532        }
2533    }
2534    out
2535}
2536
2537/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2538/// sequences/mappings/null.
2539fn scalar_string(v: &Value) -> Option<String> {
2540    match v {
2541        Value::String(s) => Some(s.clone()),
2542        Value::Number(n) => Some(n.to_string()),
2543        Value::Bool(b) => Some(b.to_string()),
2544        _ => None,
2545    }
2546}
2547
2548/// True if a frontmatter value carries no content for a *required*-field check:
2549/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2550/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2551/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2552/// a shape/enum field is caught by the later non-scalar shape check, not by the
2553/// required-presence check.
2554fn is_empty_value(v: &Value) -> bool {
2555    match v {
2556        Value::Null => true,
2557        Value::Sequence(items) => items.is_empty(),
2558        Value::Mapping(map) => map.is_empty(),
2559        other => scalar_string(other)
2560            .map(|s| s.trim().is_empty())
2561            .unwrap_or(true),
2562    }
2563}
2564
2565/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2566/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2567fn is_flat_scalar_list(v: &Value) -> bool {
2568    match v {
2569        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2570        _ => false,
2571    }
2572}
2573
2574/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2575/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2576/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2577/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2578/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2579/// both forms uniformly, the way the link textually appears — the doctrine view.
2580///
2581/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2582/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2583fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2584    let mut out = Vec::new();
2585    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2586        for link in links {
2587            out.push((key.clone(), link));
2588        }
2589    }
2590    out
2591}
2592
2593/// The wiki-link targets declared under a single top-level frontmatter key
2594/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2595/// carries no `[[...]]`.
2596fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2597    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2598        if k == key {
2599            return links;
2600        }
2601    }
2602    Vec::new()
2603}
2604
2605/// The raw value text under a single top-level frontmatter key (the remainder of
2606/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2607/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2608fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2609    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2610        if k == key {
2611            return Some(value_text);
2612        }
2613    }
2614    None
2615}
2616
2617/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2618/// each top-level key. A top-level key is a line with no leading indentation in
2619/// `name:` form; its value spans the rest of that line plus any deeper-indented
2620/// continuation lines (block scalars, block sequences) until the next top-level
2621/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2622/// absolute file line.
2623fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2624    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2625    let mut current: Option<(String, String, Vec<Link>)> = None;
2626
2627    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2628        let file_line = fm_start_line + idx as u32;
2629        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2630        let trimmed = raw_line.trim();
2631
2632        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2633        // comment. (Indented or dash lines belong to the current key's value.)
2634        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2635            top_level_key(raw_line)
2636        } else {
2637            None
2638        };
2639
2640        if let Some((key, after)) = new_key {
2641            if let Some(done) = current.take() {
2642                blocks.push(done);
2643            }
2644            let mut links = Vec::new();
2645            collect_line_links(after, file_line, &mut links);
2646            current = Some((key, after.trim().to_string(), links));
2647        } else if let Some((_k, value_text, links)) = current.as_mut() {
2648            // Continuation of the current key's value (indented or dash line).
2649            if !value_text.is_empty() {
2650                value_text.push('\n');
2651            }
2652            value_text.push_str(trimmed);
2653            collect_line_links(raw_line, file_line, links);
2654        }
2655    }
2656    if let Some(done) = current.take() {
2657        blocks.push(done);
2658    }
2659    blocks
2660}
2661
/// Parse a frontmatter key line into `(key, value_after_colon)`.
///
/// The key is the trimmed text before the first `:`. It must be non-empty and
/// made only of alphanumeric characters, `_` and `-`. The returned value slice
/// is everything after that first colon, untrimmed. Returns `None` when the
/// line has no colon or the key is not well-formed.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let value = &line[colon + 1..];

    let well_formed = !name.is_empty()
        && name
            .chars()
            .all(|ch| ch.is_alphanumeric() || matches!(ch, '_' | '-'));
    if well_formed {
        Some((name.to_string(), value))
    } else {
        None
    }
}
2676
2677/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2678/// each tagged with `file_line`.
2679fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2680    let bytes = s.as_bytes();
2681    let mut i = 0;
2682    while i + 1 < bytes.len() {
2683        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2684            if let Some(close) = s[i + 2..].find("]]") {
2685                let inner = &s[i + 2..i + 2 + close];
2686                // Guard against `[[[` (nested) double-counting: the inner must
2687                // not itself open another `[[`.
2688                let target = inner
2689                    .trim_start_matches('[')
2690                    .split('|')
2691                    .next()
2692                    .unwrap_or(inner)
2693                    .trim()
2694                    .to_string();
2695                if !target.is_empty() {
2696                    links.push(Link {
2697                        target,
2698                        line: file_line,
2699                    });
2700                }
2701                i = i + 2 + close + 2;
2702                continue;
2703            }
2704        }
2705        i += 1;
2706    }
2707}
2708
2709/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2710/// Skips fenced code blocks, so example links in docs don't trip the validator.
2711///
2712/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2713/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2714/// only on a line that is the **same** fence character with a run **at least as
2715/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2716/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2717/// document a backtick fence) — the inner backtick line would flip `in_fence`
2718/// off and the demo `[[…]]` inside the code block would be checked as a live
2719/// link, falsely flagging a legal store.
2720fn extract_wiki_links(body: &str) -> Vec<Link> {
2721    let mut out = Vec::new();
2722    let mut fence: Option<(u8, usize)> = None;
2723    for (idx, line) in body.lines().enumerate() {
2724        let content = line.trim_end_matches('\r');
2725        if let Some(f) = fence {
2726            // Inside a fence: the only thing that matters is whether THIS line
2727            // closes it (matching char, run ≥ the opening run). Everything else
2728            // is opaque code — no link extraction.
2729            if fence_closes(content, f) {
2730                fence = None;
2731            }
2732            continue;
2733        }
2734        if let Some(opened) = fence_opens(content) {
2735            fence = Some(opened);
2736            continue;
2737        }
2738        let line_no = (idx + 1) as u32;
2739        let bytes = line.as_bytes();
2740        let mut i = 0;
2741        while i + 1 < bytes.len() {
2742            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2743                if let Some(close) = line[i + 2..].find("]]") {
2744                    let inner = &line[i + 2..i + 2 + close];
2745                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2746                    // Skip a triple-bracket `[[[…` opening: the inner content
2747                    // starts with `[`, so this is the rejected flow-form list
2748                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2749                    // legitimate target never starts with `[`. The frontmatter
2750                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2751                    // extracting a bogus body link here would double-report it as
2752                    // a spurious `WIKI_LINK_SHORT_FORM`.
2753                    if !target.is_empty() && !target.starts_with('[') {
2754                        out.push(Link {
2755                            target,
2756                            line: line_no,
2757                        });
2758                    }
2759                    i = i + 2 + close + 2;
2760                    continue;
2761                }
2762            }
2763            i += 1;
2764        }
2765    }
2766    out
2767}
2768
/// If `line` opens a fenced code block, return `(fence byte, run length)`.
///
/// A fence is a run of at least three backticks or three tildes, preceded by
/// at most three spaces of indentation. For a backtick fence, the text after
/// the run (the info string) may not itself contain a backtick. Described in
/// the file as a local mirror of the parser's `opening_fence`, so the
/// validator's fence tracking matches the rest of the toolkit.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return None;
    }

    let marker = *unindented.as_bytes().first()?;
    if !matches!(marker, b'`' | b'~') {
        return None;
    }

    let run = unindented.bytes().take_while(|&b| b == marker).count();
    if run < 3 {
        return None;
    }

    // A backtick in the info string disqualifies a backtick fence.
    let info = &unindented[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
2794
/// Decide whether `line` closes the currently open `fence`.
///
/// The line must be indented by at most three spaces, start with a run of the
/// fence's own character at least as long as the opening run, and carry
/// nothing but whitespace after that run. A line made of the *other* fence
/// character (a ```` ``` ```` line inside a `~~~` block) therefore does NOT
/// close the fence. Described in the file as a local mirror of the parser's
/// `is_closing_fence`.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, min_run) = fence;

    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return false;
    }

    let tail = unindented.trim_start_matches(marker as char);
    let run = unindented.len() - tail.len();
    run >= min_run && tail.trim().is_empty()
}
2812
2813/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2814/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2815///
2816/// **Scoped to the inline value on the key line.** The SPEC's canonical
2817/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2818/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2819/// flagged — even though, parsed whole, it nests the same way the rejected
2820/// inline flow form does. So this check looks only at the value written *inline*
2821/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2822/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2823/// mis-encoding), it is flagged. A key with no inline value (the block form,
2824/// whose items live on continuation lines) is never inspected here.
2825///
2826/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2827/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2828/// encodes the identical nested sequence but evaded the old prefix match.
2829fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2830    let mut out = Vec::new();
2831    for line in fm_yaml.lines() {
2832        // Top-level key lines only (no indentation, not a comment or list dash).
2833        if line.starts_with(' ') || line.starts_with('\t') {
2834            continue;
2835        }
2836        let Some((key, rest)) = line.split_once(':') else {
2837            continue;
2838        };
2839        let key = key.trim();
2840        if key.is_empty()
2841            || key.starts_with('#')
2842            || key.starts_with('-')
2843            || !key
2844                .chars()
2845                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2846        {
2847            continue;
2848        }
2849        let rest = rest.trim();
2850        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2851        // the unquoted block form has an empty inline value and is never flagged.
2852        if !rest.starts_with('[') {
2853            continue;
2854        }
2855        // Parse just the inline value and test its shape: a list whose items are
2856        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2857        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2858        // `Seq[Seq[String]]` and is NOT flagged).
2859        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2860            let nested = items.iter().any(|item| match item {
2861                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2862                _ => false,
2863            });
2864            if nested {
2865                out.push(key.to_string());
2866            }
2867        }
2868    }
2869    out
2870}
2871
/// Reports whether a bare target (no `.md`) is a full store-relative path:
/// its first `/`-separated segment is one of the two layers (`sources`,
/// `records`) and something non-empty follows the first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, remainder)) => {
            !remainder.is_empty() && matches!(layer, "sources" | "records")
        }
        // No `/` at all: a short-form target, never a full path.
        None => false,
    }
}
2880
/// Reports whether `path` is made only of normal relative components (with `.`
/// tolerated) and has at least one of them. Validator inputs come from
/// user-authored markdown/JSON sidecars, so an absolute path, a platform
/// prefix, or any `..` is rejected outright — a validation probe must never
/// become a filesystem escape. An empty path, or one made only of `.`, is also
/// rejected because it names nothing.
fn is_safe_store_relative_path(path: &Path) -> bool {
    path.components()
        // The accumulator records whether a normal component has been seen;
        // `None` short-circuits on the first escaping component.
        .try_fold(false, |seen_normal, part| match part {
            Component::Normal(_) => Some(true),
            Component::CurDir => Some(seen_normal),
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
        })
        .unwrap_or(false)
}
2895
2896fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2897    let path = Path::new(bare);
2898    if !is_safe_store_relative_path(path) {
2899        return None;
2900    }
2901    Some(PathBuf::from(format!("{bare}.md")))
2902}
2903
/// How a wiki-link / index-entry target resolves on disk. Produced by
/// `resolve_wiki_target`, which separates "unsafe, never probed" from
/// "safe but absent" so callers can report each case differently.
enum TargetResolution {
    /// The target exists as a regular file, either at the literal path as
    /// written or at that path with a `.md` suffix appended.
    Exists,
    /// The target is a safe store-relative path but no file exists for it in
    /// either form.
    Missing,
    /// The target escapes the store (absolute, `..`, prefix) — never probe it.
    Unsafe,
}
2913
2914/// Resolve a bare wiki-link / index-entry target the way the graph engine does
2915/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
2916/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
2917/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
2918/// real file), then the `.md`-appended path (the common case for content
2919/// pages). Without trying the literal path first, a legal link to a raw source
2920/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
2921/// resolves it.
2922fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
2923    // The literal path and the `.md`-appended path share the same safety check
2924    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
2925    // target is unsafe in both forms.
2926    if !is_safe_store_relative_path(Path::new(bare)) {
2927        return TargetResolution::Unsafe;
2928    }
2929    match resolved_target_abs(store, bare) {
2930        Some(_) => TargetResolution::Exists,
2931        None => TargetResolution::Missing,
2932    }
2933}
2934
2935/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
2936/// trying the literal path first, then `.md`-appended — mirroring the graph
2937/// engine. `None` when neither exists, or when the bare target escapes the store
2938/// (callers that need to distinguish unsafe from merely-missing use
2939/// [`resolve_wiki_target`]).
2940fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
2941    if !is_safe_store_relative_path(Path::new(bare)) {
2942        return None;
2943    }
2944    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
2945    // verbatim under `sources/`).
2946    let literal = store.root.join(bare);
2947    if literal.is_file() {
2948        return Some(literal);
2949    }
2950    // The `.md`-appended path (a content page referenced without its extension).
2951    let with_md = store.root.join(format!("{bare}.md"));
2952    if with_md.is_file() {
2953        return Some(with_md);
2954    }
2955    None
2956}
2957
/// Reports whether a bare target path equals `prefix` or lies beneath it (both
/// `.md`-stripped). Trailing `/` on `prefix` is ignored, and the match is
/// segment-aware: `records/contacts-old/a` is not under `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let folder = prefix.trim_end_matches('/');
    match bare.strip_prefix(folder) {
        // Either the path IS the folder, or the next character starts a new segment.
        Some(remainder) => remainder.is_empty() || remainder.starts_with('/'),
        None => false,
    }
}
2963
/// The type-folder of a store-relative content path, i.e. `<layer>/<type-folder>`:
/// the folder directly under the layer (deeper date-shards roll up to it).
/// Returns `None` when the path has fewer than three components (a file sitting
/// directly in a layer folder) or when the first component is not `sources` or
/// `records`.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    // Components that are not valid UTF-8 are dropped from the list.
    let segments: Vec<&str> = rel.iter().filter_map(|part| part.to_str()).collect();
    match segments.as_slice() {
        // layer / type-folder / at least one more component (the file or a shard).
        [layer @ ("sources" | "records"), folder, _, ..] => {
            Some(PathBuf::from(*layer).join(*folder))
        }
        _ => None,
    }
}
2977
2978/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`,
2979/// returning store-relative paths to be parsed in full. Skips hidden dirs and
2980/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
2981/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
2982/// (a single presence-only pass), so the loop default never walks-and-*parses*
2983/// the whole content tree.
2984///
2985/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
2986/// is reserved (`Store::is_in_log_dir` checks only the first path component);
2987/// the walk roots are the two layers, so the root archive is already out of
2988/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
2989/// decision log) is real content (see `is_content_file`), so pruning every
2990/// `name == "log"` made `--all` silently skip those files — reporting fewer
2991/// errors than the default working-set scope on the same store.
2992fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2993    let mut out = Vec::new();
2994    for layer in ["sources", "records"] {
2995        let base = root.join(layer);
2996        if !base.is_dir() {
2997            continue;
2998        }
2999        for entry in walkdir::WalkDir::new(&base)
3000            .into_iter()
3001            .filter_entry(|e| {
3002                let name = e.file_name().to_str().unwrap_or("");
3003                !name.starts_with('.')
3004            })
3005            .flatten()
3006        {
3007            if !entry.file_type().is_file() {
3008                continue;
3009            }
3010            let name = entry.file_name().to_str().unwrap_or("");
3011            if name.ends_with(".md") && name != "index.md" {
3012                if let Ok(rel) = entry.path().strip_prefix(root) {
3013                    out.push(rel.to_path_buf());
3014                }
3015            }
3016        }
3017    }
3018    out.sort();
3019    out
3020}
3021
3022/// Every `index.md` under the store (root + layers + type-folders), as
3023/// store-relative paths. Used to detect orphan indexes. Like
3024/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
3025/// and its `index.md` is not pruned (only the root-level `log/` archive is
3026/// reserved, and the walk roots are the two layers, so it is already
3027/// out of scope).
3028fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3029    let mut out = Vec::new();
3030    if root.join("index.md").is_file() {
3031        out.push(PathBuf::from("index.md"));
3032    }
3033    for layer in ["sources", "records"] {
3034        let base = root.join(layer);
3035        if !base.is_dir() {
3036            continue;
3037        }
3038        for entry in walkdir::WalkDir::new(&base)
3039            .into_iter()
3040            .filter_entry(|e| {
3041                let name = e.file_name().to_str().unwrap_or("");
3042                !name.starts_with('.')
3043            })
3044            .flatten()
3045        {
3046            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3047                if let Ok(rel) = entry.path().strip_prefix(root) {
3048                    out.push(rel.to_path_buf());
3049                }
3050            }
3051        }
3052    }
3053    out.sort();
3054    out
3055}
3056
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number.
struct IndexEntry {
    /// The wiki-link target with any `|display` segment removed and
    /// surrounding whitespace trimmed.
    target: String,
    /// The summary text following the separator, or `None` when the line has
    /// no separator or the text after it is empty.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md` text.
    line: u32,
}
3064
3065/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3066/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3067/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3068/// path, the summary text is whatever follows the em dash.
3069fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3070    let mut out = Vec::new();
3071    let mut in_more = false;
3072    for (idx, line) in text.lines().enumerate() {
3073        let trimmed = line.trim_start();
3074        if trimmed.starts_with("## More") {
3075            in_more = true;
3076            continue;
3077        }
3078        if in_more {
3079            continue;
3080        }
3081        if !trimmed.starts_with("- ") {
3082            continue;
3083        }
3084        // Find the first `[[...]]`.
3085        let Some(open) = trimmed.find("[[") else {
3086            continue;
3087        };
3088        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3089            continue;
3090        };
3091        let inner = &trimmed[open + 2..open + 2 + close_rel];
3092        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3093
3094        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3095        let after = &trimmed[open + 2 + close_rel + 2..];
3096        let summary_text = extract_index_entry_summary(after);
3097
3098        out.push(IndexEntry {
3099            target,
3100            summary_text,
3101            line: (idx + 1) as u32,
3102        });
3103    }
3104    out
3105}
3106
3107/// Pull the summary portion out of the text trailing an index entry's
3108/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3109/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3110/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3111/// renderer's tag separator).
3112fn extract_index_entry_summary(after: &str) -> Option<String> {
3113    let mut s = after.trim();
3114    // Drop a leading "(N ...)" count segment, if present.
3115    if s.starts_with('(') {
3116        if let Some(close) = s.find(')') {
3117            s = s[close + 1..].trim_start();
3118        }
3119    }
3120    // Require an em dash or hyphen separator before the summary.
3121    let s = if let Some(rest) = s.strip_prefix('—') {
3122        rest.trim()
3123    } else if let Some(rest) = s.strip_prefix('-') {
3124        rest.trim()
3125    } else {
3126        return None;
3127    };
3128    if s.is_empty() {
3129        return None;
3130    }
3131    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3132    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3133    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3134    // the file has no tags. The previous code also accepted a *single*-spaced
3135    // ` · ` separator, which collided with a legal summary whose own text ends
3136    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3137    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3138    // summary verbatim (no tag block, since there are no tags), but the loose
3139    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3140    // `"Standup notes"` against the file's full summary, and emitted a spurious
3141    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3142    // (rebuild regenerates the identical line). Matching the renderer's exact
3143    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3144    // matches from the right so only the real trailing tag block is considered.
3145    let s = match s.rsplit_once("  ·  ") {
3146        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3147        _ => s,
3148    };
3149    Some(s.to_string())
3150}
3151
/// Reports whether `s` is a non-empty tag block: at least one
/// whitespace-separated token, every token starting with `#` and carrying at
/// least one more byte after it. This is the shape the index renderer appends
/// after the `·` separator (`crate::index::format_md_entry`), and is used to
/// tell that suffix apart from a literal `·` inside the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // An empty / all-whitespace string has no tokens and is not a tag block.
    tokens.peek().is_some() && tokens.all(|tok| tok.starts_with('#') && tok.len() >= 2)
}
3166
3167/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3168/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3169/// or the header isn't well-formed.
3170fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3171    let rest = line.strip_prefix("## [")?;
3172    let close = rest.find(']')?;
3173    let ts_str = &rest[..close];
3174    let tail = rest[close + 1..].trim();
3175
3176    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3177    // attach a zero offset — the log header carries minute precision, no zone.
3178    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3179    let offset = FixedOffset::east_opt(0)?;
3180    let ts = naive.and_local_timezone(offset).single()?;
3181
3182    // kind | object
3183    let (kind, object) = match tail.split_once('|') {
3184        Some((k, o)) => {
3185            let o = o.trim();
3186            (
3187                k.trim().to_string(),
3188                if o.is_empty() {
3189                    None
3190                } else {
3191                    Some(o.to_string())
3192                },
3193            )
3194        }
3195        None => (tail.to_string(), None),
3196    };
3197    if kind.is_empty() {
3198        return None;
3199    }
3200    Some((ts, kind, object))
3201}
3202
3203/// Every log file that holds entries for the working-set scan: the active
3204/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3205/// strictly-prior-month entries into the archives, so the active file alone is
3206/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3207/// unvalidated object can live in an archive after a month rollover. Reading the
3208/// archives here keeps the working-set readers in sync with the rest of the log
3209/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3210/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3211/// only log headers, never the content store, so the loop budget is preserved.
3212fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3213    let mut files = vec![store.root.join("log.md")];
3214    let archive_dir = store.root.join("log");
3215    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3216        let mut archives: Vec<PathBuf> = entries
3217            .flatten()
3218            .map(|e| e.path())
3219            .filter(|p| {
3220                p.is_file()
3221                    && p.file_name()
3222                        .and_then(|s| s.to_str())
3223                        .and_then(|n| n.strip_suffix(".md"))
3224                        .is_some_and(is_year_month_archive)
3225            })
3226            .collect();
3227        // Deterministic order (oldest month first); the callers fold across all
3228        // files so order doesn't affect the result, but a stable order keeps the
3229        // scan reproducible.
3230        archives.sort();
3231        files.extend(archives);
3232    }
3233    files
3234}
3235
/// Reports whether `s` has the shape of a `YYYY-MM` archive stem: exactly four
/// ASCII digits, a `-`, and two ASCII digits — the `log/<YYYY-MM>.md` naming
/// the rotation in [`crate::log`] emits. Only the shape is checked; the month
/// value itself is not range-validated.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1] => [y0, y1, y2, y3, m0, m1]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
3245
3246/// The timestamp of the most recent `validate` entry across the active `log.md`
3247/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3248/// Reads only headers; never the whole store. Archive-aware so a `validate`
3249/// entry that rotated into an archive after a month rollover still anchors the
3250/// cutoff (without this, the cutoff silently resets to `None`).
3251fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3252    let mut latest: Option<DateTime<FixedOffset>> = None;
3253    for file in log_files_for_working_set(store) {
3254        let Ok(text) = std::fs::read_to_string(&file) else {
3255            continue;
3256        };
3257        for line in text.lines() {
3258            if !line.starts_with("## [") {
3259                continue;
3260            }
3261            if let Some((ts, kind, _)) = parse_log_header(line) {
3262                if kind == "validate" {
3263                    latest = Some(match latest {
3264                        Some(p) if p >= ts => p,
3265                        _ => ts,
3266                    });
3267                }
3268            }
3269        }
3270    }
3271    latest
3272}
3273
3274/// The set of content objects changed since `cutoff`, read from log entries
3275/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3276/// counts (no prior validate window). Returns store-relative `.md` paths.
3277///
3278/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3279/// month rollover [`Log::append`] rotates prior-month entries out of the active
3280/// file, so an object changed-but-never-validated in a prior month lives only in
3281/// an archive. Reading the archives here is what keeps `dbmd validate` from
3282/// silently skipping those files. Reads only log headers, never the content
3283/// store.
3284fn changed_objects_since(
3285    store: &Store,
3286    cutoff: Option<DateTime<FixedOffset>>,
3287) -> BTreeSet<PathBuf> {
3288    let mut out = BTreeSet::new();
3289    for file in log_files_for_working_set(store) {
3290        let Ok(text) = std::fs::read_to_string(&file) else {
3291            continue;
3292        };
3293        for line in text.lines() {
3294            if !line.starts_with("## [") {
3295                continue;
3296            }
3297            let Some((ts, kind, object)) = parse_log_header(line) else {
3298                continue;
3299            };
3300            if let Some(c) = cutoff {
3301                if ts < c {
3302                    continue;
3303                }
3304            }
3305            if !matches!(
3306                kind.as_str(),
3307                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3308            ) {
3309                continue;
3310            }
3311            if let Some(obj) = object {
3312                // The object slot is a store-relative path (or a wiki-link target).
3313                let bare = obj
3314                    .trim()
3315                    .trim_start_matches("[[")
3316                    .trim_end_matches("]]")
3317                    .split('|')
3318                    .next()
3319                    .unwrap_or("")
3320                    .trim()
3321                    .trim_end_matches(".md")
3322                    .to_string();
3323                if bare.is_empty() {
3324                    continue;
3325                }
3326                out.insert(PathBuf::from(format!("{bare}.md")));
3327            }
3328        }
3329    }
3330    out
3331}
3332
/// The result of the [`derived_from_ignored_type`] policy check: the
/// `derived_from` target that resolves to an ignored-type record, plus that
/// record's type. Carries exactly what both the validate finding and the
/// write-time warning need to render their message.
///
/// When several targets would match, this describes the first one in the
/// order the targets were supplied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target as written (bare store-relative path,
    /// no `.md`), copied verbatim from the caller-supplied target string.
    pub target: String,
    /// The resolved `type` of that target, which is present in
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3346
3347/// **The single authoritative `### Ignored types` derivation check.** Decides
3348/// whether a conclusion record derives from an ignored-type record: the
3349/// `meta-type` must be `conclusion`, `### Ignored types` must be non-empty, and
3350/// some `derived_from` target must resolve to a record whose `type` is in
3351/// `ignored_types`. Returns the first such target (and its type), or `None`.
3352///
3353/// Both surfaces call this so the policy lives in exactly one place:
3354/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3355/// `derived_from` targets it scanned from the raw frontmatter, and the write
3356/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3357/// The link *extraction* differs per surface (text-scan with line numbers vs.
3358/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3359/// resolution, and `ignored_types` membership — does not.
3360pub fn derived_from_ignored_type<I, S>(
3361    store: &Store,
3362    meta_type: &str,
3363    derived_from_targets: I,
3364) -> Option<DerivedFromIgnored>
3365where
3366    I: IntoIterator<Item = S>,
3367    S: AsRef<str>,
3368{
3369    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3370        return None;
3371    }
3372    for target in derived_from_targets {
3373        let target = target.as_ref();
3374        if let Some(target_type) = link_target_type(store, target) {
3375            if store.config.ignored_types.contains(&target_type) {
3376                return Some(DerivedFromIgnored {
3377                    target: target.to_string(),
3378                    target_type,
3379                });
3380            }
3381        }
3382    }
3383    None
3384}
3385
3386/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3387fn link_target_type(store: &Store, target: &str) -> Option<String> {
3388    let bare = target.trim_end_matches(".md");
3389    let abs = store.root.join(safe_md_target_rel(bare)?);
3390    let text = std::fs::read_to_string(&abs).ok()?;
3391    let (yaml, _, _) = split_frontmatter(&text)?;
3392    let value: Value = serde_norway::from_str(&yaml).ok()?;
3393    if let Value::Mapping(m) = value {
3394        m.get(Value::String("type".into())).and_then(scalar_string)
3395    } else {
3396        None
3397    }
3398}
3399
3400// ── Shape validators ─────────────────────────────────────────────────────────
3401
3402/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3403/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3404fn is_iso8601(s: &str) -> bool {
3405    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3406}
3407
3408/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3409/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3410/// accept the date-only form per the SPEC's worked example.
3411fn is_iso8601_date_or_datetime(s: &str) -> bool {
3412    let s = s.trim();
3413    if DateTime::parse_from_rfc3339(s).is_ok() {
3414        return true;
3415    }
3416    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3417}
3418
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
/// Surrounding whitespace is ignored; the rules on the trimmed value are:
///
/// - There must be exactly one `@`: a domain that still contains an `@` after
///   the split (the common double-`@` typo `sarah@@acme.com`, or `a@b@c.com`)
///   is rejected.
/// - The domain must contain a `.` and every dot-separated label must be
///   non-empty. This covers a leading dot, a trailing dot, and an empty
///   interior label (`a@b..com`).
/// - Neither part may contain whitespace of any kind — an embedded tab or
///   newline is rejected just like a space.
fn is_email(s: &str) -> bool {
    let Some((local, domain)) = s.trim().split_once('@') else {
        return false;
    };
    let has_whitespace = |part: &str| part.contains(char::is_whitespace);
    !local.is_empty()
        && !domain.contains('@')
        && domain.contains('.')
        && domain.split('.').all(|label| !label.is_empty())
        && !has_whitespace(local)
        && !has_whitespace(domain)
}
3436
3437/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
3438/// plain decimal number with optional thousands separators and ≤ 2 decimals.
3439///
3440/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
3441/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
3442/// rejected, and the ≤ 2-decimal rule is actually enforced.
3443fn is_currency(s: &str) -> bool {
3444    let mut t = s.trim();
3445    // Strip a leading currency symbol …
3446    for sym in ["$", "€", "£", "¥"] {
3447        if let Some(rest) = t.strip_prefix(sym) {
3448            t = rest.trim_start();
3449            break;
3450        }
3451    }
3452    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
3453    // code must be exactly three ASCII letters and separated from the number by
3454    // whitespace, so a bare `USD` with no amount still fails.
3455    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
3456        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
3457            t = rest.trim_start();
3458        }
3459    }
3460
3461    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
3462    is_plain_amount(cleaned.trim())
3463}
3464
/// Reports whether a string is a bare decimal amount: an optional single `+` or
/// `-` sign, one or more ASCII digits, and optionally a `.` followed by one or
/// two ASCII digits. Exponents, `inf`/`NaN`, a missing integer part, a dangling
/// `.`, and the empty string are all rejected.
fn is_plain_amount(s: &str) -> bool {
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (whole, fraction) = unsigned
        .split_once('.')
        .map_or((unsigned, None), |(w, f)| (w, Some(f)));

    if whole.is_empty() || !all_digits(whole) {
        return false;
    }
    // No fractional part is fine; a present one must be exactly 1 or 2 digits.
    fraction.map_or(true, |f| matches!(f.len(), 1 | 2) && all_digits(f))
}
3481
/// Reports whether a string (ignoring surrounding whitespace) is an http(s)
/// URL: it starts with `http://` or `https://` and has at least one character
/// after the scheme. A single-character host (`http://x`, e.g. an
/// intranet/container hostname) is accepted; a bare scheme with nothing after
/// it (`http://`, `https://`) is rejected. The scheme match is case-sensitive.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    ["http://", "https://"].iter().any(|scheme| {
        candidate
            .strip_prefix(*scheme)
            .is_some_and(|rest| !rest.is_empty())
    })
}
3496
3497/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3498fn shape_suggestion(shape: Shape) -> String {
3499    match shape {
3500        Shape::String => "use a scalar string".into(),
3501        Shape::Int => "use an integer".into(),
3502        Shape::Bool => "use `true` or `false`".into(),
3503        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3504        Shape::Email => "use a `<local>@<domain>` address".into(),
3505        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3506        Shape::Url => "use an http(s) URL".into(),
3507    }
3508}
3509
3510/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3511/// can't know the folder, so the suggestion is generic but actionable.
3512fn short_form_suggestion(bare: &str) -> Option<String> {
3513    Some(format!(
3514        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3515        slugish(bare)
3516    ))
3517}
3518
/// Turn a plain string into a filesystem-ish leaf: trimmed, lowercased, each
/// whitespace character replaced by `-`, and every character other than
/// alphanumerics, `-`, `/`, and `_` dropped.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        if ch.is_whitespace() {
            slug.push('-');
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3528
3529/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3530/// hashes a byte or reads an asset file's contents — byte presence and hash
3531/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3532/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3533/// content file's `asset`/`assets` declarations.
3534fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3535    use crate::assets;
3536
3537    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3538    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3539
3540    // Lenient manifest read: a malformed line is reported, not fatal.
3541    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3542    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3543        for (i, line) in text.lines().enumerate() {
3544            if line.trim().is_empty() {
3545                continue;
3546            }
3547            match serde_json::from_str::<assets::AssetRecord>(line) {
3548                Ok(rec) => {
3549                    manifest.insert(rec.path.clone(), rec);
3550                }
3551                Err(e) => push(
3552                    issues,
3553                    Severity::Error,
3554                    codes::ASSET_MANIFEST_MALFORMED,
3555                    manifest_rel,
3556                    Some((i as u32) + 1),
3557                    None,
3558                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3559                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3560                    vec![],
3561                ),
3562            }
3563        }
3564    }
3565
3566    // Per-wrapper declarations: every declared asset must be in the manifest and
3567    // must not point at a markdown content file.
3568    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3569    for (rel, p) in parsed {
3570        let Some(map) = &p.fm else {
3571            continue;
3572        };
3573        for decl in assets::declarations_from_yaml_map(map) {
3574            let norm = match assets::normalize_asset_path(&decl.path) {
3575                Ok(n) => n,
3576                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3577            };
3578            declared.insert(norm.clone());
3579            let is_md = Path::new(&norm)
3580                .extension()
3581                .and_then(|e| e.to_str())
3582                .map(|e| e.eq_ignore_ascii_case("md"))
3583                .unwrap_or(false);
3584            if is_md {
3585                push(
3586                    issues,
3587                    Severity::Warning,
3588                    codes::ASSET_PATH_IS_CONTENT,
3589                    rel,
3590                    None,
3591                    Some("asset".to_string()),
3592                    format!("asset path `{norm}` points at a markdown content file"),
3593                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3594                    vec![PathBuf::from(&norm)],
3595                );
3596            }
3597            if !manifest.contains_key(&norm) {
3598                push(
3599                    issues,
3600                    Severity::Error,
3601                    codes::ASSET_UNDECLARED,
3602                    rel,
3603                    None,
3604                    Some("asset".to_string()),
3605                    format!(
3606                        "references asset `{norm}` with no record in {}",
3607                        assets::MANIFEST_FILE
3608                    ),
3609                    Some("run `dbmd assets scan` to catalog it".to_string()),
3610                    vec![PathBuf::from(&norm)],
3611                );
3612            }
3613        }
3614    }
3615
3616    // Per-record: wrapper existence + orphan detection.
3617    for (path, rec) in &manifest {
3618        for w in &rec.wrappers {
3619            if !store.root.join(w).is_file() {
3620                push(
3621                    issues,
3622                    Severity::Error,
3623                    codes::ASSET_WRAPPER_BROKEN,
3624                    Path::new(path),
3625                    None,
3626                    None,
3627                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3628                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3629                    vec![PathBuf::from(w)],
3630                );
3631            }
3632        }
3633        if !declared.contains(path) {
3634            push(
3635                issues,
3636                Severity::Warning,
3637                codes::ASSET_MANIFEST_ORPHAN,
3638                Path::new(path),
3639                None,
3640                None,
3641                format!(
3642                    "`{path}` is in {} but no wrapper references it",
3643                    assets::MANIFEST_FILE
3644                ),
3645                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3646                vec![],
3647            );
3648        }
3649    }
3650}
3651
3652/// Push a fully-formed [`Issue`].
3653#[allow(clippy::too_many_arguments)]
3654fn push(
3655    issues: &mut Vec<Issue>,
3656    severity: Severity,
3657    code: &'static str,
3658    file: &Path,
3659    line: Option<u32>,
3660    key: Option<String>,
3661    message: String,
3662    suggestion: Option<String>,
3663    related: Vec<PathBuf>,
3664) {
3665    issues.push(Issue {
3666        severity,
3667        code,
3668        file: file.to_path_buf(),
3669        line,
3670        key,
3671        message,
3672        suggestion,
3673        related,
3674    });
3675}
3676
/// File line (1-based) of the first top-level frontmatter key `key` found in
/// the YAML block `fm_yaml`, or `None` when no line qualifies.
///
/// A line qualifies when it begins with `key` at column 0 (so indented keys
/// and list items never match) and the key is immediately followed by `:`.
/// The returned number is offset to the file: the YAML block's first line is
/// reported as line 2, because file line 1 is the opening `---`.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let is_top_level_key = |raw: &str| {
        // Column-0 requirement first, then the `key:` shape on the trimmed text.
        raw.starts_with(key)
            && raw
                .trim_start()
                .strip_prefix(key)
                .is_some_and(|tail| tail.starts_with(':'))
    };
    fm_yaml
        .lines()
        .position(is_top_level_key)
        // +2: YAML line index 0 corresponds to file line 2.
        .map(|idx| (idx as u32) + 2)
}
3692
3693/// The line a *field-absence* issue (a required key that is missing entirely)
3694/// anchors to: the key's line when present, else line `1` — the frontmatter
3695/// block's opening `---`. A missing key has no line of its own; anchoring it to
3696/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3697/// line to point at instead of an unhelpful `null`.
3698fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3699    fm_key_line(fm_yaml, key).or(Some(1))
3700}
3701
3702/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3703/// output deterministic across runs.
3704fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3705    a.file
3706        .cmp(&b.file)
3707        .then(a.line.cmp(&b.line))
3708        .then(a.code.cmp(b.code))
3709        .then(a.key.cmp(&b.key))
3710}
3711
3712// ═════════════════════════════════════════════════════════════════════════════
3713//  Tests
3714// ═════════════════════════════════════════════════════════════════════════════
3715
3716#[cfg(test)]
3717mod tests {
3718    use super::*;
3719    use crate::parser::{Config, FieldSpec};
3720    use std::fs;
3721    use tempfile::TempDir;
3722
3723    #[test]
3724    fn split_frontmatter_tolerates_leading_bom() {
3725        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3726        // fence must not make validate treat the file as frontmatter-less while
3727        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3728        // for `\u{feff}---` and the function returned None.
3729        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3730        let parsed = split_frontmatter(text);
3731        assert!(
3732            parsed.is_some(),
3733            "a leading BOM must not hide frontmatter from validate"
3734        );
3735        let (yaml, body, close_line) = parsed.unwrap();
3736        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3737        assert_eq!(body, "body");
3738        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3739    }
3740
3741    /// A test store builder over a real tempdir. Every helper writes real files
3742    /// so the assertions exercise real behavior, not mocks.
3743    struct Fixture {
3744        dir: TempDir,
3745        config: Config,
3746    }
3747
3748    impl Fixture {
3749        /// A fresh store with a **valid** `DB.md` (the identity contract:
3750        /// `type: db-md` + `scope` + `owner`) and the two layer dirs. A valid
3751        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3752        /// clean; tests that want a broken DB.md write their own via `write`.
3753        fn new() -> Self {
3754            let dir = TempDir::new().unwrap();
3755            fs::write(
3756                dir.path().join("DB.md"),
3757                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3758            )
3759            .unwrap();
3760            for layer in ["sources", "records"] {
3761                fs::create_dir_all(dir.path().join(layer)).unwrap();
3762            }
3763            Fixture {
3764                dir,
3765                config: Config::default(),
3766            }
3767        }
3768
3769        /// A store with no `DB.md` marker.
3770        fn bare() -> Self {
3771            let dir = TempDir::new().unwrap();
3772            Fixture {
3773                dir,
3774                config: Config::default(),
3775            }
3776        }
3777
3778        /// Write a file at a store-relative path, creating parent dirs.
3779        fn write(&self, rel: &str, contents: &str) {
3780            let abs = self.dir.path().join(rel);
3781            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3782            fs::write(abs, contents).unwrap();
3783        }
3784
3785        fn store(&self) -> Store {
3786            Store {
3787                root: self.dir.path().to_path_buf(),
3788                config: self.config.clone(),
3789            }
3790        }
3791
3792        fn store_all(&self) -> Vec<Issue> {
3793            validate_all(&self.store()).unwrap()
3794        }
3795
3796        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3797        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3798        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3799        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3800        /// the sidecar carries the COMPLETE per-field projection and the fixture
3801        /// can't silently drift from what the index writer emits.
3802        fn rebuild_indexes(&self) {
3803            crate::index::Index::rebuild_all(&self.store()).unwrap();
3804        }
3805    }
3806
3807    /// True if any issue has this code.
3808    fn has(issues: &[Issue], code: &str) -> bool {
3809        issues.iter().any(|i| i.code == code)
3810    }
3811
3812    /// Count issues with a code.
3813    fn count(issues: &[Issue], code: &str) -> usize {
3814        issues.iter().filter(|i| i.code == code).count()
3815    }
3816
3817    /// The first issue with a code, or panic.
3818    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3819        issues
3820            .iter()
3821            .find(|i| i.code == code)
3822            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3823    }
3824
3825    /// A minimal valid `contact` body for reuse.
3826    fn valid_contact(summary: &str) -> String {
3827        format!(
3828            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3829        )
3830    }
3831
3832    // ── store marker ──────────────────────────────────────────────────────────
3833
3834    #[test]
3835    fn not_a_store_when_db_md_absent() {
3836        let fx = Fixture::bare();
3837        let issues = fx.store_all();
3838        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3839        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3840        assert!(issues[0].is_error());
3841    }
3842
3843    #[test]
3844    fn working_set_also_reports_not_a_store() {
3845        let fx = Fixture::bare();
3846        let issues = validate_working_set(&fx.store(), None).unwrap();
3847        assert!(has(&issues, codes::NOT_A_STORE));
3848    }
3849
3850    #[test]
3851    fn clean_store_has_no_issues() {
3852        let fx = Fixture::new();
3853        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3854        // Build the canonical indexes (complete per-field jsonl included) the
3855        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3856        // proven clean across every projected field, not just summary/type.
3857        fx.rebuild_indexes();
3858        let issues = fx.store_all();
3859        assert!(
3860            issues.is_empty(),
3861            "expected a clean store, got: {issues:#?}"
3862        );
3863    }
3864
3865    // ── meta-type closed enum ─────────────────────────────────────────────────
3866
3867    /// Regression (adversarial review): a NON-SCALAR `meta-type` (a YAML list or
3868    /// mapping) must be rejected with `FM_BAD_META_TYPE`, not silently slip past
3869    /// the enum check (and then get reclassified as the default `fact`). Pre-fix
3870    /// the check was gated on `and_then(scalar_string)`, which returned `None`
3871    /// for a sequence/mapping and short-circuited the whole branch.
3872    #[test]
3873    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
3874        let fx = Fixture::new();
3875        let body = |mt: &str| {
3876            format!(
3877                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
3878            )
3879        };
3880
3881        // Valid enum members + absent (default fact) → no FM_BAD_META_TYPE.
3882        for ok in ["fact", "operational", "conclusion"] {
3883            fx.write("records/profiles/ok.md", &body(ok));
3884            let issues = validate_working_set(&fx.store(), None).unwrap();
3885            assert!(
3886                !has(&issues, codes::FM_BAD_META_TYPE),
3887                "`meta-type: {ok}` must be accepted; got {issues:#?}"
3888            );
3889        }
3890        fx.write(
3891            "records/profiles/absent.md",
3892            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
3893        );
3894        assert!(
3895            !has(
3896                &validate_working_set(&fx.store(), None).unwrap(),
3897                codes::FM_BAD_META_TYPE
3898            ),
3899            "an absent meta-type is the default `fact` and must be accepted"
3900        );
3901
3902        // Scalar-but-wrong, AND non-scalar (list / mapping) → FM_BAD_META_TYPE.
3903        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
3904            let fx2 = Fixture::new();
3905            fx2.write("records/profiles/bad.md", &body(bad));
3906            let issues = validate_working_set(&fx2.store(), None).unwrap();
3907            assert!(
3908                has(&issues, codes::FM_BAD_META_TYPE),
3909                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
3910            );
3911        }
3912    }
3913
3914    // ── DB.md structure ───────────────────────────────────────────────────────
3915
3916    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3917    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3918    /// would fail here).
3919    #[test]
3920    fn valid_db_md_emits_no_structure_issue() {
3921        let fx = Fixture::new();
3922        let issues = fx.store_all();
3923        assert!(
3924            !has(&issues, codes::DB_MD_BAD_TYPE)
3925                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3926                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3927            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3928        );
3929    }
3930
3931    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3932    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3933    /// accepting a non-`db-md` type, breaks this.
3934    #[test]
3935    fn db_md_wrong_type_is_error() {
3936        let fx = Fixture::new();
3937        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3938        let issues = fx.store_all();
3939        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3940        assert!(i.is_error());
3941        assert_eq!(i.file, PathBuf::from("DB.md"));
3942        assert_eq!(i.key.as_deref(), Some("type"));
3943        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3944    }
3945
3946    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3947    /// absent field, each keyed on its field name, anchored to the block top.
3948    #[test]
3949    fn db_md_missing_scope_and_owner_each_report() {
3950        let fx = Fixture::new();
3951        fx.write("DB.md", "---\ntype: db-md\n---\n");
3952        let issues = fx.store_all();
3953        assert_eq!(
3954            count(&issues, codes::DB_MD_MISSING_FIELD),
3955            2,
3956            "both scope and owner absent → two issues: {issues:#?}"
3957        );
3958        let keys: BTreeSet<Option<String>> = issues
3959            .iter()
3960            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3961            .map(|i| i.key.clone())
3962            .collect();
3963        assert_eq!(
3964            keys,
3965            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3966            "one issue keyed on each missing field"
3967        );
3968        for i in issues
3969            .iter()
3970            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3971        {
3972            assert!(i.is_error());
3973            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3974        }
3975    }
3976
3977    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3978    /// anchored to its own line — guarding against an "is the key textually
3979    /// present?" shortcut that would miss `owner:` with an empty value.
3980    #[test]
3981    fn db_md_blank_required_field_is_missing() {
3982        let fx = Fixture::new();
3983        fx.write(
3984            "DB.md",
3985            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3986        );
3987        let issues = fx.store_all();
3988        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3989        assert_eq!(i.key.as_deref(), Some("owner"));
3990        assert_eq!(
3991            i.line,
3992            Some(4),
3993            "a present-but-empty field anchors to its line"
3994        );
3995        assert!(
3996            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3997            "scope is present and non-empty → only owner reported"
3998        );
3999    }
4000
4001    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
4002    /// to the heading's file line; the three recognized sections stay silent.
4003    #[test]
4004    fn db_md_unknown_section_is_warning() {
4005        let fx = Fixture::new();
4006        fx.write(
4007            "DB.md",
4008            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
4009            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
4010            // 11 `## Glossary`.
4011            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
4012        );
4013        let issues = fx.store_all();
4014        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
4015        assert!(!i.is_error(), "unknown section is a warning, not an error");
4016        assert_eq!(i.severity, Severity::Warning);
4017        assert_eq!(
4018            i.line,
4019            Some(11),
4020            "anchors to the `## Glossary` heading line"
4021        );
4022        assert!(
4023            i.message.contains("Glossary"),
4024            "the message names the offending section: {}",
4025            i.message
4026        );
4027        // The recognized `## Agent instructions` section did NOT fire.
4028        assert_eq!(
4029            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
4030            1,
4031            "only the unrecognized section is flagged: {issues:#?}"
4032        );
4033    }
4034
4035    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
4036    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
4037    #[test]
4038    fn db_md_no_frontmatter_reports_type_and_both_fields() {
4039        let fx = Fixture::new();
4040        fx.write("DB.md", "# just a heading, no frontmatter\n");
4041        let issues = fx.store_all();
4042        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
4043        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
4044    }
4045
4046    // ── frontmatter ─────────────────────────────────────────────────────────
4047
4048    #[test]
4049    fn missing_type_is_error() {
4050        let fx = Fixture::new();
4051        fx.write(
4052            "records/contacts/a.md",
4053            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
4054        );
4055        let issues = fx.store_all();
4056        assert!(has(&issues, codes::FM_MISSING_TYPE));
4057        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
4058    }
4059
4060    #[test]
4061    fn missing_universal_timestamps_are_errors_on_content_files() {
4062        let fx = Fixture::new();
4063        fx.write(
4064            "records/contacts/a.md",
4065            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
4066        );
4067        let issues = fx.store_all();
4068
4069        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
4070        assert_eq!(missing_created.key.as_deref(), Some("created"));
4071        assert!(missing_created.is_error());
4072
4073        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
4074        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
4075        assert!(missing_updated.is_error());
4076    }
4077
4078    #[test]
4079    fn meta_files_do_not_require_universal_timestamps() {
4080        let fx = Fixture::new();
4081        let issues = fx.store_all();
4082
4083        assert!(
4084            !has(&issues, codes::FM_MISSING_CREATED),
4085            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4086        );
4087        assert!(
4088            !has(&issues, codes::FM_MISSING_UPDATED),
4089            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4090        );
4091    }
4092
4093    #[test]
4094    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
4095        let fx = Fixture::new();
4096        fx.write(
4097            "records/profiles/a.md",
4098            "# Just a heading\n\nNo frontmatter here.\n",
4099        );
4100        let issues = fx.store_all();
4101        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4102        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4103    }
4104
4105    #[test]
4106    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4107        let fx = Fixture::new();
4108        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
4109        let issues = fx.store_all();
4110        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4111        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4112    }
4113
4114    #[test]
4115    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4116        let fx = Fixture::new();
4117        // A tab inside a mapping value is invalid YAML.
4118        fx.write(
4119            "records/contacts/a.md",
4120            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4121        );
4122        let issues = fx.store_all();
4123        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4124        assert!(issue.is_error());
4125        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4126        // When YAML doesn't parse we don't *also* claim the summary is missing;
4127        // the agent fixes the YAML first.
4128        assert!(
4129            !has(&issues, codes::SUMMARY_MISSING),
4130            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4131        );
4132    }
4133
4134    #[test]
4135    fn bad_created_timestamp_is_error() {
4136        let fx = Fixture::new();
4137        fx.write(
4138            "records/contacts/a.md",
4139            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4140        );
4141        let issues = fx.store_all();
4142        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4143        assert_eq!(issue.key.as_deref(), Some("created"));
4144        assert!(issue.is_error());
4145    }
4146
4147    #[test]
4148    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4149        let fx = Fixture::new();
4150        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4151        // `last_touch` is a type-specific date field → date-only is fine.
4152        fx.write(
4153            "records/contacts/a.md",
4154            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4155        );
4156        let issues = fx.store_all();
4157        let created_issues: Vec<_> = issues
4158            .iter()
4159            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4160            .collect();
4161        assert_eq!(
4162            created_issues.len(),
4163            1,
4164            "date-only `created` must fail: {issues:#?}"
4165        );
4166        assert!(
4167            !issues.iter().any(
4168                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4169            ),
4170            "date-only `last_touch` is valid: {issues:#?}"
4171        );
4172    }
4173
4174    // ── summary ─────────────────────────────────────────────────────────────
4175
4176    #[test]
4177    fn summary_missing_empty_multiline_toolong() {
4178        let fx = Fixture::new();
4179        fx.write(
4180            "records/profiles/missing.md",
4181            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4182        );
4183        fx.write(
4184            "records/profiles/empty.md",
4185            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4186        );
4187        let long = "x".repeat(201);
4188        fx.write(
4189            "records/profiles/long.md",
4190            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4191        );
4192        let issues = fx.store_all();
4193        assert!(has(&issues, codes::SUMMARY_MISSING));
4194        assert_eq!(
4195            find(&issues, codes::SUMMARY_MISSING).file,
4196            PathBuf::from("records/profiles/missing.md")
4197        );
4198        assert!(has(&issues, codes::SUMMARY_EMPTY));
4199        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4200        assert_eq!(
4201            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4202            Severity::Warning
4203        );
4204    }
4205
4206    #[test]
4207    fn summary_multiline_via_yaml_block_scalar() {
4208        let fx = Fixture::new();
4209        // A literal block scalar produces a value with a newline.
4210        fx.write(
4211            "records/profiles/a.md",
4212            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4213        );
4214        let issues = fx.store_all();
4215        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4216    }
4217
4218    #[test]
4219    fn summary_exactly_200_chars_is_ok() {
4220        let fx = Fixture::new();
4221        let s = "y".repeat(200);
4222        fx.write(
4223            "records/profiles/a.md",
4224            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4225        );
4226        let issues = fx.store_all();
4227        assert!(
4228            !has(&issues, codes::SUMMARY_TOO_LONG),
4229            "200 is the bound, inclusive: {issues:#?}"
4230        );
4231    }
4232
4233    #[test]
4234    fn meta_files_need_no_summary() {
4235        let fx = Fixture::new();
4236        // The root/layer/type indexes + log carry no summary and must not be
4237        // flagged. (A lone DB.md store with one contact and full indexes.)
4238        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4239        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4240        fx.write(
4241            "records/index.md",
4242            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4243        );
4244        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4245        fx.write(
4246            "records/contacts/index.jsonl",
4247            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4248        );
4249        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4250        let issues = fx.store_all();
4251        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4252    }
4253
4254    // ── tags ────────────────────────────────────────────────────────────────
4255
4256    #[test]
4257    fn nested_tags_warns_flat_tags_ok() {
4258        let fx = Fixture::new();
4259        fx.write(
4260            "records/contacts/nested.md",
4261            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4262        );
4263        fx.write(
4264            "records/contacts/flat.md",
4265            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4266        );
4267        let issues = fx.store_all();
4268        let tag_issues: Vec<_> = issues
4269            .iter()
4270            .filter(|i| i.code == codes::TAGS_MALFORMED)
4271            .collect();
4272        assert_eq!(
4273            tag_issues.len(),
4274            1,
4275            "only the nested-tags file should warn: {issues:#?}"
4276        );
4277        assert_eq!(
4278            tag_issues[0].file,
4279            PathBuf::from("records/contacts/nested.md")
4280        );
4281        assert_eq!(tag_issues[0].severity, Severity::Warning);
4282    }
4283
4284    // ── wiki-links ────────────────────────────────────────────────────────────
4285
4286    #[test]
4287    fn short_form_wiki_link_is_error() {
4288        let fx = Fixture::new();
4289        let mut body = valid_contact("links to a short form");
4290        body.push_str("\nSee [[sarah-chen]] for details.\n");
4291        fx.write("records/contacts/a.md", &body);
4292        let issues = fx.store_all();
4293        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4294        assert!(issue.is_error());
4295        assert!(issue.message.contains("sarah-chen"));
4296        // A short-form link must NOT also be reported broken — fix the form first.
4297        assert!(
4298            !issues
4299                .iter()
4300                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4301            "short-form should suppress broken: {issues:#?}"
4302        );
4303    }
4304
4305    #[test]
4306    fn broken_full_path_wiki_link_is_error() {
4307        let fx = Fixture::new();
4308        let mut body = valid_contact("links to a missing file");
4309        body.push_str("\nSee [[records/contacts/ghost]].\n");
4310        fx.write("records/contacts/a.md", &body);
4311        let issues = fx.store_all();
4312        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4313        assert!(issue.is_error());
4314        assert!(issue.message.contains("records/contacts/ghost"));
4315        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4316    }
4317
4318    #[test]
4319    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4320        let fx = Fixture::new();
4321        let mut body = valid_contact("links with traversal");
4322        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4323        fx.write("records/contacts/a.md", &body);
4324        let issues = fx.store_all();
4325        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4326        assert!(issue.message.contains("not a safe store-relative path"));
4327        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4328    }
4329
4330    #[test]
4331    fn valid_full_path_wiki_link_passes() {
4332        let fx = Fixture::new();
4333        fx.write("records/contacts/target.md", &valid_contact("target"));
4334        let mut body = valid_contact("links to target");
4335        body.push_str("\nSee [[records/contacts/target]].\n");
4336        fx.write("records/contacts/a.md", &body);
4337        let issues = fx.store_all();
4338        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4339        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4340    }
4341
4342    #[test]
4343    fn md_extension_wiki_link_warns_and_resolves() {
4344        let fx = Fixture::new();
4345        fx.write("records/contacts/target.md", &valid_contact("target"));
4346        let mut body = valid_contact("links with extension");
4347        body.push_str("\nSee [[records/contacts/target.md]].\n");
4348        fx.write("records/contacts/a.md", &body);
4349        let issues = fx.store_all();
4350        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4351        assert_eq!(issue.severity, Severity::Warning);
4352        assert_eq!(
4353            issue.suggestion.as_deref(),
4354            Some("drop the extension: [[records/contacts/target]]")
4355        );
4356        // The target exists once `.md` is stripped → not broken.
4357        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4358    }
4359
4360    #[test]
4361    fn wiki_links_in_code_fences_are_ignored() {
4362        let fx = Fixture::new();
4363        let mut body = valid_contact("has a fenced example");
4364        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4365        fx.write("records/contacts/a.md", &body);
4366        let issues = fx.store_all();
4367        assert!(
4368            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4369            "fenced wiki-links must be ignored: {issues:#?}"
4370        );
4371    }
4372
4373    #[test]
4374    fn flow_form_link_list_in_frontmatter_is_error() {
4375        let fx = Fixture::new();
4376        fx.write(
4377            "records/meetings/m.md",
4378            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4379        );
4380        let issues = fx.store_all();
4381        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4382        assert!(issue.is_error());
4383        assert_eq!(issue.key.as_deref(), Some("attendees"));
4384    }
4385
4386    #[test]
4387    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4388        let fx = Fixture::new();
4389        fx.write("records/contacts/a.md", &valid_contact("a"));
4390        fx.write("records/contacts/b.md", &valid_contact("b"));
4391        fx.write(
4392            "records/meetings/m.md",
4393            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4394        );
4395        let issues = fx.store_all();
4396        assert!(
4397            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4398            "{issues:#?}"
4399        );
4400        // Block-form link targets are still integrity-checked (both exist here).
4401        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4402    }
4403
4404    #[test]
4405    fn frontmatter_short_form_link_field_is_error() {
4406        let fx = Fixture::new();
4407        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4408        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4409        fx.write(
4410            "records/synthesis/a.md",
4411            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4412        );
4413        let issues = fx.store_all();
4414        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4415        assert!(issue.is_error());
4416        assert_eq!(issue.key.as_deref(), Some("related"));
4417    }
4418
4419    #[test]
4420    fn unquoted_frontmatter_link_is_recognized() {
4421        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4422        // string. The validator must still see it as a wiki-link (text-based
4423        // extraction). A short-form custom field must report SHORT_FORM, and a
4424        // full-path one with a missing target must report BROKEN.
4425        let fx = Fixture::new();
4426        fx.write(
4427            "records/synthesis/short.md",
4428            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4429        );
4430        fx.write(
4431            "records/synthesis/broken.md",
4432            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4433        );
4434        let issues = fx.store_all();
4435        assert!(
4436            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4437                && i.file == Path::new("records/synthesis/short.md")
4438                && i.key.as_deref() == Some("related")),
4439            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4440        );
4441        assert!(
4442            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4443                && i.file == Path::new("records/synthesis/broken.md")),
4444            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4445        );
4446    }
4447
4448    #[test]
4449    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4450        // A short-form value in a *declared* link field (a `### contact` schema
4451        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4452        // (the target isn't under the prefix), and must NOT also be reported as a
4453        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4454        let mut fx = Fixture::new();
4455        fx.config.schemas.insert(
4456            "contact".into(),
4457            Schema {
4458                fields: vec![FieldSpec {
4459                    name: "company".into(),
4460                    link_prefix: Some(PathBuf::from("records/companies")),
4461                    ..Default::default()
4462                }],
4463                ..Default::default()
4464            },
4465        );
4466        fx.write(
4467            "records/contacts/a.md",
4468            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4469        );
4470        let issues = fx.store_all();
4471        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4472        assert_eq!(issue.key.as_deref(), Some("company"));
4473        // The same link must NOT also be double-reported via the generic path.
4474        assert!(
4475            !issues
4476                .iter()
4477                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4478                    && i.key.as_deref() == Some("company")),
4479            "schema link fields are checked once, by the schema path: {issues:#?}"
4480        );
4481    }
4482
4483    #[test]
4484    fn schema_link_field_with_md_extension_still_warns() {
4485        let mut fx = Fixture::new();
4486        fx.config.schemas.insert(
4487            "contact".into(),
4488            Schema {
4489                fields: vec![FieldSpec {
4490                    name: "company".into(),
4491                    link_prefix: Some(PathBuf::from("records/companies")),
4492                    ..Default::default()
4493                }],
4494                ..Default::default()
4495            },
4496        );
4497        fx.write(
4498            "records/companies/acme.md",
4499            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4500        );
4501        fx.write(
4502            "records/contacts/a.md",
4503            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4504        );
4505        let issues = fx.store_all();
4506        let issue = issues
4507            .iter()
4508            .find(|i| {
4509                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4510            })
4511            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4512        assert_eq!(issue.severity, Severity::Warning);
4513        assert!(
4514            !issues
4515                .iter()
4516                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4517            "extensionless existence check should still find acme.md: {issues:#?}"
4518        );
4519    }
4520
4521    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4522
4523    #[test]
4524    fn explicit_schema_required_shape_enum() {
4525        let fx = {
4526            let mut fx = Fixture::new();
4527            // contact schema: name required, email required+email shape,
4528            // status enum: active|inactive
4529            let schema = Schema {
4530                fields: vec![
4531                    FieldSpec {
4532                        name: "name".into(),
4533                        required: true,
4534                        ..Default::default()
4535                    },
4536                    FieldSpec {
4537                        name: "email".into(),
4538                        required: true,
4539                        shape: Some(Shape::Email),
4540                        ..Default::default()
4541                    },
4542                    FieldSpec {
4543                        name: "status".into(),
4544                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4545                        ..Default::default()
4546                    },
4547                ],
4548                ..Default::default()
4549            };
4550            fx.config.schemas.insert("contact".into(), schema);
4551            fx
4552        };
4553        fx.write(
4554            "records/contacts/a.md",
4555            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4556        );
4557        let issues = fx.store_all();
4558        // name absent → MISSING_REQUIRED
4559        assert!(
4560            issues
4561                .iter()
4562                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4563                    && i.key.as_deref() == Some("name")),
4564            "{issues:#?}"
4565        );
4566        // email malformed → SHAPE_MISMATCH
4567        assert!(
4568            issues.iter().any(
4569                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4570            ),
4571            "{issues:#?}"
4572        );
4573        // status archived not in enum → ENUM_VIOLATION
4574        assert!(
4575            issues
4576                .iter()
4577                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4578                    && i.key.as_deref() == Some("status")),
4579            "{issues:#?}"
4580        );
4581    }
4582
4583    #[test]
4584    fn schema_without_link_field_allows_plain_value() {
4585        // A `contact` schema with no `company` link field means a plain `company`
4586        // string is fine — schema enforcement is exactly what the store declares,
4587        // nothing implicit.
4588        let mut fx = Fixture::new();
4589        fx.config.schemas.insert(
4590            "contact".into(),
4591            Schema {
4592                fields: vec![FieldSpec {
4593                    name: "name".into(),
4594                    required: true,
4595                    ..Default::default()
4596                }],
4597                ..Default::default()
4598            },
4599        );
4600        fx.write(
4601            "records/contacts/a.md",
4602            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4603        );
4604        let issues = fx.store_all();
4605        assert!(
4606            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4607            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4608        );
4609    }
4610
4611    #[test]
4612    fn schema_link_field_plain_value_is_prefix_mismatch() {
4613        // The surviving link-enforcement path: a declared `link to <prefix>/`
4614        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4615        let mut fx = Fixture::new();
4616        fx.config.schemas.insert(
4617            "contact".into(),
4618            Schema {
4619                fields: vec![FieldSpec {
4620                    name: "company".into(),
4621                    link_prefix: Some(PathBuf::from("records/companies")),
4622                    ..Default::default()
4623                }],
4624                ..Default::default()
4625            },
4626        );
4627        fx.write(
4628            "records/contacts/a.md",
4629            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4630        );
4631        let issues = fx.store_all();
4632        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4633        assert_eq!(issue.key.as_deref(), Some("company"));
4634        assert!(issue
4635            .suggestion
4636            .as_deref()
4637            .unwrap()
4638            .contains("records/companies/"));
4639    }
4640
4641    #[test]
4642    fn schema_shape_int_and_url_and_currency() {
4643        let mut fx = Fixture::new();
4644        fx.config.schemas.insert(
4645            "widget".into(),
4646            Schema {
4647                fields: vec![
4648                    FieldSpec {
4649                        name: "qty".into(),
4650                        shape: Some(Shape::Int),
4651                        ..Default::default()
4652                    },
4653                    FieldSpec {
4654                        name: "site".into(),
4655                        shape: Some(Shape::Url),
4656                        ..Default::default()
4657                    },
4658                    FieldSpec {
4659                        name: "price".into(),
4660                        shape: Some(Shape::Currency),
4661                        ..Default::default()
4662                    },
4663                ],
4664                ..Default::default()
4665            },
4666        );
4667        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4668        // ISO code + amount). It must pass — it used to spuriously fail.
4669        fx.write(
4670            "records/widgets/ok.md",
4671            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4672        );
4673        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4674        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4675        // the under-rejection half of the finding.
4676        fx.write(
4677            "records/widgets/bad.md",
4678            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4679        );
4680        let issues = fx.store_all();
4681        let bad_shape: Vec<_> = issues
4682            .iter()
4683            .filter(|i| {
4684                i.code == codes::SCHEMA_SHAPE_MISMATCH
4685                    && i.file == Path::new("records/widgets/bad.md")
4686            })
4687            .map(|i| i.key.clone().unwrap_or_default())
4688            .collect();
4689        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4690        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4691        assert!(
4692            bad_shape.contains(&"price".to_string()),
4693            "inf must be rejected as currency: {issues:#?}"
4694        );
4695        assert!(
4696            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4697                && i.file == Path::new("records/widgets/ok.md")),
4698            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4699        );
4700    }
4701
4702    #[test]
4703    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4704        let mut fx = Fixture::new();
4705        fx.config.schemas.insert(
4706            "contact".into(),
4707            Schema {
4708                fields: vec![
4709                    FieldSpec {
4710                        name: "email".into(),
4711                        required: true,
4712                        shape: Some(Shape::Email),
4713                        ..Default::default()
4714                    },
4715                    FieldSpec {
4716                        name: "status".into(),
4717                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4718                        ..Default::default()
4719                    },
4720                ],
4721                ..Default::default()
4722            },
4723        );
4724        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4725        // used to slip through entirely (`scalar_string` → None → the shape and
4726        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4727        fx.write(
4728            "records/contacts/bad.md",
4729            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4730        );
4731        let issues = fx.store_all();
4732        let mismatched: Vec<_> = issues
4733            .iter()
4734            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4735            .map(|i| i.key.clone().unwrap_or_default())
4736            .collect();
4737        assert!(
4738            mismatched.contains(&"email".to_string()),
4739            "list-valued required email must flag: {issues:#?}"
4740        );
4741        assert!(
4742            mismatched.contains(&"status".to_string()),
4743            "list-valued enum must flag: {issues:#?}"
4744        );
4745    }
4746
4747    #[test]
4748    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4749        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4750        for ok in [
4751            "100",
4752            "1234.56",
4753            "$1,234.50",
4754            "USD 100", // the finding's headline probe — used to be false
4755            "usd 100", // case-insensitive code
4756            "EUR 9.50",
4757            "£12",
4758            "¥1000",
4759            "-5.00", // signed amounts are real (refunds)
4760            "+5",
4761            "1,000,000",
4762        ] {
4763            assert!(is_currency(ok), "expected currency: {ok:?}");
4764        }
4765        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4766        // bare-code / exponent cases the docstring forbids.
4767        for bad in [
4768            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4769            "12.999", // 3 decimals
4770            "1.2345", // 4 decimals
4771            "USD",    // bare code, no amount
4772            "$",      // bare symbol
4773            "free", "", " ", "1e3",      // exponent form
4774            "1.",       // trailing dot, no fractional digits
4775            ".5",       // leading dot, no integer digits
4776            "1 000",    // space as separator is not a thousands separator
4777            "USDD 100", // 4-letter "code" must not strip
4778        ] {
4779            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4780        }
4781    }
4782
4783    // ── policies ───────────────────────────────────────────────────────────
4784
4785    #[test]
4786    fn ignored_type_present_is_info() {
4787        let mut fx = Fixture::new();
4788        fx.config.ignored_types.push("temp".into());
4789        fx.write(
4790            "records/temps/x.md",
4791            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4792        );
4793        let issues = fx.store_all();
4794        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4795        assert_eq!(issue.severity, Severity::Info);
4796        assert!(!issue.is_error());
4797        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4798    }
4799
4800    #[test]
4801    fn conclusion_record_derived_from_ignored_type_warns() {
4802        let mut fx = Fixture::new();
4803        fx.config.ignored_types.push("temp".into());
4804        fx.write(
4805            "records/temps/x.md",
4806            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4807        );
4808        // The policy now gates on `meta-type: conclusion` (not the retired
4809        // `type: wiki-page`): a conclusion record that derives from an
4810        // ignored-type record warns.
4811        fx.write(
4812            "records/synthesis/t.md",
4813            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4814        );
4815        let issues = fx.store_all();
4816        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4817        assert_eq!(issue.severity, Severity::Warning);
4818        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4819        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4820    }
4821
4822    /// The shared `derived_from_ignored_type` entry point — the single
4823    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
4824    /// warning) now route through, so they cannot diverge. This pins its
4825    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
4826    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
4827    /// match carrying the resolved target type, and a non-ignored target
4828    /// rejected.
4829    #[test]
4830    fn derived_from_ignored_type_is_the_shared_policy_decision() {
4831        let mut fx = Fixture::new();
4832        fx.config.ignored_types.push("secret".into());
4833        // An ignored-type record …
4834        fx.write(
4835            "records/secrets/s.md",
4836            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4837        );
4838        // … and a non-ignored record.
4839        fx.write(
4840            "records/contacts/c.md",
4841            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4842        );
4843        let store = fx.store();
4844
4845        // Positive: a conclusion record deriving from the ignored-type record
4846        // matches, and the hit carries both the target (as written) and its
4847        // resolved type.
4848        let hit =
4849            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
4850                .expect("conclusion → ignored-type record must match");
4851        assert_eq!(hit.target, "records/secrets/s");
4852        assert_eq!(hit.target_type, "secret");
4853
4854        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
4855        // the same ignored-type target.
4856        assert_eq!(
4857            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
4858            None,
4859            "only conclusion derivation is policed"
4860        );
4861
4862        // Target gate: a conclusion deriving from a non-ignored record is fine.
4863        assert_eq!(
4864            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
4865            None,
4866            "deriving from a non-ignored type is allowed"
4867        );
4868
4869        // First match wins across multiple targets (here the second is the hit).
4870        let hit = derived_from_ignored_type(
4871            &store,
4872            "conclusion",
4873            ["records/contacts/c", "records/secrets/s"],
4874        )
4875        .expect("a later ignored-type target must still be found");
4876        assert_eq!(hit.target, "records/secrets/s");
4877
4878        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
4879        fx.config.ignored_types.clear();
4880        let store = fx.store();
4881        assert_eq!(
4882            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
4883            None,
4884            "an empty ignored-types policy short-circuits"
4885        );
4886    }
4887
4888    // ── duplicates ───────────────────────────────────────────────────────────
4889
4890    #[test]
4891    fn dup_id_is_hard_error_with_related() {
4892        let fx = Fixture::new();
4893        fx.write(
4894            "records/contacts/a.md",
4895            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4896        );
4897        fx.write(
4898            "records/contacts/b.md",
4899            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4900        );
4901        let issues = fx.store_all();
4902        // Reporting rule #1: ONE issue per collision group, keyed on the
4903        // lexicographically smallest path (`a.md`), partner in `related`.
4904        assert_eq!(
4905            count(&issues, codes::DUP_ID),
4906            1,
4907            "one issue per group: {issues:#?}"
4908        );
4909        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
4910        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
4911        assert!(a.is_error());
4912        assert_eq!(a.key.as_deref(), Some("id"));
4913        assert_eq!(
4914            a.line,
4915            Some(3),
4916            "anchors to the `id` line on the reported file"
4917        );
4918        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
4919    }
4920
4921    #[test]
4922    fn dup_id_not_fired_in_working_set() {
4923        // DUP_* is an --all-only cross-file check; the working set must not run it.
4924        let fx = Fixture::new();
4925        fx.write(
4926            "records/contacts/a.md",
4927            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4928        );
4929        fx.write(
4930            "records/contacts/b.md",
4931            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4932        );
4933        // Log says both changed since epoch, so they're in the working set.
4934        fx.write(
4935            "log.md",
4936            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
4937        );
4938        let issues = validate_working_set(&fx.store(), None).unwrap();
4939        assert!(
4940            !has(&issues, codes::DUP_ID),
4941            "DUP_ID is --all only: {issues:#?}"
4942        );
4943    }
4944
4945    #[test]
4946    fn dup_unique_key_single_field_is_warning() {
4947        let mut fx = Fixture::new();
4948        // contact declares `- unique: email`.
4949        fx.config.schemas.insert(
4950            "contact".into(),
4951            Schema {
4952                unique_keys: vec![vec!["email".into()]],
4953                ..Default::default()
4954            },
4955        );
4956        for (f, name) in [("a", "A"), ("b", "B")] {
4957            fx.write(
4958                &format!("records/contacts/{f}.md"),
4959                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
4960            );
4961        }
4962        let issues = fx.store_all();
4963        // One issue per group (rule #1), keyed on the smallest path, anchored to
4964        // the single `email` field.
4965        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
4966        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4967        assert_eq!(dup.severity, Severity::Warning);
4968        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
4969        assert_eq!(dup.key.as_deref(), Some("email"));
4970        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
4971    }
4972
4973    #[test]
4974    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
4975        let mut fx = Fixture::new();
4976        // expense declares `- unique: date, amount, vendor` (a compound key).
4977        fx.config.schemas.insert(
4978            "expense".into(),
4979            Schema {
4980                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
4981                ..Default::default()
4982            },
4983        );
4984        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
4985        let exp = |f: &str, amount: &str| {
4986            format!(
4987            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
4988        )
4989        };
4990        fx.write("records/expenses/e1.md", &exp("e1", "100"));
4991        fx.write("records/expenses/e2.md", &exp("e2", "100"));
4992        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
4993        let issues = fx.store_all();
4994        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
4995        // (e1) with e2 in `related`; e3 differs on amount and never appears.
4996        assert_eq!(
4997            count(&issues, codes::DUP_UNIQUE_KEY),
4998            1,
4999            "only e1+e2 collide, one issue: {issues:#?}"
5000        );
5001        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5002        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5003        assert_eq!(
5004            dup.line,
5005            Some(1),
5006            "compound-key collision anchors to line 1"
5007        );
5008        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5009        assert!(
5010            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5011                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5012            "e3 differs on amount and must not collide: {issues:#?}"
5013        );
5014    }
5015
5016    #[test]
5017    fn dup_unique_key_list_field_is_order_independent() {
5018        let mut fx = Fixture::new();
5019        // meeting declares `- unique: date, attendees`; the list field is a set.
5020        fx.config.schemas.insert(
5021            "meeting".into(),
5022            Schema {
5023                unique_keys: vec![vec!["date".into(), "attendees".into()]],
5024                ..Default::default()
5025            },
5026        );
5027        fx.write("records/contacts/a.md", &valid_contact("a"));
5028        fx.write("records/contacts/b.md", &valid_contact("b"));
5029        let m = |f: &str, order: &str| {
5030            let attendees = if order == "ab" {
5031                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
5032            } else {
5033                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
5034            };
5035            format!(
5036                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5037            )
5038        };
5039        fx.write("records/meetings/m1.md", &m("m1", "ab"));
5040        fx.write("records/meetings/m2.md", &m("m2", "ba"));
5041        let issues = fx.store_all();
5042        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
5043        // → a single issue on the smaller path.
5044        assert_eq!(
5045            count(&issues, codes::DUP_UNIQUE_KEY),
5046            1,
5047            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5048        );
5049        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5050        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5051        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5052    }
5053
5054    // ── indexes ───────────────────────────────────────────────────────────────
5055
5056    #[test]
5057    fn missing_indexes_at_all_three_levels() {
5058        let fx = Fixture::new();
5059        fx.write("records/contacts/a.md", &valid_contact("a"));
5060        let issues = fx.store_all();
5061        // root, layer (records), and type-folder (records/contacts) all missing.
5062        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
5063        // would-be index.md), per the field convention `EXPECTED` pins.
5064        let missing_files: BTreeSet<PathBuf> = issues
5065            .iter()
5066            .filter(|i| i.code == codes::INDEX_MISSING)
5067            .map(|i| i.file.clone())
5068            .collect();
5069        assert!(
5070            missing_files.contains(&PathBuf::from("index.md")),
5071            "{issues:#?}"
5072        );
5073        assert!(
5074            missing_files.contains(&PathBuf::from("records/index.md")),
5075            "{issues:#?}"
5076        );
5077        assert!(
5078            missing_files.contains(&PathBuf::from("records/contacts")),
5079            "{issues:#?}"
5080        );
5081        // When the index.md is entirely absent we do NOT additionally fire
5082        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5083        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5084    }
5085
5086    #[test]
5087    fn index_stale_entry_and_missing_entry() {
5088        let fx = Fixture::new();
5089        fx.write(
5090            "records/contacts/present.md",
5091            &valid_contact("present contact"),
5092        );
5093        // Indexes for the parents (root/layer) present so we isolate type-folder.
5094        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5095        fx.write(
5096            "records/index.md",
5097            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5098        );
5099        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5100        fx.write(
5101            "records/contacts/index.md",
5102            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5103        );
5104        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5105        let issues = fx.store_all();
5106        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5107        assert!(stale.message.contains("ghost"));
5108        assert!(stale.is_error());
5109        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5110        assert!(
5111            missing.message.contains("present.md"),
5112            "{}",
5113            missing.message
5114        );
5115    }
5116
5117    #[test]
5118    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5119        let fx = Fixture::new();
5120        fx.write("records/contacts/a.md", &valid_contact("a"));
5121        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5122        fx.write(
5123            "records/index.md",
5124            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5125        );
5126        fx.write(
5127            "records/contacts/index.md",
5128            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5129        );
5130        fx.write(
5131            "records/contacts/index.jsonl",
5132            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5133        );
5134        let issues = fx.store_all();
5135        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5136        assert!(stale.message.contains("not a safe store-relative path"));
5137    }
5138
5139    #[test]
5140    fn index_summary_mismatch() {
5141        let fx = Fixture::new();
5142        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5143        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5144        fx.write(
5145            "records/index.md",
5146            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5147        );
5148        fx.write(
5149            "records/contacts/index.md",
5150            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5151        );
5152        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5153        let issues = fx.store_all();
5154        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5155        assert!(issue.is_error());
5156        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5157    }
5158
5159    #[test]
5160    fn index_summary_match_passes() {
5161        let fx = Fixture::new();
5162        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5163        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5164        fx.write(
5165            "records/index.md",
5166            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5167        );
5168        fx.write(
5169            "records/contacts/index.md",
5170            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5171        );
5172        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5173        let issues = fx.store_all();
5174        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5175    }
5176
5177    #[test]
5178    fn index_entry_with_tag_suffix_matches_summary() {
5179        let fx = Fixture::new();
5180        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5181        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5182        fx.write(
5183            "records/index.md",
5184            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5185        );
5186        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5187        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5188        // which must be stripped before comparing against the file's summary.
5189        fx.write(
5190            "records/contacts/index.md",
5191            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5192        );
5193        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5194        let issues = fx.store_all();
5195        assert!(
5196            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5197            "tag suffix should be stripped: {issues:#?}"
5198        );
5199    }
5200
5201    #[test]
5202    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5203        // Regression (the finding): a tagless file whose `summary` legitimately
5204        // ends in a single-spaced ` · #word` tail round-trips through `index
5205        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5206        // file has no tags). The validator must NOT mistake that single-spaced
5207        // tail for the renderer's tag suffix, or it reports a spurious — and
5208        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5209        let fx = Fixture::new();
5210        fx.write(
5211            "records/contacts/a.md",
5212            &valid_contact("Standup notes · #standup"),
5213        );
5214        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5215        fx.write(
5216            "records/index.md",
5217            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5218        );
5219        fx.write(
5220            "records/contacts/index.md",
5221            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5222        );
5223        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5224        let issues = fx.store_all();
5225        assert!(
5226            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5227            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5228        );
5229    }
5230
5231    #[test]
5232    fn index_jsonl_desync_missing_file_in_jsonl() {
5233        let fx = Fixture::new();
5234        fx.write("records/contacts/a.md", &valid_contact("a"));
5235        fx.write("records/contacts/b.md", &valid_contact("b"));
5236        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5237        fx.write(
5238            "records/index.md",
5239            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5240        );
5241        fx.write(
5242            "records/contacts/index.md",
5243            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5244        );
5245        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5246        fx.write(
5247            "records/contacts/index.jsonl",
5248            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5249        );
5250        let issues = fx.store_all();
5251        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5252        assert!(desync.message.contains("b.md"), "{}", desync.message);
5253    }
5254
5255    #[test]
5256    fn index_jsonl_desync_record_points_at_missing_file() {
5257        let fx = Fixture::new();
5258        fx.write("records/contacts/a.md", &valid_contact("a"));
5259        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5260        fx.write(
5261            "records/index.md",
5262            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5263        );
5264        fx.write(
5265            "records/contacts/index.md",
5266            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5267        );
5268        fx.write(
5269            "records/contacts/index.jsonl",
5270            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5271        );
5272        let issues = fx.store_all();
5273        assert!(
5274            issues
5275                .iter()
5276                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5277            "{issues:#?}"
5278        );
5279    }
5280
5281    #[test]
5282    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5283        let fx = Fixture::new();
5284        fx.write("records/contacts/a.md", &valid_contact("a"));
5285        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5286        fx.write(
5287            "records/index.md",
5288            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5289        );
5290        fx.write(
5291            "records/contacts/index.md",
5292            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5293        );
5294        fx.write(
5295            "records/contacts/index.jsonl",
5296            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5297        );
5298        let issues = fx.store_all();
5299        assert!(
5300            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5301                && i.message.contains("not a safe store-relative path")),
5302            "{issues:#?}"
5303        );
5304    }
5305
5306    #[test]
5307    fn index_jsonl_stale_summary() {
5308        let fx = Fixture::new();
5309        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5310        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5311        fx.write(
5312            "records/index.md",
5313            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5314        );
5315        fx.write(
5316            "records/contacts/index.md",
5317            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5318        );
5319        // jsonl summary disagrees with the file frontmatter.
5320        fx.write(
5321            "records/contacts/index.jsonl",
5322            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5323        );
5324        let issues = fx.store_all();
5325        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5326        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5327        assert!(stale.key.as_deref().unwrap().contains("summary"));
5328    }
5329
5330    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5331    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5332    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5333    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5334    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5335    /// `email` validated clean and answered `--where email=…` with a phantom
5336    /// value present in no file. This is the direct regression guard.
5337    #[test]
5338    fn index_jsonl_stale_queryable_field_email() {
5339        let fx = Fixture::new();
5340        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5341        fx.write("records/contacts/a.md", contact);
5342        // Start from the canonical, fully-correct sidecar set …
5343        fx.rebuild_indexes();
5344        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5345        let good = fs::read_to_string(&jsonl_path).unwrap();
5346        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5347        assert!(
5348            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5349            "freshly-rebuilt sidecar must not be stale"
5350        );
5351        // … then desync ONLY the email so it's the single differing field.
5352        assert!(
5353            good.contains("real@correct.com"),
5354            "sidecar projects email: {good}"
5355        );
5356        fx.write(
5357            "records/contacts/index.jsonl",
5358            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5359        );
5360
5361        let issues = fx.store_all();
5362        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5363        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5364        // The mismatch is reported precisely on `email`, and summary/type — which
5365        // still match — are NOT named.
5366        let key = stale.key.as_deref().unwrap();
5367        assert!(
5368            key.contains("email"),
5369            "expected `email` in stale key, got {key:?}"
5370        );
5371        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5372        assert!(!key.contains("type"), "type still matches: {key:?}");
5373    }
5374
5375    /// Broaden the guard across the typed/list/timestamp projections at once:
5376    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5377    /// caught, with all three named in one issue.
5378    #[test]
5379    fn index_jsonl_stale_typed_and_list_fields() {
5380        let fx = Fixture::new();
5381        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5382        fx.write("records/expenses/e.md", expense);
5383        fx.rebuild_indexes();
5384        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5385        let good = fs::read_to_string(&jsonl_path).unwrap();
5386        assert!(
5387            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5388            "freshly-rebuilt sidecar must not be stale"
5389        );
5390        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5391        let stale_line = good
5392            .replace("\"q2\"", "\"WRONG-TAG\"")
5393            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5394            .replace("1299", "9999");
5395        fx.write("records/expenses/index.jsonl", &stale_line);
5396
5397        let issues = fx.store_all();
5398        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5399        let key = stale.key.as_deref().unwrap();
5400        for expected in ["amount", "tags", "updated"] {
5401            assert!(
5402                key.contains(expected),
5403                "expected `{expected}` in stale key, got {key:?}"
5404            );
5405        }
5406    }
5407
5408    #[test]
5409    fn index_orphan_in_noncanonical_folder() {
5410        let fx = Fixture::new();
5411        fx.write("records/contacts/a.md", &valid_contact("a"));
5412        // Build the canonical indexes so they aren't reported as orphans.
5413        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5414        fx.write(
5415            "records/index.md",
5416            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5417        );
5418        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5419        fx.write(
5420            "records/contacts/index.jsonl",
5421            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5422        );
5423        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5424        fx.write(
5425            "records/contacts/subfolder/index.md",
5426            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5427        );
5428        let issues = fx.store_all();
5429        let orphan = find(&issues, codes::INDEX_ORPHAN);
5430        assert_eq!(orphan.severity, Severity::Warning);
5431        assert_eq!(
5432            orphan.file,
5433            PathBuf::from("records/contacts/subfolder/index.md")
5434        );
5435    }
5436
5437    #[test]
5438    fn index_wrong_scope() {
5439        let fx = Fixture::new();
5440        fx.write("records/contacts/a.md", &valid_contact("a"));
5441        // Root index declares the wrong scope.
5442        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5443        fx.write(
5444            "records/index.md",
5445            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5446        );
5447        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5448        fx.write(
5449            "records/contacts/index.jsonl",
5450            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5451        );
5452        let issues = fx.store_all();
5453        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5454        assert_eq!(issue.severity, Severity::Warning);
5455        assert_eq!(issue.file, PathBuf::from("index.md"));
5456    }
5457
5458    #[test]
5459    fn capped_type_folder_index_does_not_flag_missing_entries() {
5460        // Over the 500-entry cap, omitted entries are expected, not an error.
5461        let fx = Fixture::new();
5462        for i in 0..501 {
5463            fx.write(
5464                &format!("records/contacts/c{i:04}.md"),
5465                &valid_contact(&format!("contact {i}")),
5466            );
5467        }
5468        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5469        fx.write(
5470            "records/index.md",
5471            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5472        );
5473        // Type-folder index lists only ONE entry + a More footer.
5474        fx.write(
5475            "records/contacts/index.md",
5476            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5477        );
5478        // jsonl must still be complete — write all 501 lines.
5479        let mut jsonl = String::new();
5480        for i in 0..501 {
5481            jsonl.push_str(&format!(
5482                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5483            ));
5484        }
5485        fx.write("records/contacts/index.jsonl", &jsonl);
5486        let issues = fx.store_all();
5487        assert!(
5488            !has(&issues, codes::INDEX_MISSING_ENTRY),
5489            "over the cap, missing browse entries are expected: {issues:#?}"
5490        );
5491        // But the jsonl is complete → no desync.
5492        assert!(
5493            !has(&issues, codes::INDEX_JSONL_DESYNC),
5494            "{:#?}",
5495            issues
5496                .iter()
5497                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5498                .collect::<Vec<_>>()
5499        );
5500    }
5501
5502    // ── log ────────────────────────────────────────────────────────────────
5503
5504    #[test]
5505    fn log_bad_timestamp_unknown_kind_out_of_order() {
5506        let fx = Fixture::new();
5507        fx.write(
5508            "log.md",
5509            concat!(
5510                "---\ntype: log\n---\n\n# Log\n\n",
5511                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5512                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5513                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5514                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5515            ),
5516        );
5517        let issues = fx.store_all();
5518        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5519        assert_eq!(
5520            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5521            Severity::Warning
5522        );
5523        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5524        assert_eq!(unknown.severity, Severity::Warning);
5525        assert!(unknown.message.contains("frobnicate"));
5526        assert!(unknown
5527            .suggestion
5528            .as_deref()
5529            .is_some_and(|s| s.contains("create")));
5530        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5531        assert!(bad.is_error());
5532    }
5533
5534    #[test]
5535    fn log_validate_entry_without_object_is_well_formed() {
5536        let fx = Fixture::new();
5537        fx.write(
5538            "log.md",
5539            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5540        );
5541        let issues = fx.store_all();
5542        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5543        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5544    }
5545
5546    #[test]
5547    fn log_in_order_is_clean() {
5548        let fx = Fixture::new();
5549        fx.write(
5550            "log.md",
5551            concat!(
5552                "---\ntype: log\n---\n\n",
5553                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5554                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5555            ),
5556        );
5557        let issues = fx.store_all();
5558        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5559    }
5560
5561    #[test]
5562    fn log_not_checked_in_working_set() {
5563        // log.md ordering is an --all-only check.
5564        let fx = Fixture::new();
5565        fx.write(
5566            "log.md",
5567            concat!(
5568                "---\ntype: log\n---\n\n",
5569                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5570                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5571            ),
5572        );
5573        let issues = validate_working_set(&fx.store(), None).unwrap();
5574        assert!(
5575            !has(&issues, codes::LOG_OUT_OF_ORDER),
5576            "log ordering is --all only: {issues:#?}"
5577        );
5578    }
5579
5580    // ── working-set scoping ───────────────────────────────────────────────────
5581
5582    #[test]
5583    fn working_set_validates_only_changed_files() {
5584        let fx = Fixture::new();
5585        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5586        // in the log → working set must skip it.
5587        fx.write(
5588            "records/contacts/dirty.md",
5589            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5590        );
5591        fx.write(
5592            "records/contacts/unlogged.md",
5593            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5594        );
5595        fx.write(
5596            "log.md",
5597            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5598        );
5599        let issues = validate_working_set(&fx.store(), None).unwrap();
5600        assert!(
5601            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5602                && i.file == Path::new("records/contacts/dirty.md")),
5603            "{issues:#?}"
5604        );
5605        assert!(
5606            !issues
5607                .iter()
5608                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5609            "unlogged file must not be in the working set: {issues:#?}"
5610        );
5611    }
5612
5613    #[test]
5614    fn working_set_includes_incoming_linkers_to_changed_path() {
5615        let fx = Fixture::new();
5616        // `changed` was renamed/removed (logged). `linker` points at it with a
5617        // now-broken link and was NOT itself logged — but must be pulled in.
5618        fx.write(
5619            "records/profiles/linker.md",
5620            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5621        );
5622        // `changed.md` does NOT exist on disk (removed).
5623        fx.write(
5624            "log.md",
5625            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5626        );
5627        let issues = validate_working_set(&fx.store(), None).unwrap();
5628        assert!(
5629            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5630                && i.file == Path::new("records/profiles/linker.md")),
5631            "incoming linker to a removed path must be validated: {issues:#?}"
5632        );
5633    }
5634
5635    #[test]
5636    fn working_set_respects_explicit_since_cutoff() {
5637        let fx = Fixture::new();
5638        fx.write(
5639            "records/contacts/old.md",
5640            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5641        );
5642        fx.write(
5643            "records/contacts/new.md",
5644            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5645        );
5646        fx.write(
5647            "log.md",
5648            concat!(
5649                "---\ntype: log\n---\n\n",
5650                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5651                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5652            ),
5653        );
5654        // Cutoff after `old` but before `new`.
5655        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5656        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5657        assert!(
5658            issues
5659                .iter()
5660                .any(|i| i.file == Path::new("records/contacts/new.md")),
5661            "{issues:#?}"
5662        );
5663        assert!(
5664            !issues
5665                .iter()
5666                .any(|i| i.file == Path::new("records/contacts/old.md")),
5667            "old change is before the cutoff: {issues:#?}"
5668        );
5669    }
5670
5671    #[test]
5672    fn working_set_default_since_is_last_validate_entry() {
5673        let fx = Fixture::new();
5674        // `before` changed before the last validate; `after` changed after.
5675        fx.write(
5676            "records/contacts/before.md",
5677            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5678        );
5679        fx.write(
5680            "records/contacts/after.md",
5681            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5682        );
5683        fx.write(
5684            "log.md",
5685            concat!(
5686                "---\ntype: log\n---\n\n",
5687                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5688                "## [2026-05-21 10:00] validate\nPASS\n\n",
5689                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5690            ),
5691        );
5692        let issues = validate_working_set(&fx.store(), None).unwrap();
5693        assert!(
5694            issues
5695                .iter()
5696                .any(|i| i.file == Path::new("records/contacts/after.md")),
5697            "{issues:#?}"
5698        );
5699        assert!(
5700            !issues
5701                .iter()
5702                .any(|i| i.file == Path::new("records/contacts/before.md")),
5703            "change before the last validate entry is outside the default window: {issues:#?}"
5704        );
5705    }
5706
5707    // ── ordering / determinism ────────────────────────────────────────────────
5708
5709    #[test]
5710    fn issues_are_sorted_by_file_then_line() {
5711        let fx = Fixture::new();
5712        fx.write("records/profiles/z.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5713        fx.write("records/profiles/a.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5714        let issues = fx.store_all();
5715        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5716        let mut sorted = files.clone();
5717        sorted.sort();
5718        assert_eq!(
5719            files, sorted,
5720            "issues must be emitted in a stable file order"
5721        );
5722    }
5723
5724    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5725
5726    #[test]
5727    fn frozen_page_is_not_a_validate_error() {
5728        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5729        // A clean file listed in `### Frozen pages` must validate clean.
5730        let mut fx = Fixture::new();
5731        fx.config
5732            .frozen_pages
5733            .push(PathBuf::from("records/decisions/d.md"));
5734        fx.write(
5735            "records/decisions/d.md",
5736            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5737        );
5738        let issues = fx.store_all();
5739        assert!(
5740            !has(&issues, codes::POLICY_FROZEN_PAGE),
5741            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5742        );
5743    }
5744
5745    #[test]
5746    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5747        // The full-path doctrine makes ambiguity impossible; the defensive code
5748        // must never fire on a normal store.
5749        let fx = Fixture::new();
5750        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5751        let mut body = valid_contact("links to sarah");
5752        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5753        fx.write("records/contacts/p.md", &body);
5754        let issues = fx.store_all();
5755        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5756    }
5757
5758    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5759
5760    #[test]
5761    fn unknown_type_passes_through() {
5762        // A custom type is ambient context: it has a `type`, so no
5763        // FM_MISSING_TYPE, and with no matching schema there are no schema
5764        // errors. Only the universal contract (summary, timestamps) applies.
5765        let fx = Fixture::new();
5766        fx.write(
5767            "records/proposals/x.md",
5768            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5769        );
5770        let issues = fx.store_all();
5771        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5772        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5773        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5774        // The unknown fields don't trip anything.
5775        assert!(
5776            !issues
5777                .iter()
5778                .any(|i| i.key.as_deref() == Some("custom_field")
5779                    || i.key.as_deref() == Some("budget")),
5780            "unknown fields are ambient context: {issues:#?}"
5781        );
5782    }
5783
5784    // ── find_links_to prefix-collision safety (working set) ───────────────────
5785
5786    #[test]
5787    fn incoming_linker_scan_does_not_prefix_match() {
5788        // A changed `records/contacts/sarah` must NOT pull in a file that only
5789        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5790        let fx = Fixture::new();
5791        fx.write(
5792            "records/profiles/only-sarah-chen.md",
5793            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5794        );
5795        // The log says `records/contacts/sarah` (the shorter path) changed.
5796        fx.write(
5797            "log.md",
5798            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5799        );
5800        let issues = validate_working_set(&fx.store(), None).unwrap();
5801        assert!(
5802            !issues
5803                .iter()
5804                .any(|i| i.file == Path::new("records/profiles/only-sarah-chen.md")),
5805            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5806        );
5807    }
5808
5809    #[test]
5810    fn incoming_linker_scan_pulls_in_catalog_index_md() {
5811        // CONTRACT: the working-set incoming-linker scan rides the embedded-
5812        // ripgrep `Store::find_links_to`, which scans EVERY `.md` (including
5813        // `index.md` catalogs) — NOT the walk-and-read over `walk_content_files`,
5814        // which excludes `index.md`. A type-folder `index.md` that lists a now-
5815        // deleted target must be pulled into the working set so its dangling
5816        // catalog entry is flagged `WIKI_LINK_BROKEN`. The old walk-and-read
5817        // implementation skipped `index.md` and let this broken link survive the
5818        // loop silently; this test fails if anyone reverts to that path.
5819        let fx = Fixture::new();
5820        // A catalog that still lists the deleted contact (a real, common stale
5821        // state after a `delete`). No other file references the target, so the
5822        // catalog is the ONLY incoming linker — if it isn't scanned, nothing is.
5823        fx.write(
5824            "records/contacts/index.md",
5825            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5826        );
5827        // The log says `records/contacts/sarah-chen` was deleted.
5828        fx.write(
5829            "log.md",
5830            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5831        );
5832        let issues = validate_working_set(&fx.store(), None).unwrap();
5833        assert!(
5834            issues
5835                .iter()
5836                .any(|i| i.file == Path::new("records/contacts/index.md")
5837                    && i.code == codes::WIKI_LINK_BROKEN),
5838            "the catalog `index.md` linking to the deleted target must be pulled \
5839             into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
5840             uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
5841             walk-and-read): {issues:#?}"
5842        );
5843    }
5844
5845    #[test]
5846    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
5847        // CONTRACT (the O(changed × store) fix): the working-set scan finds
5848        // incoming linkers for EVERY changed object, and does so via the single
5849        // batch pass `Store::find_links_to_any` — not one full store read per
5850        // changed object. This test pins the behavior that makes the single-pass
5851        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
5852        // into the working set and flagged. A regression that scanned for only the
5853        // first/last changed object, or that dropped the batch union, would leave
5854        // one of the two broken links unreported and fail here.
5855        let fx = Fixture::new();
5856        // Linker A → deleted target #1 (in the body).
5857        fx.write(
5858            "records/profiles/refers-sarah.md",
5859            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5860        );
5861        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
5862        // sidecar `links` projection would miss, which is why this must be a
5863        // content scan, not a sidecar read).
5864        fx.write(
5865            "records/meetings/2026/05/kickoff.md",
5866            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
5867        );
5868        // The log says BOTH targets were deleted in this window.
5869        fx.write(
5870            "log.md",
5871            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
5872        );
5873
5874        let issues = validate_working_set(&fx.store(), None).unwrap();
5875        assert!(
5876            issues
5877                .iter()
5878                .any(|i| i.file == Path::new("records/profiles/refers-sarah.md")
5879                    && i.code == codes::WIKI_LINK_BROKEN),
5880            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
5881        );
5882        assert!(
5883            issues.iter().any(
5884                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
5885                    && i.code == codes::WIKI_LINK_BROKEN
5886            ),
5887            "linker to the SECOND deleted target (typed-field edge) must also be \
5888             pulled in and flagged — proves the scan covers the whole changed set, \
5889             not just one object: {issues:#?}"
5890        );
5891    }
5892
5893    #[test]
5894    fn frontmatter_block_sequence_links_each_get_their_own_line() {
5895        // Each block-sequence wiki-link reports on its own source line.
5896        let fx = Fixture::new();
5897        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
5898        fx.write(
5899            "records/meetings/m.md",
5900            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
5901        );
5902        let issues = fx.store_all();
5903        let broken_lines: BTreeSet<Option<u32>> = issues
5904            .iter()
5905            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
5906            .map(|i| i.line)
5907            .collect();
5908        assert_eq!(
5909            broken_lines.len(),
5910            2,
5911            "two distinct broken-link lines: {issues:#?}"
5912        );
5913    }
5914
5915    // ── Regression: null / non-scalar created/updated ────────────────────────
5916
5917    #[test]
5918    fn null_created_is_missing_not_silently_passed() {
5919        // Regression: a present-but-`null` `created:` previously slipped past
5920        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
5921        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
5922        let fx = Fixture::new();
5923        fx.write(
5924            "records/contacts/a.md",
5925            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5926        );
5927        let issues = fx.store_all();
5928        assert!(
5929            has(&issues, codes::FM_MISSING_CREATED),
5930            "null `created:` must read as missing: {issues:#?}"
5931        );
5932    }
5933
5934    #[test]
5935    fn sequence_created_is_bad_timestamp() {
5936        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
5937        let fx = Fixture::new();
5938        fx.write(
5939            "records/contacts/a.md",
5940            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5941        );
5942        let issues = fx.store_all();
5943        assert!(
5944            issues
5945                .iter()
5946                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
5947            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
5948        );
5949    }
5950
5951    // ── Regression: schema required null / empty-collection ──────────────────
5952
5953    #[test]
5954    fn required_field_null_or_empty_collection_is_missing() {
5955        // Regression: a plain required field (no shape/enum) holding YAML null
5956        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
5957        // previously validated with 0 issues — `scalar_string` returned None and
5958        // `.unwrap_or(false)` treated the value as non-empty.
5959        for value in ["", " []", " {}"] {
5960            let mut fx = Fixture::new();
5961            fx.config.schemas.insert(
5962                "contact".into(),
5963                Schema {
5964                    fields: vec![FieldSpec {
5965                        name: "name".into(),
5966                        required: true,
5967                        ..Default::default()
5968                    }],
5969                    ..Default::default()
5970                },
5971            );
5972            fx.write(
5973                "records/contacts/a.md",
5974                &format!(
5975                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
5976                ),
5977            );
5978            let issues = fx.store_all();
5979            assert!(
5980                issues
5981                    .iter()
5982                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
5983                        && i.key.as_deref() == Some("name")),
5984                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
5985            );
5986        }
5987    }
5988
5989    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
5990
5991    #[test]
5992    fn wiki_link_to_raw_source_file_resolves() {
5993        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
5994        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
5995        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
5996        let fx = Fixture::new();
5997        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
5998        fx.write(
5999            "records/contacts/a.md",
6000            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
6001        );
6002        let issues = fx.store_all();
6003        assert!(
6004            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
6005            "a link to an existing raw source file must not be broken: {issues:#?}"
6006        );
6007    }
6008
6009    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
6010
6011    #[test]
6012    fn non_utf8_content_file_is_reported() {
6013        // Regression: a content file with invalid UTF-8 bytes made
6014        // check_content_file return None silently, so the store passed with exit
6015        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
6016        let fx = Fixture::new();
6017        let abs = fx.dir.path().join("records/notes/corrupt.md");
6018        fs::create_dir_all(abs.parent().unwrap()).unwrap();
6019        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
6020        let issues = validate_working_set(&fx.store(), None).unwrap();
6021        assert!(
6022            has(&issues, codes::FM_UNREADABLE),
6023            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
6024        );
6025    }
6026
6027    // ── Regression: code-fence char/run tracking ─────────────────────────────
6028
6029    #[test]
6030    fn tilde_fence_containing_backtick_fence_does_not_invert() {
6031        // Regression: a `~~~` block legally contains ``` lines (documenting a
6032        // backtick fence); a naive toggle inverted `in_fence` and checked the
6033        // demo `[[fake]]` inside the code block as a live link. The link inside
6034        // BOTH fences must be skipped.
6035        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
6036        let links = extract_wiki_links(body);
6037        assert!(
6038            links.is_empty(),
6039            "wiki-link inside a nested code fence must be skipped: {links:?}"
6040        );
6041    }
6042
6043    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
6044
6045    #[test]
6046    fn all_sweep_visits_in_layer_log_folder() {
6047        // Regression: `validate --all` pruned every dir named `log`, so a real
6048        // content folder like `records/log/` was invisible to the full sweep —
6049        // reporting FEWER errors than the default scope. A frontmatter-less file
6050        // there must still surface FM_MISSING_TYPE under --all.
6051        let fx = Fixture::new();
6052        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
6053        let issues = fx.store_all();
6054        assert!(
6055            has(&issues, codes::FM_MISSING_TYPE),
6056            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
6057        );
6058    }
6059
6060    // ── Regression: flow-form list with whitespace ───────────────────────────
6061
6062    #[test]
6063    fn flow_form_link_list_with_spaces_is_flagged() {
6064        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
6065        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
6066        // text test. The value-based detector must catch the whitespace variant.
6067        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
6068        assert!(
6069            keys.iter().any(|k| k == "attendees"),
6070            "spaced flow-form list must be detected: {keys:?}"
6071        );
6072    }
6073
6074    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6075
6076    #[test]
6077    fn middot_hashtag_summary_tail_round_trips() {
6078        // Regression: a tagless summary that legitimately ends in a single-spaced
6079        // ` · #word` tail round-trips through the renderer verbatim, but the loose
6080        // ` · ` strip mistook it for the tag block and reported a spurious,
6081        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
6082        // exact double-spaced `  ·  ` delimiter.
6083        assert_eq!(
6084            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
6085            Some("Standup notes · #standup"),
6086            "a single-spaced middot tail is part of the summary, not a tag block"
6087        );
6088        // The renderer's real double-spaced tag suffix IS still stripped.
6089        assert_eq!(
6090            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
6091            Some("Renewal champion"),
6092            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
6093        );
6094    }
6095
6096    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6097
6098    #[test]
6099    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
6100        assert!(is_url("http://x"), "an 8-char http URL is valid");
6101        assert!(is_url("https://x"), "a 9-char https URL is valid");
6102        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
6103        assert!(!is_url("https://"), "a bare https scheme is rejected");
6104    }
6105
6106    #[test]
6107    fn email_shape_rejects_double_at() {
6108        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6109        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6110        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6111    }
6112
6113    // ── Regression: working-set vs --all agree on log.md links ───────────────
6114
6115    #[test]
6116    fn working_set_does_not_flag_log_md_body_links() {
6117        // Regression: the working-set incoming-linker scan runs root `log.md`
6118        // through the body wiki-link check, flagging a historical `[[deleted]]`
6119        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6120        // the append-only log can't have "fixed". The root meta files must be
6121        // excluded from the body link check, matching --all.
6122        let fx = Fixture::new();
6123        fx.write("records/contacts/a.md", &valid_contact("A"));
6124        fx.write(
6125            "log.md",
6126            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6127        );
6128        let issues = validate_working_set(&fx.store(), None).unwrap();
6129        assert!(
6130            !issues
6131                .iter()
6132                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6133                    && i.file == std::path::Path::new("log.md")),
6134            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6135        );
6136    }
6137
6138    // ── Regression: DB.md schema field lint ──────────────────────────────────
6139
6140    #[test]
6141    fn schema_duplicate_field_name_is_flagged() {
6142        let mut fx = Fixture::new();
6143        fx.config.schemas.insert(
6144            "contact".into(),
6145            Schema {
6146                fields: vec![
6147                    FieldSpec {
6148                        name: "name".into(),
6149                        required: true,
6150                        ..Default::default()
6151                    },
6152                    FieldSpec {
6153                        name: "name".into(),
6154                        ..Default::default()
6155                    },
6156                ],
6157                ..Default::default()
6158            },
6159        );
6160        let issues = fx.store_all();
6161        assert!(
6162            issues
6163                .iter()
6164                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6165            "a duplicate schema field name must be flagged: {issues:#?}"
6166        );
6167    }
6168
6169    #[test]
6170    fn schema_unknown_modifier_is_info() {
6171        let mut fx = Fixture::new();
6172        fx.config.schemas.insert(
6173            "contact".into(),
6174            Schema {
6175                fields: vec![FieldSpec {
6176                    name: "name".into(),
6177                    unknown_modifiers: vec!["requierd".into()],
6178                    ..Default::default()
6179                }],
6180                ..Default::default()
6181            },
6182        );
6183        let issues = fx.store_all();
6184        assert!(
6185            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6186                && i.severity == Severity::Info
6187                && i.key.as_deref() == Some("name")),
6188            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6189        );
6190    }
6191
6192    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6193    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6194    /// and the module doc-comment promises this code implements "exactly those
6195    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6196    /// new validation code is added to the engine but never documented.
6197    #[test]
6198    fn every_code_constant_is_documented_in_spec() {
6199        // Parse the canonical constant *values* straight out of this module's
6200        // source, so a future `pub const X: &str = "X";` is covered with no test
6201        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6202        let this_src = include_str!("validate.rs");
6203        let mut codes_in_module: Vec<String> = Vec::new();
6204        let mut in_codes_mod = false;
6205        for line in this_src.lines() {
6206            let t = line.trim();
6207            if t.starts_with("pub mod codes") {
6208                in_codes_mod = true;
6209                continue;
6210            }
6211            // The `mod codes` block ends at its closing brace at column 0.
6212            if in_codes_mod && line == "}" {
6213                break;
6214            }
6215            if in_codes_mod {
6216                if let Some(rest) = t.strip_prefix("pub const ") {
6217                    // rest = `NAME: &str = "VALUE";`
6218                    let value = rest
6219                        .split_once('=')
6220                        .map(|(_, v)| v.trim())
6221                        .and_then(|v| v.strip_prefix('"'))
6222                        .and_then(|v| v.strip_suffix("\";"))
6223                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6224                    codes_in_module.push(value.to_string());
6225                }
6226            }
6227        }
6228        assert!(
6229            codes_in_module.len() >= 36,
6230            "parsed only {} code constants from `mod codes`; the parser likely \
6231             broke against a source-format change",
6232            codes_in_module.len()
6233        );
6234
6235        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6236        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6237        let spec = fs::read_to_string(&spec_path)
6238            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6239
6240        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6241        let missing: Vec<&String> = codes_in_module
6242            .iter()
6243            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6244            .collect();
6245        assert!(
6246            missing.is_empty(),
6247            "validation codes emitted by the engine but absent from SPEC.md \
6248             § Validation (the declared complete vocabulary): {missing:?}"
6249        );
6250    }
6251}