Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
31//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
32//! log-header parse, and file walk here, reading only the two public,
33//! caller-populated fields of a [`Store`]: [`Store::root`] and
34//! [`Store::config`] — rather than routing through the sibling modules
35//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
36//! Keeping the checks local lets the validator report precise, per-issue
37//! diagnostics (exact codes, file, and context) without coupling its output to
38//! incidental behavior of the shared readers; the public surface and the
39//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is. Validation fails (non-zero exit) as
/// soon as any finding is a [`Severity::Error`]; warnings and info never fail
/// it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; this one blocks.
    Error,
    /// A decision point left to the agent's discretion.
    Warning,
    /// Reported for visibility only; it never affects the exit status.
    Info,
}
61
62/// A single structured validation finding. Agent-primary and machine-parseable
63/// via `--json`; `suggestion` is a deterministic remediation hint the agent
64/// applies without guessing.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Issue {
67    /// The severity; only [`Severity::Error`] fails validation.
68    pub severity: Severity,
69    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
70    pub code: &'static str,
71    /// The file the issue is about.
72    pub file: PathBuf,
73    /// The 1-based line, when applicable.
74    pub line: Option<u32>,
75    /// The frontmatter key, when the issue is about a specific field.
76    pub key: Option<String>,
77    /// A human-readable message.
78    pub message: String,
79    /// A deterministic remediation hint, when one exists.
80    pub suggestion: Option<String>,
81    /// Other files involved (e.g. the duplicate partner in a collision).
82    pub related: Vec<PathBuf>,
83}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes: one constant for each row of the
/// SPEC.md § Validation table. Call sites use these constants rather than bare
/// string literals so the code and the SPEC table cannot silently drift apart.
pub mod codes {
    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter lacks `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` has an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A `DB.md ## Schemas` field declaration is malformed (an empty or
    /// duplicate field name) or carries an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";
    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// A content file can't be read (it is not valid UTF-8, or an I/O error
    /// occurred).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// The frontmatter block isn't valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` isn't ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// `meta-type` is present but is not one of fact / operational /
    /// conclusion.
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";
    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    /// A wiki-link target isn't a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file doesn't exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension, which should be dropped.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses the inline `[[[a]], [[b]]]` form; block form is
    /// required instead.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of a type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";
    /// A `DB.md` schema requires a field that is absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value doesn't match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field has a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value isn't in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    /// A write was attempted on a `### Frozen pages` path (a write-time code).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file whose type is listed under `### Ignored types` exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `meta-type: conclusion` record derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    /// A `log.md` entry header timestamp is unparseable.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind isn't recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries aren't in non-decreasing time order (a possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    /// A non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file isn't listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` doesn't match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text doesn't match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file isn't in the `index.jsonl`, or a jsonl record points at a
    /// missing file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields don't match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    /// `tags` isn't a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
    /// A line in `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// A content file references an `asset`/`assets` path that has no record
    /// in `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// An `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// An `assets.jsonl` record's path is referenced by no wrapper.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// An `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
197
/// Upper bound on a `summary`'s length, in chars, as set by the SPEC. A summary
/// longer than this is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
200
/// The `log.md` entry kinds the SPEC recognizes (SPEC § `log.md`). Any other
/// kind is reported as `LOG_UNKNOWN_KIND`, which is a warning rather than an
/// error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
214
215// ─────────────────────────────────────────────────────────────────────────────
216//  Public entrypoints
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// **Loop default.** Validate the working set: content files changed since
220/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
221/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
222/// none of the cross-file global passes (entity-dedup, every-index sync,
223/// `log.md` ordering) that `--all` adds. If the default call finds no logged
224/// changed objects, it falls back to a per-file content sweep so an externally
225/// edited or freshly copied store cannot pass vacuously.
226///
227/// **Cost.** The changed set is read from `log.md` — O(changed): every
228/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
229/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
230/// over that set plus its incoming linkers — also O(changed). The one part that
231/// is *not* O(changed) is discovering those incoming linkers: a link to a
232/// changed path can live in the body or a typed frontmatter field of any file,
233/// so it is found by a **single** embedded-ripgrep pass over the store
234/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
235/// scan, flat in the changed-set size. (It was previously a full store read
236/// *per* changed object — `O(changed × store)`; that is the blow-up this path
237/// no longer pays.) The unavoidable single content scan is the same shape as
238/// free-text `dbmd search`; the sidecar `links` projection can't replace it
239/// because it omits body/typed-field edges.
240pub fn validate_working_set(
241    store: &Store,
242    since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244    if !store_marker_present(store) {
245        return Ok(vec![not_a_store_issue(store)]);
246    }
247
248    let cutoff = match since {
249        Some(ts) => Some(ts),
250        None => last_validate_at(store),
251    };
252
253    // 1. Changed objects, straight from the log (O(changed) — never a walk).
254    let changed = changed_objects_since(store, cutoff);
255    if changed.is_empty() && since.is_none() {
256        return validate_content_sweep(store);
257    }
258
259    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
260    //    path (the linker may now be stale even though it didn't change). The
261    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
262    //    pass over the store for the WHOLE changed set (one `.md` walk, one
263    //    presence-only/early-exit scan per file), not one walk per object. This
264    //    is the fix for the `O(changed × store)` blow-up that calling
265    //    `find_links_to` in a loop produced (a full store read per changed
266    //    object); the cost is now a single store scan regardless of how many
267    //    objects changed. A returned self-link is harmlessly deduped by the set
268    //    (the object is already inserted below).
269    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270    let mut working: BTreeSet<PathBuf> = changed;
271    for linker in store.find_links_to_any(&changed_targets)? {
272        working.insert(linker);
273    }
274
275    let mut issues = Vec::new();
276    for rel in &working {
277        let abs = store.root.join(rel);
278        // A changed path can be a *deletion* — skip files that no longer exist;
279        // the incoming-linker scan above already flagged links into them.
280        if !abs.is_file() {
281            continue;
282        }
283        // `None` basename index: the working-set pass does not build the
284        // store-wide basename map (that is a `--all`-only structure), so a bare
285        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
286        // `--all` sweep does the ambiguity upgrade.
287        check_content_file(store, rel, &abs, None, &mut issues);
288    }
289    issues.sort_by(issue_order);
290    Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294    let mut issues = Vec::new();
295    for rel in store.walk()? {
296        let abs = store.root.join(&rel);
297        check_content_file(store, &rel, &abs, None, &mut issues);
298    }
299    issues.sort_by(issue_order);
300    Ok(issues)
301}
302
303/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
304/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
305/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
306/// loop.
307pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308    if !store_marker_present(store) {
309        return Ok(vec![not_a_store_issue(store)]);
310    }
311
312    let mut issues = Vec::new();
313
314    // Store-identity file: `DB.md` shape (type / required fields / section
315    // headers). A single root file, checked once in the sweep — not a content
316    // file (it carries no `summary`), so it is not part of `walk_content_files`.
317    check_db_md(store, &mut issues);
318
319    let files = walk_content_files(&store.root);
320
321    // The basename index makes the short-form wiki-link check able to upgrade a
322    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
323    // Built once from the already-gathered sweep list (no extra walk); only the
324    // `--all` path has it (the working-set path stays O(changed)).
325    let basenames = build_basename_index(&files);
326
327    // Per-file checks over the whole store.
328    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329    for rel in &files {
330        let abs = store.root.join(rel);
331        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332            parsed.push((rel.clone(), p));
333        }
334    }
335
336    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
337    check_duplicates(store, &parsed, &mut issues);
338
339    // Cross-file: hierarchical index.md + index.jsonl sync.
340    check_indexes(store, &files, &mut issues);
341
342    // Cross-file: log.md well-formedness + ordering.
343    check_log(store, &mut issues);
344
345    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
346    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
347    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
348    // a fresh clone with no restored bytes still passes here.
349    check_assets(store, &parsed, &mut issues);
350
351    issues.sort_by(issue_order);
352    Ok(issues)
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356//  Per-file content checks (shared by both scopes)
357// ─────────────────────────────────────────────────────────────────────────────
358
/// The part of a per-file parse that `validate_all`'s cross-file pass reuses:
/// the parsed YAML mapping (the source of dedup keys) and the raw frontmatter
/// text (the input to text-based wiki-link extraction). The body and the
/// fence line are consumed inline during the per-file pass and are not
/// carried here.
struct Parsed {
    /// The parsed top-level YAML mapping, keyed by string. `None` means the
    /// YAML was malformed, in which case a `FM_MALFORMED_YAML` issue has
    /// already been emitted.
    fm: Option<BTreeMap<String, Value>>,
    /// The raw frontmatter YAML text (the text between the fences), used as the
    /// source for text-based wiki-link extraction in dedup.
    fm_yaml: String,
}
371
372/// Run every per-file check on one content file, pushing issues. Returns the
373/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
374/// `None` only when the file is unreadable or has no frontmatter block at all
375/// (which for a content file is itself reported).
376fn check_content_file(
377    store: &Store,
378    rel: &Path,
379    abs: &Path,
380    basenames: Option<&BasenameIndex>,
381    issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383    let text = match std::fs::read_to_string(abs) {
384        Ok(t) => t,
385        Err(e) => {
386            // The file exists in the walk but can't be read as UTF-8 text
387            // (invalid bytes) or hit an I/O error. Returning `None` silently
388            // here let a store whose only content file was binary garbage pass
389            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
390            // sweep exists to prevent. Report it so the agent gets an actionable
391            // diagnostic naming the unreadable file (and `index rebuild`, which
392            // hard-fails on the same file, isn't the only signal).
393            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394                "file is not valid UTF-8 text".to_string()
395            } else {
396                format!("file could not be read: {e}")
397            };
398            push(
399                issues,
400                Severity::Error,
401                codes::FM_UNREADABLE,
402                rel,
403                None,
404                None,
405                format!("content file is unreadable: {detail}"),
406                Some(
407                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408                        .into(),
409                ),
410                vec![],
411            );
412            return None;
413        }
414    };
415
416    let is_content = is_content_file(rel);
417
418    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419        Some(split) => split,
420        None => {
421            // No frontmatter at all. For a content file that means there's no
422            // `type:` and no `summary:` — report both the way a parsed-but-empty
423            // file would, so the agent gets the same actionable codes.
424            if is_content {
425                push(
426                    issues,
427                    Severity::Error,
428                    codes::FM_MISSING_TYPE,
429                    rel,
430                    None,
431                    Some("type".into()),
432                    "content file has no frontmatter `type:`".into(),
433                    Some("add a YAML frontmatter block with `type:`".into()),
434                    vec![],
435                );
436                push(
437                    issues,
438                    Severity::Error,
439                    codes::SUMMARY_MISSING,
440                    rel,
441                    None,
442                    Some("summary".into()),
443                    "content file has no `summary`".into(),
444                    Some("run `dbmd fm init`".into()),
445                    vec![],
446                );
447            }
448            return None;
449        }
450    };
451
452    // Parse the YAML block.
453    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455        // An empty frontmatter block parses as Null; treat as an empty mapping.
456        Ok(Value::Null) => Some(BTreeMap::new()),
457        Ok(_) => {
458            // A scalar / sequence at the top level isn't a frontmatter mapping.
459            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
460            // block is opaque, so there is no single offending field line.
461            push(
462                issues,
463                Severity::Error,
464                codes::FM_MALFORMED_YAML,
465                rel,
466                Some(1),
467                None,
468                "frontmatter is not a YAML mapping".into(),
469                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470                vec![],
471            );
472            None
473        }
474        Err(e) => {
475            // Anchor to line 1 (the opening `---`): an unparseable block has no
476            // single offending field line; the agent re-reads the whole block.
477            push(
478                issues,
479                Severity::Error,
480                codes::FM_MALFORMED_YAML,
481                rel,
482                Some(1),
483                None,
484                format!("frontmatter block isn't valid YAML: {e}"),
485                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486                vec![],
487            );
488            None
489        }
490    };
491
492    if let Some(map) = &fm {
493        // The detailed frontmatter checks only run when the YAML parsed.
494        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495    }
496
497    // Wiki-link doctrine checks run on the body of content files. They are NOT
498    // run on:
499    //   - the root append-only meta files `log.md`/`DB.md` — they reach this
500    //     function only via the working-set incoming-linker scan (`walk_all_md`
501    //     includes them), and `validate --all` never link-checks their bodies. A
502    //     historical `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]`
503    //     in DB.md's `## Agent instructions`, must not be `WIKI_LINK_BROKEN`; the
504    //     log is append-only, so "fix the link" can't even be applied.
505    //   - the derived catalogs `index.md`/`index.jsonl` — their "links" are
506    //     GENERATED catalog entries, not authored body wiki-links. A folder's
507    //     `index.md` is pulled into the working set as an incoming linker (an
508    //     entry `[[records/contacts/a]]` IS a wiki-link to a member, so touching
509    //     or deleting any member drags its folder `index.md` in). Its integrity
510    //     is the job of `check_indexes` under `--all`, which reports a dangling
511    //     entry as `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"). Body-link-
512    //     checking it here instead emitted `WIKI_LINK_BROKEN` ("create the
513    //     target") for the SAME condition — a different code with the OPPOSITE
514    //     remedy across the loop default vs the sweep, steering an agent to
515    //     recreate deleted data. `walk_content_files` skips `index.md` under
516    //     `--all` for exactly this reason; the working-set scope must match.
517    // Without these guards the two scopes disagree on the same store.
518    if !is_root_meta_file(rel) && !is_index_catalog_file(rel) {
519        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
520    }
521
522    Some(Parsed { fm, fm_yaml })
523}
524
525/// All frontmatter-level checks for a content file with valid YAML.
526fn check_frontmatter(
527    store: &Store,
528    rel: &Path,
529    fm: &BTreeMap<String, Value>,
530    fm_yaml: &str,
531    basenames: Option<&BasenameIndex>,
532    issues: &mut Vec<Issue>,
533    is_content: bool,
534) {
535    let type_ = fm.get("type").and_then(scalar_string);
536
537    // ── type ────────────────────────────────────────────────────────────────
538    if is_content && type_.is_none() {
539        push(
540            issues,
541            Severity::Error,
542            codes::FM_MISSING_TYPE,
543            rel,
544            fm_key_line_or_top(fm_yaml, "type"),
545            Some("type".into()),
546            "content file has no `type:`".into(),
547            Some("add a `type:` field (e.g. `type: contact`)".into()),
548            vec![],
549        );
550    }
551
552    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
553    // Present-but-out-of-enum is an error; absent is fine (effective default
554    // `fact`). Sources don't normally carry one, but validating the value when
555    // present is layer-agnostic and harmless.
556    if is_content {
557        // Branch on the raw value, NOT `and_then(scalar_string)`. Pre-filtering
558        // through `scalar_string` made a list/mapping value (which returns `None`)
559        // short-circuit the whole check, so a structurally-wrong `meta-type`
560        // slipped through clean AND was silently reclassified as the default
561        // `fact` by the rest of the toolkit. Absent or explicit-`null` is fine
562        // (effective default `fact`); a present non-null value must be a scalar in
563        // the closed enum. This mirrors the sibling timestamp check below, which
564        // was already hardened against the same non-scalar escape.
565        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
566            match scalar_string(v) {
567                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
568                Some(mt) => push(
569                    issues,
570                    Severity::Error,
571                    codes::FM_BAD_META_TYPE,
572                    rel,
573                    fm_key_line_or_top(fm_yaml, "meta-type"),
574                    Some("meta-type".into()),
575                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
576                    Some(
577                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
578                            .into(),
579                    ),
580                    vec![],
581                ),
582                None => push(
583                    issues,
584                    Severity::Error,
585                    codes::FM_BAD_META_TYPE,
586                    rel,
587                    fm_key_line_or_top(fm_yaml, "meta-type"),
588                    Some("meta-type".into()),
589                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
590                     string, found a list or mapping"
591                        .to_string(),
592                    Some(
593                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
594                            .into(),
595                    ),
596                    vec![],
597                ),
598            }
599        }
600    }
601
602    // ── summary (universal on content files) ──────────────────────────────────
603    if is_content {
604        check_summary(rel, fm, fm_yaml, issues);
605    }
606
607    // ── timestamps: created / updated ─────────────────────────────────────────
608    // The `created`/`updated` contract is content-file-only; meta files
609    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
610    if is_content {
611        for (key, missing_code) in [
612            ("created", codes::FM_MISSING_CREATED),
613            ("updated", codes::FM_MISSING_UPDATED),
614        ] {
615            // A key that is absent, or present-but-`null`, has *no* timestamp →
616            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
617            // "no timestamp", so a null `created:` must read as missing, not
618            // silently pass.
619            let value = fm.get(key);
620            let missing = value.is_none() || value.is_some_and(Value::is_null);
621            if missing {
622                push(
623                    issues,
624                    Severity::Error,
625                    missing_code,
626                    rel,
627                    fm_key_line_or_top(fm_yaml, key),
628                    Some(key.into()),
629                    format!("content file has no `{key}:` timestamp"),
630                    Some(format!(
631                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
632                    )),
633                    vec![],
634                );
635            } else if let Some(v) = value {
636                // Present and non-null. A scalar is checked for ISO-8601; a
637                // sequence/mapping is not a timestamp string at all and so
638                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
639                // through the way it did when `scalar_string` returned `None`
640                // and the branch silently no-oped).
641                match scalar_string(v) {
642                    Some(s) if is_iso8601(&s) => {}
643                    Some(s) => push(
644                        issues,
645                        Severity::Error,
646                        codes::FM_BAD_TIMESTAMP,
647                        rel,
648                        fm_key_line(fm_yaml, key),
649                        Some(key.into()),
650                        format!("`{key}` is not ISO-8601: {s:?}"),
651                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
652                        vec![],
653                    ),
654                    None => push(
655                        issues,
656                        Severity::Error,
657                        codes::FM_BAD_TIMESTAMP,
658                        rel,
659                        fm_key_line(fm_yaml, key),
660                        Some(key.into()),
661                        format!(
662                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
663                        ),
664                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
665                        vec![],
666                    ),
667                }
668            }
669        }
670    }
671    // ── tags shape ────────────────────────────────────────────────────────────
672    if let Some(tags) = fm.get("tags") {
673        if !is_flat_scalar_list(tags) {
674            push(
675                issues,
676                Severity::Warning,
677                codes::TAGS_MALFORMED,
678                rel,
679                fm_key_line(fm_yaml, "tags"),
680                Some("tags".into()),
681                "`tags` must be a flat YAML list of short scalar labels".into(),
682                Some("use block form: one `- <tag>` per line".into()),
683                vec![],
684            );
685        }
686    }
687
688    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
689    for key in detect_flow_form_link_lists(fm_yaml) {
690        push(
691            issues,
692            Severity::Error,
693            codes::WIKI_LINK_FLOW_FORM_LIST,
694            rel,
695            fm_key_line(fm_yaml, &key),
696            Some(key.clone()),
697            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
698            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
699            vec![],
700        );
701    }
702
703    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
704    // Skip keys that have an explicit `link to` schema spec — those are checked
705    // (with prefix enforcement) in `check_schema`, and double-reporting the same
706    // link via two paths would be noise.
707    let schema_link_keys: BTreeSet<String> =
708        effective_schema(store, type_.as_deref().unwrap_or(""))
709            .map(|s| {
710                s.fields
711                    .iter()
712                    .filter(|f| f.link_prefix.is_some())
713                    .map(|f| f.name.clone())
714                    .collect()
715            })
716            .unwrap_or_default();
717    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
718        if schema_link_keys.contains(&key) {
719            continue;
720        }
721        check_wiki_link(
722            store,
723            rel,
724            &link,
725            Some(link.line),
726            Some(&key),
727            basenames,
728            issues,
729        );
730    }
731
732    // ── policies: ignored types ──────────────────────────────────────────────
733    if let Some(t) = &type_ {
734        if store.config.ignored_types.iter().any(|it| it == t) {
735            push(
736                issues,
737                Severity::Info,
738                codes::POLICY_IGNORED_TYPE_PRESENT,
739                rel,
740                fm_key_line(fm_yaml, "type"),
741                Some("type".into()),
742                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
743                Some(
744                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
745                        .into(),
746                ),
747                // The policy source: `DB.md` declares the ignored type.
748                vec![PathBuf::from("DB.md")],
749            );
750        }
751        // A conclusion record (`meta-type: conclusion`) deriving from an
752        // ignored-type record → warning. The decision lives in the shared
753        // `derived_from_ignored_type` entry point; this side only supplies the
754        // `derived_from` targets (with their line, which the issue carries) and
755        // renders the finding.
756        let meta_type = fm
757            .get("meta-type")
758            .and_then(scalar_string)
759            .unwrap_or_else(|| "fact".to_string());
760        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
761            if let Some(hit) =
762                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
763            {
764                push(
765                    issues,
766                    Severity::Warning,
767                    codes::POLICY_IGNORED_TYPE_DERIVED,
768                    rel,
769                    Some(link.line),
770                    Some("derived_from".into()),
771                    format!(
772                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
773                        hit.target, hit.target_type
774                    ),
775                    Some(
776                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
777                            .into(),
778                    ),
779                    // The ignored-type source record, plus `DB.md` (the policy
780                    // source that lists the ignored type).
781                    vec![
782                        PathBuf::from(format!("{}.md", hit.target)),
783                        PathBuf::from("DB.md"),
784                    ],
785                );
786            }
787        }
788    }
789
790    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
791    if let Some(t) = &type_ {
792        if let Some(schema) = effective_schema(store, t) {
793            check_schema(store, rel, fm, fm_yaml, &schema, issues);
794        }
795    }
796}
797
798/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
799fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
800    let line = fm_key_line(fm_yaml, "summary");
801    match fm.get("summary") {
802        None => push(
803            issues,
804            Severity::Error,
805            codes::SUMMARY_MISSING,
806            rel,
807            // A missing `summary` key has no line of its own → anchor to the
808            // frontmatter block top (line 1), the EXPECTED field-absence rule.
809            fm_key_line_or_top(fm_yaml, "summary"),
810            Some("summary".into()),
811            "content file has no `summary`".into(),
812            Some("run `dbmd fm init`".into()),
813            vec![],
814        ),
815        Some(v) => {
816            let s = scalar_string(v).unwrap_or_default();
817            if s.trim().is_empty() {
818                push(
819                    issues,
820                    Severity::Error,
821                    codes::SUMMARY_EMPTY,
822                    rel,
823                    line,
824                    Some("summary".into()),
825                    "`summary` is present but empty".into(),
826                    Some("write a one-line summary, or run `dbmd fm init`".into()),
827                    vec![],
828                );
829            } else if s.contains('\n') {
830                push(
831                    issues,
832                    Severity::Error,
833                    codes::SUMMARY_MULTILINE,
834                    rel,
835                    line,
836                    Some("summary".into()),
837                    "`summary` must be one line (contains a newline)".into(),
838                    Some("collapse the summary to a single line".into()),
839                    vec![],
840                );
841            } else if s.chars().count() > MAX_SUMMARY_LEN {
842                push(
843                    issues,
844                    Severity::Warning,
845                    codes::SUMMARY_TOO_LONG,
846                    rel,
847                    line,
848                    Some("summary".into()),
849                    format!(
850                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
851                        s.chars().count()
852                    ),
853                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
854                    vec![],
855                );
856            }
857        }
858    }
859}
860
861/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
862fn check_body_wiki_links(
863    store: &Store,
864    rel: &Path,
865    body: &str,
866    fm_end_line: u32,
867    basenames: Option<&BasenameIndex>,
868    issues: &mut Vec<Issue>,
869) {
870    for link in extract_wiki_links(body) {
871        // Body lines are offset past the frontmatter block. `link.line` is
872        // 1-based within `body`; the body starts at `fm_end_line + 1`.
873        let abs_line = fm_end_line + link.line;
874        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
875    }
876}
877
/// Maps a bare basename (a file's stem, without `.md`) to every
/// store-relative path that carries it.
///
/// The `validate --all` sweep builds this once so the short-form wiki-link
/// check can tell a merely short-form target (`WIKI_LINK_SHORT_FORM`) apart
/// from an *ambiguous* one whose basename matches two or more files
/// (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set path passes
/// `None` instead: that loop is O(changed), never walks the store, and so
/// reports the plain short-form error without this lookup.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the swept file list by file stem to form the [`BasenameIndex`].
///
/// The list is the one `validate_all` has already gathered, so no extra walk
/// happens here. Paths with no file stem, or whose stem is not valid UTF-8,
/// are left out. Within one stem, paths keep the order they have in `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files
        .iter()
        .filter_map(|path| {
            let stem = path.file_stem()?.to_str()?;
            Some((stem.to_owned(), path))
        })
        .fold(BasenameIndex::new(), |mut index, (stem, path)| {
            index.entry(stem).or_default().push(path.clone());
            index
        })
}
898
899/// The shared per-wiki-link doctrine + integrity check used by both body links
900/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
901/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
902/// when its bare basename matches ≥2 files.
903fn check_wiki_link(
904    store: &Store,
905    rel: &Path,
906    link: &Link,
907    line: Option<u32>,
908    key: Option<&str>,
909    basenames: Option<&BasenameIndex>,
910    issues: &mut Vec<Issue>,
911) {
912    let bare = link.target.trim_end_matches(".md");
913
914    // Short-form: not a full store-relative path (no `/`, or first segment isn't
915    // a known layer).
916    if !is_full_store_path(bare) {
917        // Ambiguous (defensive) takes precedence over plain short-form when the
918        // target is a bare basename (no `/`) that matches ≥2 files in the store.
919        // Only computable in the sweep (where `basenames` is populated); the
920        // working-set path falls through to the plain short-form error.
921        if !bare.contains('/') {
922            if let Some(idx) = basenames {
923                if let Some(matches) = idx.get(bare) {
924                    if matches.len() >= 2 {
925                        let mut related = matches.clone();
926                        related.sort();
927                        push(
928                            issues,
929                            Severity::Error,
930                            codes::WIKI_LINK_AMBIGUOUS,
931                            rel,
932                            line,
933                            key.map(str::to_string),
934                            format!(
935                                "short-form wiki-link `[[{}]]` matches multiple files",
936                                link.target
937                            ),
938                            Some("use the full store-relative path to disambiguate".into()),
939                            related,
940                        );
941                        return;
942                    }
943                }
944            }
945        }
946        push(
947            issues,
948            Severity::Error,
949            codes::WIKI_LINK_SHORT_FORM,
950            rel,
951            line,
952            key.map(str::to_string),
953            format!(
954                "wiki-link `[[{}]]` is not a full store-relative path",
955                link.target
956            ),
957            short_form_suggestion(bare),
958            vec![],
959        );
960        // Don't also report broken; the agent must fix the form first.
961        return;
962    }
963
964    // `.md` extension → warning, then still check existence.
965    if link.target.ends_with(".md") {
966        push(
967            issues,
968            Severity::Warning,
969            codes::WIKI_LINK_HAS_EXTENSION,
970            rel,
971            line,
972            key.map(str::to_string),
973            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
974            Some(format!("drop the extension: [[{bare}]]")),
975            vec![],
976        );
977    }
978
979    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
980    // same way the graph engine does — the literal path first (so a link to a
981    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
982    // the `.md`-appended path.
983    match resolve_wiki_target(store, bare) {
984        TargetResolution::Exists => {}
985        TargetResolution::Missing => push(
986            issues,
987            Severity::Error,
988            codes::WIKI_LINK_BROKEN,
989            rel,
990            line,
991            key.map(str::to_string),
992            format!("wiki-link target `{bare}` doesn't exist"),
993            Some(format!(
994                "create `{bare}.md`, or point the link at an existing file"
995            )),
996            vec![],
997        ),
998        TargetResolution::Unsafe => push(
999            issues,
1000            Severity::Error,
1001            codes::WIKI_LINK_BROKEN,
1002            rel,
1003            line,
1004            key.map(str::to_string),
1005            format!("wiki-link target `{bare}` is not a safe store-relative path"),
1006            Some("use a full store-relative path under sources/ or records/".into()),
1007            vec![],
1008        ),
1009    }
1010}
1011
1012// ─────────────────────────────────────────────────────────────────────────────
1013//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
1014// ─────────────────────────────────────────────────────────────────────────────
1015
1016/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
1017/// block, or `None`. This is the **only** source of schema enforcement — the
1018/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
1019/// store that wants its `contact` / `expense` / etc. fields enforced declares
1020/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
1021/// copy-in starting point.
1022fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1023    store.config.schemas.get(type_).cloned()
1024}
1025
1026/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
1027fn check_schema(
1028    store: &Store,
1029    rel: &Path,
1030    fm: &BTreeMap<String, Value>,
1031    fm_yaml: &str,
1032    schema: &Schema,
1033    issues: &mut Vec<Issue>,
1034) {
1035    for spec in &schema.fields {
1036        let present = fm.get(&spec.name);
1037        let line = fm_key_line(fm_yaml, &spec.name);
1038
1039        // Required. "Empty" means: the key is absent, or its value carries no
1040        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1041        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1042        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1043        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1044        // a required field with a null or empty-collection value pass silently;
1045        // route them through `is_empty_value` instead.
1046        let is_empty = match present {
1047            None => true,
1048            Some(v) => is_empty_value(v),
1049        };
1050        if spec.required && is_empty {
1051            push(
1052                issues,
1053                Severity::Error,
1054                codes::SCHEMA_MISSING_REQUIRED,
1055                rel,
1056                // Absent key → anchor to the frontmatter top (line 1); a
1057                // present-but-empty value keeps its own line.
1058                fm_key_line_or_top(fm_yaml, &spec.name),
1059                Some(spec.name.clone()),
1060                format!("required field `{}` is absent or empty", spec.name),
1061                Some(format!("set `{}` to a non-empty value", spec.name)),
1062                vec![],
1063            );
1064            continue;
1065        }
1066        let Some(value) = present else { continue };
1067
1068        // An OPTIONAL field that is `null` or empty is simply unset — there is
1069        // no value to shape/enum/link-check. (The required+empty case already
1070        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1071        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1072        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1073        let value_empty = value.is_null()
1074            || scalar_string(value)
1075                .map(|s| s.trim().is_empty())
1076                .unwrap_or(false);
1077        if !spec.required && value_empty {
1078            continue;
1079        }
1080
1081        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1082        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1083        if let Some(prefix) = &spec.link_prefix {
1084            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1085            continue; // a link field is never also shape/enum-checked
1086        }
1087
1088        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1089        // or mapping satisfies neither, and would otherwise slip through both
1090        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1091        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1092        // than let a structurally-wrong value validate clean. (Link fields,
1093        // which legitimately take block-form sequences, already `continue`d.)
1094        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1095            push(
1096                issues,
1097                Severity::Error,
1098                codes::SCHEMA_SHAPE_MISMATCH,
1099                rel,
1100                line,
1101                Some(spec.name.clone()),
1102                format!(
1103                    "`{}` must be a scalar value, found a list or mapping",
1104                    spec.name
1105                ),
1106                Some(format!("set `{}` to a single scalar value", spec.name)),
1107                vec![],
1108            );
1109            continue;
1110        }
1111
1112        // enum
1113        if let Some(allowed) = &spec.enum_values {
1114            if let Some(s) = scalar_string(value) {
1115                if !allowed.iter().any(|a| a == &s) {
1116                    push(
1117                        issues,
1118                        Severity::Error,
1119                        codes::SCHEMA_ENUM_VIOLATION,
1120                        rel,
1121                        line,
1122                        Some(spec.name.clone()),
1123                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1124                        Some(format!("use one of: {}", allowed.join(", "))),
1125                        vec![],
1126                    );
1127                }
1128            }
1129            continue;
1130        }
1131
1132        // shape
1133        if let Some(shape) = spec.shape {
1134            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1135        }
1136    }
1137}
1138
1139/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1140/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1141/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1142/// recognized exactly like the quoted form.
1143fn check_schema_link(
1144    store: &Store,
1145    rel: &Path,
1146    field: &str,
1147    fm_yaml: &str,
1148    prefix: &Path,
1149    line: Option<u32>,
1150    issues: &mut Vec<Issue>,
1151) {
1152    let prefix_str = prefix.to_string_lossy();
1153    let prefix_str = prefix_str.trim_end_matches('/');
1154    let suggestion = |target_leaf: &str| {
1155        Some(format!(
1156            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1157        ))
1158    };
1159
1160    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1161    if links.is_empty() {
1162        // No wiki-link in the field's value → it's a plain string.
1163        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1164        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1165        let leaf = slugish(raw);
1166        push(
1167            issues,
1168            Severity::Error,
1169            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1170            rel,
1171            line,
1172            Some(field.to_string()),
1173            format!(
1174                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1175            ),
1176            suggestion(&leaf),
1177            vec![],
1178        );
1179        return;
1180    }
1181
1182    for link in links {
1183        if link.target.ends_with(".md") {
1184            let bare = link.target.trim_end_matches(".md");
1185            push(
1186                issues,
1187                Severity::Warning,
1188                codes::WIKI_LINK_HAS_EXTENSION,
1189                rel,
1190                Some(link.line),
1191                Some(field.to_string()),
1192                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1193                Some(format!("drop the extension: [[{bare}]]")),
1194                vec![],
1195            );
1196        }
1197        let bare = link.target.trim_end_matches(".md");
1198        if !path_under_prefix(bare, prefix_str) {
1199            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1200            push(
1201                issues,
1202                Severity::Error,
1203                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1204                rel,
1205                line,
1206                Some(field.to_string()),
1207                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1208                suggestion(leaf),
1209                vec![],
1210            );
1211        } else {
1212            // Correct prefix — still surface a broken target so the agent sees
1213            // one consistent vocabulary. Resolve like the graph engine (literal
1214            // path first, then `.md`) so a `link to sources/` field pointing at a
1215            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1216            match resolve_wiki_target(store, bare) {
1217                TargetResolution::Exists => {}
1218                TargetResolution::Missing => push(
1219                    issues,
1220                    Severity::Error,
1221                    codes::WIKI_LINK_BROKEN,
1222                    rel,
1223                    line,
1224                    Some(field.to_string()),
1225                    format!("wiki-link target `{bare}` doesn't exist"),
1226                    Some(format!(
1227                        "create `{bare}.md`, or point the link at an existing file"
1228                    )),
1229                    vec![],
1230                ),
1231                TargetResolution::Unsafe => push(
1232                    issues,
1233                    Severity::Error,
1234                    codes::WIKI_LINK_BROKEN,
1235                    rel,
1236                    line,
1237                    Some(field.to_string()),
1238                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1239                    Some("use a full store-relative path under sources/ or records/".into()),
1240                    vec![],
1241                ),
1242            }
1243        }
1244    }
1245}
1246
1247/// Shape enforcement for a non-link, non-enum schema field.
1248fn check_schema_shape(
1249    rel: &Path,
1250    field: &str,
1251    value: &Value,
1252    shape: Shape,
1253    line: Option<u32>,
1254    issues: &mut Vec<Issue>,
1255) {
1256    let s = scalar_string(value).unwrap_or_default();
1257    let ok = match shape {
1258        Shape::String => true, // any scalar string
1259        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1260        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1261        Shape::Date => is_iso8601_date_or_datetime(&s),
1262        Shape::Email => is_email(&s),
1263        Shape::Currency => is_currency(&s),
1264        Shape::Url => is_url(&s),
1265    };
1266    if !ok {
1267        push(
1268            issues,
1269            Severity::Error,
1270            codes::SCHEMA_SHAPE_MISMATCH,
1271            rel,
1272            line,
1273            Some(field.to_string()),
1274            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1275            Some(shape_suggestion(shape)),
1276            vec![],
1277        );
1278    }
1279}
1280
1281// ─────────────────────────────────────────────────────────────────────────────
1282//  Cross-file: entity-dedup collisions (validate_all only)
1283// ─────────────────────────────────────────────────────────────────────────────
1284
1285/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1286///
1287/// `DUP_ID` is universal (two files with the same explicit `id`).
1288/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1289/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1290/// uniqueness constraint, and two records of that type whose declared values
1291/// collide warn. No type carries a built-in dedup key — the store opts in.
1292///
1293/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1294/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1295/// lexicographically smallest store-relative path in the group (a total order →
1296/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1297/// that field's line on the reported file and carries it as `key`; a multi-field
1298/// key anchors to line 1 with a null key.
1299fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1300    // Path → frontmatter YAML, for resolving the anchor field's line on the
1301    // reported (smallest-path) member.
1302    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1303        .iter()
1304        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1305        .collect();
1306
1307    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1308    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1309    for (rel, p) in parsed {
1310        if let Some(map) = &p.fm {
1311            if let Some(id) = map.get("id").and_then(scalar_string) {
1312                if !id.trim().is_empty() {
1313                    by_id.entry(id).or_default().push(rel.clone());
1314                }
1315            }
1316        }
1317    }
1318    for (id, files) in &by_id {
1319        if files.len() > 1 {
1320            let (reported, related) = canonical_and_related(files);
1321            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1322            push(
1323                issues,
1324                Severity::Error,
1325                codes::DUP_ID,
1326                &reported,
1327                line,
1328                Some("id".into()),
1329                format!("id {id:?} is declared by more than one file"),
1330                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1331                related,
1332            );
1333        }
1334    }
1335
1336    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1337    // Every constraint comes from the store's `## Schemas`; a type with no
1338    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1339    // key-ordered, so emitted issues are deterministic across runs.
1340    for (type_name, schema) in &store.config.schemas {
1341        for key_fields in &schema.unique_keys {
1342            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1343        }
1344    }
1345}
1346
1347/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1348/// declared `key_fields` render to the same token tuple. Files missing any key
1349/// field are skipped — an incomplete key is never a collision.
1350///
1351/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1352/// store-relative path; `related` is the rest. A single-field key anchors to
1353/// that field's line on the reported file and carries it as `key`; a multi-field
1354/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1355fn soft_dup(
1356    parsed: &[(PathBuf, Parsed)],
1357    issues: &mut Vec<Issue>,
1358    type_: &str,
1359    key_fields: &[String],
1360    fm_yaml_of: &HashMap<&PathBuf, &str>,
1361) {
1362    if key_fields.is_empty() {
1363        return;
1364    }
1365    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1366    for (rel, p) in parsed {
1367        let is_type =
1368            p.fm.as_ref()
1369                .and_then(|m| m.get("type"))
1370                .and_then(scalar_string)
1371                .map(|t| t == type_)
1372                .unwrap_or(false);
1373        if !is_type {
1374            continue;
1375        }
1376        if let Some(key) = dedup_key(p, key_fields) {
1377            groups.entry(key).or_default().push(rel.clone());
1378        }
1379    }
1380    // HashMap iteration is nondeterministic; sort by reported member so the
1381    // emitted issue order is stable across runs.
1382    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1383        .values()
1384        .filter(|files| files.len() > 1)
1385        .map(|files| canonical_and_related(files))
1386        .collect();
1387    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1388
1389    let fields_disp = key_fields.join(", ");
1390    for (reported, related) in collisions {
1391        // Single-field keys anchor to the field's line + carry the key; multi-
1392        // field keys anchor to line 1 with a null key.
1393        let (line, key) = if key_fields.len() == 1 {
1394            (
1395                fm_yaml_of
1396                    .get(&reported)
1397                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1398                Some(key_fields[0].clone()),
1399            )
1400        } else {
1401            (Some(1), None)
1402        };
1403        let n = related.len();
1404        push(
1405            issues,
1406            Severity::Warning,
1407            codes::DUP_UNIQUE_KEY,
1408            &reported,
1409            line,
1410            key,
1411            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1412            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1413            related,
1414        );
1415    }
1416}
1417
1418/// Render a type's `unique:` key for one file: each field's dedup token in
1419/// order, or `None` if any field is absent/empty (an incomplete key never
1420/// collides).
1421fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1422    let mut out = Vec::with_capacity(key_fields.len());
1423    for f in key_fields {
1424        out.push(dedup_token(p, f)?);
1425    }
1426    Some(out)
1427}
1428
1429/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1430/// values (single or block-sequence list) reduce to their lower-cased target
1431/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1432/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1433fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1434    // Wiki-links first — read from the raw frontmatter text so the unquoted
1435    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1436    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1437    if !links.is_empty() {
1438        let set: BTreeSet<String> = links
1439            .into_iter()
1440            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1441            .filter(|t| !t.is_empty())
1442            .collect();
1443        return if set.is_empty() {
1444            None
1445        } else {
1446            Some(set.into_iter().collect::<Vec<_>>().join(","))
1447        };
1448    }
1449    match p.fm.as_ref()?.get(field) {
1450        Some(Value::Sequence(items)) => {
1451            let set: BTreeSet<String> = items
1452                .iter()
1453                .filter_map(scalar_string)
1454                .map(|s| s.trim().to_lowercase())
1455                .filter(|t| !t.is_empty())
1456                .collect();
1457            if set.is_empty() {
1458                None
1459            } else {
1460                Some(set.into_iter().collect::<Vec<_>>().join(","))
1461            }
1462        }
1463        Some(v) => {
1464            let s = scalar_string(v)?.trim().to_lowercase();
1465            if s.is_empty() {
1466                None
1467            } else {
1468                Some(s)
1469            }
1470        }
1471        None => None,
1472    }
1473}
1474
/// Divide a non-empty collision group into `(reported, related)`.
///
/// The lexicographically smallest store-relative path becomes the reported
/// member; all remaining paths, in ascending order, become `related`. Because
/// store-relative paths are totally ordered the split is deterministic, which
/// is what reporting rule #1 depends on.
///
/// # Panics
///
/// Panics when `files` is empty.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.iter().cloned().collect();
    ordered.sort();
    let mut rest = ordered.into_iter();
    let reported = rest
        .next()
        .expect("a collision group always has at least one member");
    (reported, rest.collect())
}
1486
1487// ─────────────────────────────────────────────────────────────────────────────
1488//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1489// ─────────────────────────────────────────────────────────────────────────────
1490
1491/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1492fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1493    // Group content files by their immediate parent folder (the type-folder,
1494    // *across date shards* — a sharded file's "type folder" is the folder right
1495    // under the layer). We key on the type-folder so shards roll up correctly.
1496    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1497    for rel in files {
1498        if let Some(tf) = type_folder_of(rel) {
1499            type_folders.entry(tf).or_default().push(rel.clone());
1500        }
1501    }
1502
1503    // Layers that actually contain a type-folder. The index WRITER creates a
1504    // layer/root `index.md` ONLY when a type-folder exists to roll up:
1505    // `Index::build_root`/`build_layer` populate `child_counts` from type-folders
1506    // alone, and `rebuild_all`/`write_level` remove the `index.md` when that map
1507    // is empty. A layer with ONLY loose files therefore has NO `index.md` — its
1508    // loose records live in the layer's own `index.jsonl` (checked in the loose
1509    // block below). Gating the `index.md` requirement on type-folder presence
1510    // (not on "any content file") keeps `validate --all` in parity with
1511    // `dbmd index rebuild`: requiring an `index.md` for a loose-only layer would
1512    // demand an artifact the canonical rebuild never creates, permanently
1513    // wedging the sweep on a correct store.
1514    let mut layers_with_type_folders: BTreeSet<&'static str> = BTreeSet::new();
1515    for tf in type_folders.keys() {
1516        match tf.iter().next().and_then(|s| s.to_str()) {
1517            Some("sources") => {
1518                layers_with_type_folders.insert("sources");
1519            }
1520            Some("records") => {
1521                layers_with_type_folders.insert("records");
1522            }
1523            _ => {}
1524        }
1525    }
1526
1527    // ── Root index.md ──── (only when a type-folder exists to roll up) ──────────
1528    if !type_folders.is_empty() {
1529        let root_index = store.root.join("index.md");
1530        if !root_index.is_file() {
1531            push(
1532                issues,
1533                Severity::Error,
1534                codes::INDEX_MISSING,
1535                Path::new("index.md"),
1536                None,
1537                None,
1538                "store has files but no root `index.md`".into(),
1539                Some("run `dbmd index rebuild`".into()),
1540                vec![],
1541            );
1542        } else {
1543            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1544        }
1545    }
1546
1547    // ── Layer index.md ──── (only layers that contain a type-folder) ───────────
1548    for layer in &layers_with_type_folders {
1549        let layer_index_rel = PathBuf::from(layer).join("index.md");
1550        let abs = store.root.join(&layer_index_rel);
1551        if !abs.is_file() {
1552            push(
1553                issues,
1554                Severity::Error,
1555                codes::INDEX_MISSING,
1556                &layer_index_rel,
1557                None,
1558                None,
1559                format!("layer `{layer}/` has files but no `index.md`"),
1560                Some("run `dbmd index rebuild`".into()),
1561                vec![],
1562            );
1563        } else {
1564            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1565        }
1566    }
1567
1568    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1569    for (tf, members) in &type_folders {
1570        let index_md_rel = tf.join("index.md");
1571        let index_md_abs = store.root.join(&index_md_rel);
1572        let index_md_present = index_md_abs.is_file();
1573        if !index_md_present {
1574            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1575            // on the FOLDER (not the would-be `index.md` path). When the index is
1576            // entirely missing we do NOT additionally evaluate per-entry
1577            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1578            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1579            push(
1580                issues,
1581                Severity::Error,
1582                codes::INDEX_MISSING,
1583                tf,
1584                None,
1585                None,
1586                format!("non-empty folder `{}` has no index.md", tf.display()),
1587                Some(format!(
1588                    "run `dbmd index rebuild --folder {}`",
1589                    tf.display()
1590                )),
1591                vec![],
1592            );
1593            continue;
1594        }
1595
1596        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1597        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1598
1599        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1600        // when the `index.md` is present (above): a folder whose entire index is
1601        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1602        let jsonl_rel = tf.join("index.jsonl");
1603        let jsonl_abs = store.root.join(&jsonl_rel);
1604        if !jsonl_abs.is_file() {
1605            push(
1606                issues,
1607                Severity::Error,
1608                codes::INDEX_JSONL_MISSING,
1609                &jsonl_rel,
1610                None,
1611                None,
1612                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1613                Some("run `dbmd index rebuild`".into()),
1614                vec![],
1615            );
1616        } else {
1617            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1618        }
1619    }
1620
1621    // ── Loose files: content directly at a layer root (no type-folder). ──────
1622    // They are catalogued in the layer's own `index.jsonl` (the layer `index.md`
1623    // stays a type-folder rollup), so structured reads — `query`, dedup, `graph`
1624    // — see them the same way they see canonical files. Require that sidecar and
1625    // sync-check it, so a loose file is never silently absent from the catalog.
1626    // Only genuinely-loose files land here: `type_folder_of` already grouped
1627    // every file two-or-more levels under a layer into its type-folder above.
1628    let mut loose_by_layer: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1629    for rel in files {
1630        if !is_content_file(rel) || type_folder_of(rel).is_some() {
1631            continue;
1632        }
1633        if let Some(layer_dir) = loose_layer_dir(rel) {
1634            loose_by_layer
1635                .entry(layer_dir)
1636                .or_default()
1637                .push(rel.clone());
1638        }
1639    }
1640    for (layer_dir, members) in &loose_by_layer {
1641        let jsonl_rel = layer_dir.join("index.jsonl");
1642        if !store.root.join(&jsonl_rel).is_file() {
1643            push(
1644                issues,
1645                Severity::Error,
1646                codes::INDEX_JSONL_MISSING,
1647                &jsonl_rel,
1648                None,
1649                None,
1650                format!(
1651                    "loose files at `{}/` are not catalogued — the layer has no `index.jsonl`",
1652                    layer_dir.display()
1653                ),
1654                Some("run `dbmd index rebuild`".into()),
1655                members.clone(),
1656            );
1657        } else {
1658            // `check_type_folder_index_jsonl` ignores its `tf` arg (`let _ = tf`)
1659            // and only checks jsonl-vs-files-vs-frontmatter — exactly the layer
1660            // sidecar's contract, so it is reused verbatim.
1661            check_type_folder_index_jsonl(store, layer_dir, &jsonl_rel, members, issues);
1662        }
1663    }
1664
1665    // ── Orphan index.md: an index file in a folder with no content. ──────────
1666    for rel in walk_index_files(&store.root) {
1667        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1668        let parent_str = parent.to_string_lossy().to_string();
1669        let is_canonical = parent_str.is_empty() // root
1670            || matches!(parent_str.as_str(), "sources" | "records")
1671            || type_folders.contains_key(&parent);
1672        if !is_canonical {
1673            push(
1674                issues,
1675                Severity::Warning,
1676                codes::INDEX_ORPHAN,
1677                &rel,
1678                None,
1679                None,
1680                format!(
1681                    "`{}` sits in an empty or non-canonical folder",
1682                    rel.display()
1683                ),
1684                Some("remove it, or run `dbmd index rebuild`".into()),
1685                vec![],
1686            );
1687        }
1688    }
1689}
1690
1691/// Check a type-folder `index.md`'s entries against the folder's actual files:
1692/// stale entries (target gone), missing entries (file not listed), and
1693/// summary mismatches.
1694fn check_type_folder_index_md(
1695    store: &Store,
1696    tf: &Path,
1697    index_rel: &Path,
1698    members: &[PathBuf],
1699    issues: &mut Vec<Issue>,
1700) {
1701    let abs = store.root.join(index_rel);
1702    let Ok(text) = std::fs::read_to_string(&abs) else {
1703        return;
1704    };
1705    let entries = parse_index_entries(&text);
1706
1707    let listed: BTreeSet<PathBuf> = entries
1708        .iter()
1709        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1710        .collect();
1711
1712    // Stale entries + summary mismatch.
1713    for entry in &entries {
1714        let bare = entry.target.trim_end_matches(".md");
1715        // Resolve like the graph engine (literal path first, then `.md`) so an
1716        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1717        let target_abs = match resolved_target_abs(store, bare) {
1718            Some(abs) => abs,
1719            None => {
1720                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1721                    push(
1722                        issues,
1723                        Severity::Error,
1724                        codes::INDEX_STALE_ENTRY,
1725                        index_rel,
1726                        Some(entry.line),
1727                        None,
1728                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1729                        Some("run `dbmd index rebuild`".into()),
1730                        vec![],
1731                    );
1732                } else {
1733                    push(
1734                        issues,
1735                        Severity::Error,
1736                        codes::INDEX_STALE_ENTRY,
1737                        index_rel,
1738                        Some(entry.line),
1739                        None,
1740                        format!("index entry `[[{bare}]]` points at a missing file"),
1741                        Some("run `dbmd index rebuild`".into()),
1742                        // The stale target the entry names (the file that no
1743                        // longer exists) — so the agent can locate the dangling
1744                        // reference.
1745                        vec![PathBuf::from(format!("{bare}.md"))],
1746                    );
1747                }
1748                continue;
1749            }
1750        };
1751        // Summary mismatch: the entry text must equal the file's `summary`. A
1752        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1753        // summary is also a mismatch — the SPEC requires every type-folder index
1754        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1755        // missing quote can't validate clean just because there's nothing to
1756        // compare.
1757        if let Some(expected) = read_summary(&target_abs) {
1758            match &entry.summary_text {
1759                // Compare with the SAME whitespace normalization the renderer
1760                // applies when it writes the `index.md` browse line
1761                // (`format_md_entry` -> `collapse_whitespace`). `text_part` is the
1762                // already-collapsed text parsed back out of `index.md`; `expected`
1763                // is the RAW file summary. Comparing a collapsed value against a
1764                // raw one falsely flagged any valid one-line summary that carries
1765                // internal whitespace (a double space, a tab) — a permanent,
1766                // rebuild-immune INDEX_SUMMARY_MISMATCH that wedged the store, since
1767                // `index rebuild` regenerates the byte-identical collapsed line.
1768                // Normalizing both sides makes the check compare like with like.
1769                Some(text_part)
1770                    if crate::summary::collapse_whitespace(text_part)
1771                        != crate::summary::collapse_whitespace(&expected) =>
1772                {
1773                    push(
1774                        issues,
1775                        Severity::Error,
1776                        codes::INDEX_SUMMARY_MISMATCH,
1777                        index_rel,
1778                        Some(entry.line),
1779                        None,
1780                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1781                        Some("run `dbmd index rebuild`".into()),
1782                        vec![PathBuf::from(format!("{bare}.md"))],
1783                    );
1784                }
1785                None if !expected.trim().is_empty() => {
1786                    push(
1787                        issues,
1788                        Severity::Error,
1789                        codes::INDEX_SUMMARY_MISMATCH,
1790                        index_rel,
1791                        Some(entry.line),
1792                        None,
1793                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1794                        Some("run `dbmd index rebuild`".into()),
1795                        vec![PathBuf::from(format!("{bare}.md"))],
1796                    );
1797                }
1798                _ => {}
1799            }
1800        }
1801    }
1802
1803    // Missing entries: a member file not listed. Skip the index/log meta files.
1804    // The browse view caps at 500; only flag a missing entry when the folder is
1805    // under the cap (a capped folder legitimately omits older files).
1806    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1807    if content_members.len() <= 500 {
1808        for m in content_members {
1809            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1810            if !listed.contains(&bare) {
1811                push(
1812                    issues,
1813                    Severity::Error,
1814                    codes::INDEX_MISSING_ENTRY,
1815                    index_rel,
1816                    None,
1817                    None,
1818                    format!(
1819                        "file `{}` is not listed in its folder's `index.md`",
1820                        m.display()
1821                    ),
1822                    Some("run `dbmd index rebuild`".into()),
1823                    vec![(*m).clone()],
1824                );
1825            }
1826        }
1827    }
1828    let _ = tf;
1829}
1830
1831/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1832/// folder (uncapped), every record must point at a real file, and each record's
1833/// fields must match the file's frontmatter.
1834fn check_type_folder_index_jsonl(
1835    store: &Store,
1836    tf: &Path,
1837    jsonl_rel: &Path,
1838    members: &[PathBuf],
1839    issues: &mut Vec<Issue>,
1840) {
1841    let abs = store.root.join(jsonl_rel);
1842    let Ok(text) = std::fs::read_to_string(&abs) else {
1843        return;
1844    };
1845
1846    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1847    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1848    for (i, line) in text.lines().enumerate() {
1849        let line = line.trim();
1850        if line.is_empty() {
1851            continue;
1852        }
1853        let rec: serde_json::Value = match serde_json::from_str(line) {
1854            Ok(v) => v,
1855            Err(e) => {
1856                push(
1857                    issues,
1858                    Severity::Error,
1859                    codes::INDEX_JSONL_DESYNC,
1860                    jsonl_rel,
1861                    Some((i + 1) as u32),
1862                    None,
1863                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1864                    Some("run `dbmd index rebuild`".into()),
1865                    vec![],
1866                );
1867                continue;
1868            }
1869        };
1870        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1871            if !is_safe_store_relative_path(Path::new(path)) {
1872                push(
1873                    issues,
1874                    Severity::Error,
1875                    codes::INDEX_JSONL_DESYNC,
1876                    jsonl_rel,
1877                    Some((i + 1) as u32),
1878                    None,
1879                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1880                    Some("run `dbmd index rebuild`".into()),
1881                    vec![],
1882                );
1883                continue;
1884            }
1885            records.insert(PathBuf::from(path), rec);
1886        }
1887    }
1888
1889    let member_set: BTreeSet<PathBuf> = members
1890        .iter()
1891        .filter(|m| is_content_file(m))
1892        .cloned()
1893        .collect();
1894
1895    // jsonl record → missing file = desync.
1896    for path in records.keys() {
1897        let target_abs = store.root.join(path);
1898        if !target_abs.is_file() {
1899            push(
1900                issues,
1901                Severity::Error,
1902                codes::INDEX_JSONL_DESYNC,
1903                jsonl_rel,
1904                None,
1905                None,
1906                format!(
1907                    "`index.jsonl` record points at missing file `{}`",
1908                    path.display()
1909                ),
1910                Some("run `dbmd index rebuild`".into()),
1911                vec![],
1912            );
1913        }
1914    }
1915
1916    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1917    for m in &member_set {
1918        if !records.contains_key(m) {
1919            push(
1920                issues,
1921                Severity::Error,
1922                codes::INDEX_JSONL_DESYNC,
1923                jsonl_rel,
1924                None,
1925                None,
1926                format!(
1927                    "file `{}` is missing from the complete `index.jsonl`",
1928                    m.display()
1929                ),
1930                Some("run `dbmd index rebuild`".into()),
1931                vec![m.clone()],
1932            );
1933        }
1934    }
1935
1936    // Record fields stale vs. frontmatter. SPEC § Validation defines
1937    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1938    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1939    // search paths read every field straight from these sidecars (`tags`,
1940    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1941    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1942    // a stale value answer queries with data that exists in no `.md` file.
1943    //
1944    // Rather than re-list (and drift from) every projected key, rebuild the
1945    // record the canonical projection would write for this file
1946    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1947    // and diff the two as flat JSON maps. Every key the projection emits is
1948    // covered automatically; `path` is the join key and is skipped.
1949    for (path, rec) in &records {
1950        let target_abs = store.root.join(path);
1951        if !target_abs.is_file() {
1952            continue;
1953        }
1954        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1955        else {
1956            continue; // unreadable / unparseable frontmatter is reported elsewhere
1957        };
1958        let Ok(expected_json) = serde_json::to_value(&expected) else {
1959            continue;
1960        };
1961        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1962            continue;
1963        };
1964
1965        // Compare the union of keys present on either side; a key the file
1966        // projects but the sidecar omits is just as stale as a wrong value.
1967        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1968        for key in have.keys().chain(want.keys()) {
1969            if key == "path" {
1970                continue;
1971            }
1972            if have.get(key) != want.get(key) {
1973                mismatched_keys.insert(key);
1974            }
1975        }
1976
1977        if !mismatched_keys.is_empty() {
1978            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1979            push(
1980                issues,
1981                Severity::Error,
1982                codes::INDEX_JSONL_STALE,
1983                jsonl_rel,
1984                None,
1985                Some(keys.join(",")),
1986                format!(
1987                    "`index.jsonl` record for `{}` is stale ({})",
1988                    path.display(),
1989                    keys.join(", ")
1990                ),
1991                Some("run `dbmd index rebuild`".into()),
1992                vec![path.clone()],
1993            );
1994        }
1995    }
1996    let _ = tf;
1997}
1998
1999/// Check an index's `scope:` frontmatter against its filesystem location.
2000fn check_index_scope(
2001    store: &Store,
2002    index_rel: &Path,
2003    expected_scope: &str,
2004    expected_folder: Option<&str>,
2005    issues: &mut Vec<Issue>,
2006) {
2007    let abs = store.root.join(index_rel);
2008    let Ok(text) = std::fs::read_to_string(&abs) else {
2009        return;
2010    };
2011    let Some((yaml, _, _)) = split_frontmatter(&text) else {
2012        return;
2013    };
2014    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
2015        return;
2016    };
2017    let fm = yaml_map_to_btree(&map);
2018
2019    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
2020        // Accept "type-folder" and the SPEC example's looser "folder" alias.
2021        let scope_ok =
2022            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
2023        if !scope_ok {
2024            push(
2025                issues,
2026                Severity::Warning,
2027                codes::INDEX_WRONG_SCOPE,
2028                index_rel,
2029                fm_key_line(&yaml, "scope"),
2030                Some("scope".into()),
2031                format!(
2032                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
2033                ),
2034                Some(format!("set `scope: {expected_scope}`")),
2035                vec![],
2036            );
2037        }
2038    }
2039    // folder: must match for layer/type-folder indexes.
2040    if let Some(expected) = expected_folder {
2041        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
2042            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
2043                push(
2044                    issues,
2045                    Severity::Warning,
2046                    codes::INDEX_WRONG_SCOPE,
2047                    index_rel,
2048                    fm_key_line(&yaml, "folder"),
2049                    Some("folder".into()),
2050                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
2051                    Some(format!("set `folder: {expected}`")),
2052                    vec![],
2053                );
2054            }
2055        }
2056    }
2057}
2058
2059// ─────────────────────────────────────────────────────────────────────────────
2060//  Cross-file: log.md well-formedness + ordering (validate_all only)
2061// ─────────────────────────────────────────────────────────────────────────────
2062
2063/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
2064/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
2065///
2066/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
2067/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
2068/// LOG_* checks read only the active file, an entry `validate --all` flagged
2069/// while it lived in `log.md` would stop being flagged the moment a newer-month
2070/// append rotated it into an archive — even though the log readers still surface
2071/// that exact entry to the curator. Scanning the archives too keeps validate and
2072/// the readers in agreement after a rotation.
2073///
2074/// Order: archives oldest-month first, then the active `log.md` last — the true
2075/// chronological timeline — so the out-of-order check threads `prev` across the
2076/// rotation boundary the same way it does within a single file.
2077fn check_log(store: &Store, issues: &mut Vec<Issue>) {
2078    let mut prev: Option<DateTime<FixedOffset>> = None;
2079    for rel in log_files_chronological(store) {
2080        check_log_file(store, &rel, &mut prev, issues);
2081    }
2082}
2083
2084/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
2085/// archive oldest-month first, then the active `log.md` last. Missing files are
2086/// simply absent from the list.
2087fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2088    let mut files: Vec<PathBuf> = Vec::new();
2089    let archive_dir = store.root.join("log");
2090    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2091        let mut archives: Vec<PathBuf> = entries
2092            .flatten()
2093            .map(|e| e.path())
2094            .filter(|p| {
2095                p.is_file()
2096                    && p.file_name()
2097                        .and_then(|s| s.to_str())
2098                        .and_then(|n| n.strip_suffix(".md"))
2099                        .is_some_and(is_year_month_archive)
2100            })
2101            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2102            .collect();
2103        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
2104        archives.sort();
2105        files.extend(archives);
2106    }
2107    // The active file holds the current month — newest, so it comes last.
2108    if store.root.join("log.md").is_file() {
2109        files.push(PathBuf::from("log.md"));
2110    }
2111    files
2112}
2113
2114/// Scan one log file's entry headers, threading the running `prev` timestamp so
2115/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2116/// given store-relative path so an archived entry points at its archive file.
2117fn check_log_file(
2118    store: &Store,
2119    log_rel: &Path,
2120    prev: &mut Option<DateTime<FixedOffset>>,
2121    issues: &mut Vec<Issue>,
2122) {
2123    let abs = store.root.join(log_rel);
2124    let Ok(text) = std::fs::read_to_string(&abs) else {
2125        return;
2126    };
2127
2128    for (i, line) in text.lines().enumerate() {
2129        if !line.starts_with("## [") {
2130            continue;
2131        }
2132        let line_no = (i + 1) as u32;
2133        match parse_log_header(line) {
2134            None => push(
2135                issues,
2136                Severity::Error,
2137                codes::LOG_BAD_TIMESTAMP,
2138                log_rel,
2139                Some(line_no),
2140                None,
2141                format!("log entry header has an unparseable timestamp: {line:?}"),
2142                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2143                vec![],
2144            ),
2145            Some((ts, kind, _object)) => {
2146                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2147                    push(
2148                        issues,
2149                        Severity::Warning,
2150                        codes::LOG_UNKNOWN_KIND,
2151                        log_rel,
2152                        Some(line_no),
2153                        None,
2154                        format!("log entry kind `{kind}` is not recognized"),
2155                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2156                        vec![],
2157                    );
2158                }
2159                if let Some(p) = *prev {
2160                    if ts < p {
2161                        push(
2162                            issues,
2163                            Severity::Warning,
2164                            codes::LOG_OUT_OF_ORDER,
2165                            log_rel,
2166                            Some(line_no),
2167                            None,
2168                            "log entry is older than the entry above it (possible rewrite)".into(),
2169                            Some("append corrective entries; never reorder past ones".into()),
2170                            vec![],
2171                        );
2172                    }
2173                }
2174                *prev = Some(ts);
2175            }
2176        }
2177    }
2178}
2179
2180// ─────────────────────────────────────────────────────────────────────────────
2181//  Self-contained primitives (collapse onto sibling modules once they land)
2182// ─────────────────────────────────────────────────────────────────────────────
2183
/// A minimal wiki-link occurrence: the link target and the 1-based file line
/// it was found on.
///
/// Only the target is kept; for a `[[target|display]]` link the display text
/// is discarded by the scanner that builds this value.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Link {
    /// The text inside `[[...]]` before any `|`, trimmed.
    target: String,
    /// 1-based line number the link appears on.
    line: u32,
}
2190
2191/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2192/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2193/// exact-cased directory entry to be present.
2194fn store_marker_present(store: &Store) -> bool {
2195    let want = store.root.join("DB.md");
2196    if !want.is_file() {
2197        return false;
2198    }
2199    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2200    match std::fs::read_dir(&store.root) {
2201        Ok(entries) => entries
2202            .flatten()
2203            .any(|e| e.file_name().to_str() == Some("DB.md")),
2204        Err(_) => true, // can't enumerate; trust the is_file() above
2205    }
2206}
2207
2208/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2209/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2210/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2211/// `Schemas`).
2212///
2213/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2214/// than through `check_content_file`. The marker presence is established by the
2215/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2216/// as a store (the marker is the filename), so we report its shape rather than
2217/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2218fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2219    let rel = Path::new("DB.md");
2220    let abs = store.root.join("DB.md");
2221    let Ok(text) = std::fs::read_to_string(&abs) else {
2222        return; // marker present but unreadable: nothing more to say.
2223    };
2224
2225    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2226        // No frontmatter block at all → it cannot declare `type: db-md` and has
2227        // neither required field. Report the type and both missing fields,
2228        // anchored to line 1 (the would-be opening fence).
2229        push(
2230            issues,
2231            Severity::Error,
2232            codes::DB_MD_BAD_TYPE,
2233            rel,
2234            Some(1),
2235            Some("type".into()),
2236            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2237            Some("add a `---` frontmatter block with `type: db-md`".into()),
2238            vec![],
2239        );
2240        for field in ["scope", "owner"] {
2241            push(
2242                issues,
2243                Severity::Error,
2244                codes::DB_MD_MISSING_FIELD,
2245                rel,
2246                Some(1),
2247                Some(field.into()),
2248                format!("DB.md frontmatter is missing required field `{field}`"),
2249                Some(format!("add `{field}:` to the DB.md frontmatter")),
2250                vec![],
2251            );
2252        }
2253        return;
2254    };
2255
2256    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2257    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2258    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2259        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2260        Ok(Value::Null) => Some(BTreeMap::new()),
2261        _ => None,
2262    };
2263
2264    match &fm {
2265        Some(map) => {
2266            // ── type: db-md ──────────────────────────────────────────────────
2267            let type_ = map.get("type").and_then(scalar_string);
2268            if type_.as_deref() != Some("db-md") {
2269                let (line, msg) = match &type_ {
2270                    Some(t) => (
2271                        fm_key_line(&fm_yaml, "type"),
2272                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2273                    ),
2274                    None => (
2275                        Some(1),
2276                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2277                    ),
2278                };
2279                push(
2280                    issues,
2281                    Severity::Error,
2282                    codes::DB_MD_BAD_TYPE,
2283                    rel,
2284                    line,
2285                    Some("type".into()),
2286                    msg,
2287                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2288                    vec![],
2289                );
2290            }
2291
2292            // ── required fields: scope + owner ───────────────────────────────
2293            for field in ["scope", "owner"] {
2294                let present = map
2295                    .get(field)
2296                    .and_then(scalar_string)
2297                    .map(|s| !s.trim().is_empty())
2298                    .unwrap_or(false);
2299                if !present {
2300                    push(
2301                        issues,
2302                        Severity::Error,
2303                        codes::DB_MD_MISSING_FIELD,
2304                        rel,
2305                        // A present-but-empty field anchors to its line; a fully
2306                        // absent one to the block top.
2307                        fm_key_line_or_top(&fm_yaml, field),
2308                        Some(field.into()),
2309                        format!("DB.md frontmatter is missing required field `{field}`"),
2310                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2311                        vec![],
2312                    );
2313                }
2314            }
2315        }
2316        None => {
2317            // Unparseable frontmatter: the identity contract is unprovable. Emit
2318            // the type error and both field errors, anchored to the block top.
2319            push(
2320                issues,
2321                Severity::Error,
2322                codes::DB_MD_BAD_TYPE,
2323                rel,
2324                Some(1),
2325                Some("type".into()),
2326                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2327                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2328                vec![],
2329            );
2330            for field in ["scope", "owner"] {
2331                push(
2332                    issues,
2333                    Severity::Error,
2334                    codes::DB_MD_MISSING_FIELD,
2335                    rel,
2336                    Some(1),
2337                    Some(field.into()),
2338                    format!("DB.md frontmatter is missing required field `{field}`"),
2339                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2340                    vec![],
2341                );
2342            }
2343        }
2344    }
2345
2346    // ── recognized `##` section headers only ─────────────────────────────────
2347    // The body's H2 headings must be one of the four the toolkit reads; any
2348    // other is a likely typo / misplacement (warning — the parser ignores it,
2349    // so the config is not corrupted, but the operator wrote a section that will
2350    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2351    // schema blocks) live under their H2 and are not flagged here.
2352    //
2353    // `## Folders` is recognized: `parse_db_md` reads it into `Config.folders`
2354    // (parser.rs) and the index renders folder display names + descriptions from
2355    // it (index.rs `render_*_md_from_stats`). Flagging it `DB_MD_UNKNOWN_SECTION`
2356    // with "remove this heading" told the operator to delete a working,
2357    // round-tripped config block — destroying curator-authored rollup names. It
2358    // is a real, shipped section; SPEC.md documents it alongside the other three.
2359    for section in crate::parser::extract_sections(&body) {
2360        if section.level != 2 {
2361            continue;
2362        }
2363        let name = section.heading.trim().to_ascii_lowercase();
2364        if matches!(
2365            name.as_str(),
2366            "agent instructions" | "policies" | "schemas" | "folders"
2367        ) {
2368            continue;
2369        }
2370        // `Section::line` is 1-based within the body; the body begins at file
2371        // line `fm_end_line + 1`.
2372        let file_line = fm_end_line + section.line;
2373        push(
2374            issues,
2375            Severity::Warning,
2376            codes::DB_MD_UNKNOWN_SECTION,
2377            rel,
2378            Some(file_line),
2379            None,
2380            format!(
2381                "DB.md has an unrecognized `## {}` section",
2382                section.heading.trim()
2383            ),
2384            Some(
2385                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas`, \
2386                 `## Folders` — remove or rename this heading"
2387                    .into(),
2388            ),
2389            vec![],
2390        );
2391    }
2392
2393    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2394    // Without this, every schema misparse is silent: the operator/agent gets no
2395    // signal that DB.md is interpreting their schema differently from what they
2396    // wrote, and downstream records are validated against the degraded schema.
2397    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2398}
2399
2400/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2401/// duplicate field name within a type, or an unrecognized modifier all parse
2402/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2403/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2404/// already separated out); this pass reports the suspicious *field* shapes,
2405/// anchored to the `### <type>` heading line so the agent can find the block.
2406fn check_db_md_schemas(
2407    store: &Store,
2408    rel: &Path,
2409    body: &str,
2410    fm_end_line: u32,
2411    issues: &mut Vec<Issue>,
2412) {
2413    if store.config.schemas.is_empty() {
2414        return;
2415    }
2416
2417    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2418    // per-type issue can anchor to the declaration block. `extract_sections`
2419    // returns a flat list with 1-based body lines; the body starts at file line
2420    // `fm_end_line + 1`.
2421    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2422    let mut current_h2: Option<String> = None;
2423    for section in crate::parser::extract_sections(body) {
2424        match section.level {
2425            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2426            3 if current_h2.as_deref() == Some("schemas") => {
2427                // The H3 heading text (as written) is the type name — the same
2428                // key `parse_db_md` inserts into `config.schemas`.
2429                type_line
2430                    .entry(section.heading.trim().to_string())
2431                    .or_insert(fm_end_line + section.line);
2432            }
2433            _ => {}
2434        }
2435    }
2436
2437    for (type_name, schema) in &store.config.schemas {
2438        let line = type_line.get(type_name).copied();
2439        let mut seen: BTreeSet<String> = BTreeSet::new();
2440        for field in &schema.fields {
2441            let name = field.name.trim();
2442
2443            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2444            // nameless field that can never match a frontmatter key, so its
2445            // required/shape/enum constraints silently never apply.
2446            if name.is_empty() {
2447                push(
2448                    issues,
2449                    Severity::Warning,
2450                    codes::DB_MD_SCHEMA_FIELD,
2451                    rel,
2452                    line,
2453                    None,
2454                    format!("`### {type_name}` has a schema field bullet with no field name"),
2455                    Some(
2456                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2457                            .into(),
2458                    ),
2459                    vec![],
2460                );
2461                continue;
2462            }
2463
2464            // Duplicate field name within a type: the second declaration's
2465            // constraints are interpreted independently of the first, so the
2466            // author's intent is ambiguous and likely wrong.
2467            if !seen.insert(name.to_string()) {
2468                push(
2469                    issues,
2470                    Severity::Warning,
2471                    codes::DB_MD_SCHEMA_FIELD,
2472                    rel,
2473                    line,
2474                    Some(name.to_string()),
2475                    format!("`### {type_name}` declares field `{name}` more than once"),
2476                    Some(
2477                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2478                    ),
2479                    vec![],
2480                );
2481            }
2482
2483            // Unrecognized modifiers: the parser stashes anything outside the
2484            // known vocabulary (`required` / a shape / `link to …` / `default …`
2485            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2486            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2487            for modifier in &field.unknown_modifiers {
2488                let modifier = modifier.trim();
2489                if modifier.is_empty() {
2490                    continue;
2491                }
2492                push(
2493                    issues,
2494                    Severity::Info,
2495                    codes::DB_MD_SCHEMA_FIELD,
2496                    rel,
2497                    line,
2498                    Some(name.to_string()),
2499                    format!(
2500                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2501                    ),
2502                    Some(
2503                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2504                            .into(),
2505                    ),
2506                    vec![],
2507                );
2508            }
2509        }
2510
2511        // A `unique:` key silently skips any record missing (or leaving empty)
2512        // one of its fields — an incomplete key never collides (`dedup_key`).
2513        // So a key that names a field the schema doesn't mark `required` stops
2514        // checking exactly the records most likely to be re-entered partially
2515        // filled. Surface the gap at the declaration: every key field should
2516        // be a `required` field. (A field declared more than once counts as
2517        // required if any declaration marks it — the duplicate itself is
2518        // already flagged above.)
2519        let mut declared: BTreeMap<&str, bool> = BTreeMap::new();
2520        for f in &schema.fields {
2521            let e = declared.entry(f.name.trim()).or_insert(false);
2522            *e = *e || f.required;
2523        }
2524        let mut flagged: BTreeSet<&str> = BTreeSet::new();
2525        for key_fields in &schema.unique_keys {
2526            for field in key_fields {
2527                let name = field.trim();
2528                if name.is_empty()
2529                    || declared.get(name).copied() == Some(true)
2530                    || !flagged.insert(name)
2531                {
2532                    continue;
2533                }
2534                let message = if declared.contains_key(name) {
2535                    format!(
2536                        "`### {type_name}` `unique:` key field `{name}` is not `required` — a record missing or leaving it empty is silently skipped by the unique check"
2537                    )
2538                } else {
2539                    format!(
2540                        "`### {type_name}` `unique:` key field `{name}` is not declared in the schema, so it can never be `required` — a record missing it is silently skipped by the unique check"
2541                    )
2542                };
2543                push(
2544                    issues,
2545                    Severity::Warning,
2546                    codes::DB_MD_SCHEMA_FIELD,
2547                    rel,
2548                    line,
2549                    Some(name.to_string()),
2550                    message,
2551                    Some(format!(
2552                        "mark `{name}` `required` in `### {type_name}`, or build the `unique:` key from required fields only"
2553                    )),
2554                    vec![],
2555                );
2556            }
2557        }
2558    }
2559}
2560
2561/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2562fn not_a_store_issue(store: &Store) -> Issue {
2563    Issue {
2564        severity: Severity::Error,
2565        code: codes::NOT_A_STORE,
2566        file: store.root.clone(),
2567        line: None,
2568        key: None,
2569        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2570        suggestion: Some("create a `DB.md` at the store root".into()),
2571        related: vec![],
2572    }
2573}
2574
2575/// True if a store-relative path is a content file: under `sources/` or
2576/// `records/` and not an `index.md`/`index.jsonl`/`log.md`.
2577fn is_content_file(rel: &Path) -> bool {
2578    // Defense in depth: a real content file is always a forward (Normal-only)
2579    // store-relative path. Reject any `..`/absolute/prefix component so a
2580    // malformed object slot judged only by its FIRST component (`records/../..`)
2581    // can never turn a per-file read into a store escape, even if a future caller
2582    // forgets the path-safety gate `changed_objects_since` now applies.
2583    if !is_safe_store_relative_path(rel) {
2584        return false;
2585    }
2586    let Some(first) = rel.iter().next().and_then(|s| s.to_str()) else {
2587        return false;
2588    };
2589    if !matches!(first, "sources" | "records") {
2590        return false;
2591    }
2592    let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
2593    // Only the derived catalog twins are meta INSIDE a layer. `DB.md` / `log.md`
2594    // are reserved meta only at the store ROOT, which the `first` layer check
2595    // above already excludes — so a content file named `log.md` / `DB.md` inside
2596    // a layer (e.g. `records/docs/log.md`) is real content, consistent with
2597    // `Store::walk`.
2598    if matches!(name, "index.md" | "index.jsonl") {
2599        return false;
2600    }
2601    name.ends_with(".md")
2602}
2603
/// Reports whether `rel` is one of the store's root meta files.
///
/// True only for a path made of exactly one normal component named `DB.md` or
/// `log.md` (case-sensitive). Anything with a parent directory, such as
/// `records/docs/log.md`, is not a root meta file.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain name.
        (Some(Component::Normal(name)), None) => {
            matches!(name.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2620
/// Reports whether the final component of `rel` is `index.md` or
/// `index.jsonl`, regardless of how deep the path is.
///
/// The match is on the exact, case-sensitive file name; a path with no final
/// component or a non-UTF-8 name is not a catalog file.
fn is_index_catalog_file(rel: &Path) -> bool {
    rel.file_name()
        .and_then(|n| n.to_str())
        .is_some_and(|name| name == "index.md" || name == "index.jsonl")
}
2634
/// Splits a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The first line must be `---` (trailing whitespace allowed) and the block
/// ends at the next such line. A single leading UTF-8 BOM is ignored. Returns
/// `None` when the file is empty, does not open with a fence, or never closes
/// it.
///
/// In the result, every YAML line is terminated with `\n`, the body is the
/// lines after the closing fence joined with `\n` (no trailing newline), and
/// the fence line is 1-based.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // Strip a BOM first so a BOM-prefixed file is still seen as opening with a fence.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let mut remaining = text.lines();
    if remaining.next()?.trim_end() != "---" {
        return None;
    }

    let mut yaml = String::new();
    // The opening fence is line 1; the counter ends on the closing fence.
    let mut line_no = 1u32;
    let mut closed = false;
    for line in remaining.by_ref() {
        line_no += 1;
        if line.trim_end() == "---" {
            closed = true;
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
    if !closed {
        return None;
    }

    // Whatever the iterator still holds is the body.
    let body = remaining.collect::<Vec<_>>().join("\n");
    Some((yaml, body, line_no))
}
2671
2672/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2673fn read_summary(abs: &Path) -> Option<String> {
2674    let text = std::fs::read_to_string(abs).ok()?;
2675    let (yaml, _, _) = split_frontmatter(&text)?;
2676    let value: Value = serde_norway::from_str(&yaml).ok()?;
2677    if let Value::Mapping(m) = value {
2678        m.get(Value::String("summary".into()))
2679            .and_then(scalar_string)
2680    } else {
2681        None
2682    }
2683}
2684
2685/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2686/// non-string keys (frontmatter keys are always strings).
2687fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2688    let mut out = BTreeMap::new();
2689    for (k, v) in map {
2690        if let Value::String(s) = k {
2691            out.insert(s.clone(), v.clone());
2692        }
2693    }
2694    out
2695}
2696
2697/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2698/// sequences/mappings/null.
2699fn scalar_string(v: &Value) -> Option<String> {
2700    match v {
2701        Value::String(s) => Some(s.clone()),
2702        Value::Number(n) => Some(n.to_string()),
2703        Value::Bool(b) => Some(b.to_string()),
2704        _ => None,
2705    }
2706}
2707
2708/// True if a frontmatter value carries no content for a *required*-field check:
2709/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2710/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2711/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2712/// a shape/enum field is caught by the later non-scalar shape check, not by the
2713/// required-presence check.
2714fn is_empty_value(v: &Value) -> bool {
2715    match v {
2716        Value::Null => true,
2717        Value::Sequence(items) => items.is_empty(),
2718        Value::Mapping(map) => map.is_empty(),
2719        other => scalar_string(other)
2720            .map(|s| s.trim().is_empty())
2721            .unwrap_or(true),
2722    }
2723}
2724
2725/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2726/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2727fn is_flat_scalar_list(v: &Value) -> bool {
2728    match v {
2729        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2730        _ => false,
2731    }
2732}
2733
2734/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2735/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2736/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2737/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2738/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2739/// both forms uniformly, the way the link textually appears — the doctrine view.
2740///
2741/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2742/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2743fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2744    let mut out = Vec::new();
2745    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2746        for link in links {
2747            out.push((key.clone(), link));
2748        }
2749    }
2750    out
2751}
2752
2753/// The wiki-link targets declared under a single top-level frontmatter key
2754/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2755/// carries no `[[...]]`.
2756fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2757    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2758        if k == key {
2759            return links;
2760        }
2761    }
2762    Vec::new()
2763}
2764
2765/// The raw value text under a single top-level frontmatter key (the remainder of
2766/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2767/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2768fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2769    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2770        if k == key {
2771            return Some(value_text);
2772        }
2773    }
2774    None
2775}
2776
2777/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2778/// each top-level key. A top-level key is a line with no leading indentation in
2779/// `name:` form; its value spans the rest of that line plus any deeper-indented
2780/// continuation lines (block scalars, block sequences) until the next top-level
2781/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2782/// absolute file line.
2783fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2784    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2785    let mut current: Option<(String, String, Vec<Link>)> = None;
2786
2787    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2788        let file_line = fm_start_line + idx as u32;
2789        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2790        let trimmed = raw_line.trim();
2791
2792        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2793        // comment. (Indented or dash lines belong to the current key's value.)
2794        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2795            top_level_key(raw_line)
2796        } else {
2797            None
2798        };
2799
2800        if let Some((key, after)) = new_key {
2801            if let Some(done) = current.take() {
2802                blocks.push(done);
2803            }
2804            let mut links = Vec::new();
2805            collect_line_links(after, file_line, &mut links);
2806            current = Some((key, after.trim().to_string(), links));
2807        } else if let Some((_k, value_text, links)) = current.as_mut() {
2808            // Continuation of the current key's value (indented or dash line).
2809            if !value_text.is_empty() {
2810                value_text.push('\n');
2811            }
2812            value_text.push_str(trimmed);
2813            collect_line_links(raw_line, file_line, links);
2814        }
2815    }
2816    if let Some(done) = current.take() {
2817        blocks.push(done);
2818    }
2819    blocks
2820}
2821
/// Parses a frontmatter line of the form `name: value` into
/// `(key, text_after_colon)`.
///
/// The line is split at the first `:`. The key is the trimmed text before it
/// and must be non-empty and made only of alphanumerics, `_`, or `-`;
/// otherwise the result is `None`. The text after the colon is returned
/// untrimmed.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let (raw_key, rest) = line.split_once(':')?;
    let key = raw_key.trim();
    let well_formed = !key.is_empty()
        && key
            .chars()
            .all(|c| matches!(c, '_' | '-') || c.is_alphanumeric());
    well_formed.then(|| (key.to_string(), rest))
}
2836
2837/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2838/// each tagged with `file_line`.
2839fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2840    let bytes = s.as_bytes();
2841    let mut i = 0;
2842    while i + 1 < bytes.len() {
2843        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2844            if let Some(close) = s[i + 2..].find("]]") {
2845                let inner = &s[i + 2..i + 2 + close];
2846                // Guard against `[[[` (nested) double-counting: the inner must
2847                // not itself open another `[[`.
2848                let target = inner
2849                    .trim_start_matches('[')
2850                    .split('|')
2851                    .next()
2852                    .unwrap_or(inner)
2853                    .trim()
2854                    .to_string();
2855                if !target.is_empty() {
2856                    links.push(Link {
2857                        target,
2858                        line: file_line,
2859                    });
2860                }
2861                i = i + 2 + close + 2;
2862                continue;
2863            }
2864        }
2865        i += 1;
2866    }
2867}
2868
2869/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2870/// Skips fenced code blocks, so example links in docs don't trip the validator.
2871///
2872/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2873/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2874/// only on a line that is the **same** fence character with a run **at least as
2875/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2876/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2877/// document a backtick fence) — the inner backtick line would flip `in_fence`
2878/// off and the demo `[[…]]` inside the code block would be checked as a live
2879/// link, falsely flagging a legal store.
2880fn extract_wiki_links(body: &str) -> Vec<Link> {
2881    let mut out = Vec::new();
2882    let mut fence: Option<(u8, usize)> = None;
2883    for (idx, line) in body.lines().enumerate() {
2884        let content = line.trim_end_matches('\r');
2885        if let Some(f) = fence {
2886            // Inside a fence: the only thing that matters is whether THIS line
2887            // closes it (matching char, run ≥ the opening run). Everything else
2888            // is opaque code — no link extraction.
2889            if fence_closes(content, f) {
2890                fence = None;
2891            }
2892            continue;
2893        }
2894        if let Some(opened) = fence_opens(content) {
2895            fence = Some(opened);
2896            continue;
2897        }
2898        let line_no = (idx + 1) as u32;
2899        let bytes = line.as_bytes();
2900        let mut i = 0;
2901        while i + 1 < bytes.len() {
2902            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2903                if let Some(close) = line[i + 2..].find("]]") {
2904                    let inner = &line[i + 2..i + 2 + close];
2905                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2906                    // Skip a triple-bracket `[[[…` opening: the inner content
2907                    // starts with `[`, so this is the rejected flow-form list
2908                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2909                    // legitimate target never starts with `[`. The frontmatter
2910                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2911                    // extracting a bogus body link here would double-report it as
2912                    // a spurious `WIKI_LINK_SHORT_FORM`.
2913                    if !target.is_empty() && !target.starts_with('[') {
2914                        out.push(Link {
2915                            target,
2916                            line: line_no,
2917                        });
2918                    }
2919                    i = i + 2 + close + 2;
2920                    continue;
2921                }
2922            }
2923            i += 1;
2924        }
2925    }
2926    out
2927}
2928
/// Return `(fence byte, run length)` when `line` opens a fenced code block.
///
/// Local mirror of the parser's `opening_fence`, kept so the validator tracks
/// fences like the rest of the toolkit. A fence is a run of at least three
/// backticks or tildes, indented by at most three spaces; for a backtick fence
/// the info string after the run must not contain another backtick.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let body = line.trim_start_matches(' ');
    if line.len() - body.len() > 3 {
        return None;
    }

    let marker = match body.as_bytes().first() {
        Some(&b @ (b'`' | b'~')) => b,
        _ => return None,
    };

    let run = body.bytes().take_while(|&b| b == marker).count();
    if run < 3 {
        return None;
    }

    // Only backtick fences restrict the info string.
    let info = &body[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }

    Some((marker, run))
}
2954
/// Whether `line` closes the currently open `fence`.
///
/// Local mirror of the parser's `is_closing_fence`. The line must be indented
/// by at most three spaces, start with a run of the fence's own character at
/// least as long as the opening run, and carry nothing but whitespace after
/// that run. A line made of the *other* fence character (a ``` ``` ``` line
/// inside a `~~~` block) therefore does not close the outer fence.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, needed) = fence;
    let body = line.trim_start_matches(' ');
    let indent = line.len() - body.len();
    let run = body.len() - body.trim_start_matches(marker as char).len();
    indent <= 3 && run >= needed && body[run..].trim().is_empty()
}
2972
2973/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2974/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2975///
2976/// **Scoped to the inline value on the key line.** The SPEC's canonical
2977/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2978/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2979/// flagged — even though, parsed whole, it nests the same way the rejected
2980/// inline flow form does. So this check looks only at the value written *inline*
2981/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2982/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2983/// mis-encoding), it is flagged. A key with no inline value (the block form,
2984/// whose items live on continuation lines) is never inspected here.
2985///
2986/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2987/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2988/// encodes the identical nested sequence but evaded the old prefix match.
2989fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2990    let mut out = Vec::new();
2991    for line in fm_yaml.lines() {
2992        // Top-level key lines only (no indentation, not a comment or list dash).
2993        if line.starts_with(' ') || line.starts_with('\t') {
2994            continue;
2995        }
2996        let Some((key, rest)) = line.split_once(':') else {
2997            continue;
2998        };
2999        let key = key.trim();
3000        if key.is_empty()
3001            || key.starts_with('#')
3002            || key.starts_with('-')
3003            || !key
3004                .chars()
3005                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
3006        {
3007            continue;
3008        }
3009        let rest = rest.trim();
3010        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
3011        // the unquoted block form has an empty inline value and is never flagged.
3012        if !rest.starts_with('[') {
3013            continue;
3014        }
3015        // Parse just the inline value and test its shape: a list whose items are
3016        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
3017        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
3018        // `Seq[Seq[String]]` and is NOT flagged).
3019        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
3020            let nested = items.iter().any(|item| match item {
3021                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
3022                _ => false,
3023            });
3024            if nested {
3025                out.push(key.to_string());
3026            }
3027        }
3028    }
3029    out
3030}
3031
/// Whether a bare target (no `.md`) is a full store-relative path: its first
/// `/`-separated segment is a known layer (`sources` or `records`) and
/// something non-empty follows that first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some(("sources" | "records", rest)) => !rest.is_empty(),
        _ => false,
    }
}
3040
/// Whether `path` consists solely of ordinary relative components.
///
/// Validator inputs come from user-authored markdown/JSON sidecars, so an
/// absolute path, a platform prefix, or a `..` must never be allowed to turn a
/// validation probe into a filesystem escape. `.` components are tolerated but
/// do not count: a path with no normal component at all (empty, or only `.`)
/// is rejected.
fn is_safe_store_relative_path(path: &Path) -> bool {
    path.components()
        .try_fold(false, |seen_normal, part| match part {
            Component::Normal(_) => Some(true),
            Component::CurDir => Some(seen_normal),
            // Any escaping component aborts the fold immediately.
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
        })
        .unwrap_or(false)
}
3055
3056fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
3057    let path = Path::new(bare);
3058    if !is_safe_store_relative_path(path) {
3059        return None;
3060    }
3061    Some(PathBuf::from(format!("{bare}.md")))
3062}
3063
/// Outcome of resolving a wiki-link / index-entry target against the disk.
enum TargetResolution {
    /// A file was found, either at the literal path or at the path with a
    /// `.md` suffix appended.
    Exists,
    /// The target is a safe store-relative path, but no file exists for it.
    Missing,
    /// The target would escape the store (absolute, `..`, platform prefix);
    /// such a target is never probed on disk.
    Unsafe,
}
3073
3074/// Resolve a bare wiki-link / index-entry target the way the graph engine does
3075/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
3076/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
3077/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
3078/// real file), then the `.md`-appended path (the common case for content
3079/// pages). Without trying the literal path first, a legal link to a raw source
3080/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
3081/// resolves it.
3082fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
3083    // The literal path and the `.md`-appended path share the same safety check
3084    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
3085    // target is unsafe in both forms.
3086    if !is_safe_store_relative_path(Path::new(bare)) {
3087        return TargetResolution::Unsafe;
3088    }
3089    match resolved_target_abs(store, bare) {
3090        Some(_) => TargetResolution::Exists,
3091        None => TargetResolution::Missing,
3092    }
3093}
3094
3095/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
3096/// trying the literal path first, then `.md`-appended — mirroring the graph
3097/// engine. `None` when neither exists, or when the bare target escapes the store
3098/// (callers that need to distinguish unsafe from merely-missing use
3099/// [`resolve_wiki_target`]).
3100///
3101/// **Existence is EXACT-CASE, deliberately platform-independent.** A db.md store
3102/// is Git-synced across machines, so a `validate --all` that passes on the
3103/// author's box must guarantee link integrity on the box that serves the store.
3104/// Bare `Path::is_file()` honors the *host* filesystem's case sensitivity: on
3105/// case-insensitive APFS/macOS (or NTFS) a wrong-case link `[[records/x/BOB]]`
3106/// resolves to the on-disk `records/x/bob.md` and passes — but on case-sensitive
3107/// Linux that file genuinely does not exist (`WIKI_LINK_BROKEN`, per SPEC.md
3108/// § Validation: "target file doesn't exist"). To stay platform-independent we
3109/// confirm not just that *a* file exists for the candidate but that its real
3110/// on-disk casing matches the requested store-relative path character-for-
3111/// character (via [`disk_case_matches`]); a case mismatch is treated as NOT
3112/// found, so macOS reports the same broken links Linux would.
3113///
3114/// NOTE on the residual validate-vs-graph divergence on macOS: the graph engine
3115/// ([`crate::graph`]) intentionally mirrors host `is_file()` + ASCII-lowercased
3116/// keys for its internal backlink/rename bookkeeping on a *single* host, so on
3117/// case-insensitive macOS `graph backlinks` will still resolve a wrong-case link
3118/// that `validate` now flags. That divergence is by design: the graph's job is
3119/// single-host consistency; `validate`'s job is cross-platform link integrity.
3120fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
3121    if !is_safe_store_relative_path(Path::new(bare)) {
3122        return None;
3123    }
3124    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
3125    // verbatim under `sources/`).
3126    let literal = store.root.join(bare);
3127    if literal.is_file() && disk_case_matches(store, &literal, bare) {
3128        return Some(literal);
3129    }
3130    // The `.md`-appended path (a content page referenced without its extension).
3131    let with_md_rel = format!("{bare}.md");
3132    let with_md = store.root.join(&with_md_rel);
3133    if with_md.is_file() && disk_case_matches(store, &with_md, &with_md_rel) {
3134        return Some(with_md);
3135    }
3136    None
3137}
3138
3139/// True if `abs` (already confirmed to be an existing file under `store.root`)
3140/// has the exact on-disk casing of the requested store-relative path `requested`.
3141///
3142/// Makes wiki-link existence resolution platform-independent: on case-insensitive
3143/// filesystems (APFS/macOS, NTFS) `Path::is_file()` says yes to a wrong-case
3144/// path, so we canonicalize the candidate — which returns the *real* on-disk
3145/// casing — and compare its store-relative portion to `requested`
3146/// case-sensitively. A mismatch means the file the link actually names does not
3147/// exist on a case-sensitive host, so the caller treats it as not found.
3148///
3149/// Conservative on `canonicalize` failure: if we cannot read the real path (a
3150/// transient FS error, a symlink we cannot resolve, a root that is itself a
3151/// symlink we cannot strip), we fall back to accepting the `is_file()` result
3152/// rather than producing a spurious `WIKI_LINK_BROKEN`. This keeps the check
3153/// additive — it only ever *adds* the case-mismatch detection; it never makes a
3154/// genuinely-resolvable correct-case link fail.
3155fn disk_case_matches(store: &Store, abs: &Path, requested: &str) -> bool {
3156    let Ok(canon_abs) = abs.canonicalize() else {
3157        return true; // cannot read real casing — don't invent a broken link
3158    };
3159    // Strip the store root (also canonicalized so a symlinked root still cancels)
3160    // to get the real on-disk store-relative path, then compare to what the link
3161    // asked for. `canonicalize` on the root may itself fail (e.g. the root no
3162    // longer exists by the time we probe) — be conservative there too.
3163    let Ok(canon_root) = store.root.canonicalize() else {
3164        return true;
3165    };
3166    let Ok(disk_rel) = canon_abs.strip_prefix(&canon_root) else {
3167        // The real file lives outside the (canonical) root — e.g. reached via a
3168        // symlink in the store. Containment is already enforced by
3169        // `is_safe_store_relative_path`; here we simply cannot make a
3170        // case-comparison, so don't manufacture a broken link.
3171        return true;
3172    };
3173    // Compare store-relative paths component-by-component, case-sensitively,
3174    // independent of the host's path separator and case folding.
3175    disk_rel == Path::new(requested)
3176}
3177
/// Whether the bare target path equals `prefix` or lies beneath it (both
/// `.md`-stripped). Trailing `/` on `prefix` are ignored, and the match is on
/// whole path segments: `records/peoples` is not under `records/people`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let base = prefix.trim_end_matches('/');
    match bare.strip_prefix(base) {
        Some(tail) => tail.is_empty() || tail.starts_with('/'),
        None => false,
    }
}
3183
/// The type-folder of a store-relative content path, `<layer>/<type-folder>`:
/// the folder directly under the layer, so date-shards deeper down roll up to
/// it. `None` when the path has fewer than three components (a file sitting
/// directly in a layer folder) or does not start with `sources`/`records`.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    let layer = parts.next()?;
    let folder = parts.next()?;
    // At least one further component (the file) must follow the type-folder.
    parts.next()?;
    matches!(layer, "sources" | "records").then(|| Path::new(layer).join(folder))
}
3197
/// The layer directory (`records` / `sources`) that a *loose* content file
/// sits directly in: the path must have exactly two components, the first
/// being a known layer. `None` for a file inside a type-folder or outside any
/// layer. Counterpart to the index crate's `loose_layer_of`, duplicated here so
/// `validate` needs no index internals.
fn loose_layer_dir(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    match (parts.next(), parts.next(), parts.next()) {
        (Some(layer @ ("sources" | "records")), Some(_), None) => Some(PathBuf::from(layer)),
        _ => None,
    }
}
3209
3210/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`,
3211/// returning store-relative paths to be parsed in full. Skips hidden dirs and
3212/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
3213/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
3214/// (a single presence-only pass), so the loop default never walks-and-*parses*
3215/// the whole content tree.
3216///
3217/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
3218/// is reserved (`Store::is_in_log_dir` checks only the first path component);
3219/// the walk roots are the two layers, so the root archive is already out of
3220/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
3221/// decision log) is real content (see `is_content_file`), so pruning every
3222/// `name == "log"` made `--all` silently skip those files — reporting fewer
3223/// errors than the default working-set scope on the same store.
3224fn walk_content_files(root: &Path) -> Vec<PathBuf> {
3225    let mut out = Vec::new();
3226    for layer in ["sources", "records"] {
3227        let base = root.join(layer);
3228        if !base.is_dir() {
3229            continue;
3230        }
3231        for entry in walkdir::WalkDir::new(&base)
3232            // Follow symlinks, matching the loop-default `md_walker`
3233            // (store.rs `follow_links(true)`): a content file that is a symlink
3234            // into the store, or that lives in a symlinked-in type-folder, is
3235            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3236            // `walk_all_md`, both following symlinks). Without this the `--all`
3237            // sweep silently SKIPPED such files, so the authoritative superset
3238            // reported FEWER issues than the loop scope on the same store —
3239            // inverting the `--all`-is-the-superset contract. walkdir's loop
3240            // detection drops a symlink cycle (yields an Err that `.flatten()`
3241            // discards), so this cannot hang.
3242            .follow_links(true)
3243            .into_iter()
3244            .filter_entry(|e| {
3245                let name = e.file_name().to_str().unwrap_or("");
3246                !name.starts_with('.')
3247            })
3248            .flatten()
3249        {
3250            if !entry.file_type().is_file() {
3251                continue;
3252            }
3253            let name = entry.file_name().to_str().unwrap_or("");
3254            if name.ends_with(".md") && name != "index.md" {
3255                if let Ok(rel) = entry.path().strip_prefix(root) {
3256                    out.push(rel.to_path_buf());
3257                }
3258            }
3259        }
3260    }
3261    out.sort();
3262    out
3263}
3264
3265/// Every `index.md` under the store (root + layers + type-folders), as
3266/// store-relative paths. Used to detect orphan indexes. Like
3267/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
3268/// and its `index.md` is not pruned (only the root-level `log/` archive is
3269/// reserved, and the walk roots are the two layers, so it is already
3270/// out of scope).
3271fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3272    let mut out = Vec::new();
3273    if root.join("index.md").is_file() {
3274        out.push(PathBuf::from("index.md"));
3275    }
3276    for layer in ["sources", "records"] {
3277        let base = root.join(layer);
3278        if !base.is_dir() {
3279            continue;
3280        }
3281        for entry in walkdir::WalkDir::new(&base)
3282            // Follow symlinks, matching the loop-default `md_walker`
3283            // (store.rs `follow_links(true)`): a content file that is a symlink
3284            // into the store, or that lives in a symlinked-in type-folder, is
3285            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3286            // `walk_all_md`, both following symlinks). Without this the `--all`
3287            // sweep silently SKIPPED such files, so the authoritative superset
3288            // reported FEWER issues than the loop scope on the same store —
3289            // inverting the `--all`-is-the-superset contract. walkdir's loop
3290            // detection drops a symlink cycle (yields an Err that `.flatten()`
3291            // discards), so this cannot hang.
3292            .follow_links(true)
3293            .into_iter()
3294            .filter_entry(|e| {
3295                let name = e.file_name().to_str().unwrap_or("");
3296                !name.starts_with('.')
3297            })
3298            .flatten()
3299        {
3300            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3301                if let Ok(rel) = entry.path().strip_prefix(root) {
3302                    out.push(rel.to_path_buf());
3303                }
3304            }
3305        }
3306    }
3307    out.sort();
3308    out
3309}
3310
/// One entry line parsed out of an `index.md`.
struct IndexEntry {
    /// Wiki-link target of the entry: the text before any `|display` segment,
    /// trimmed.
    target: String,
    /// Summary text following the separator after the link, if the line
    /// carries one.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the index file.
    line: u32,
}
3318
3319/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3320/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3321/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3322/// path, the summary text is whatever follows the em dash.
3323fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3324    let mut out = Vec::new();
3325    let mut in_more = false;
3326    for (idx, line) in text.lines().enumerate() {
3327        let trimmed = line.trim_start();
3328        if trimmed.starts_with("## More") {
3329            in_more = true;
3330            continue;
3331        }
3332        if in_more {
3333            continue;
3334        }
3335        if !trimmed.starts_with("- ") {
3336            continue;
3337        }
3338        // Find the first `[[...]]`.
3339        let Some(open) = trimmed.find("[[") else {
3340            continue;
3341        };
3342        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3343            continue;
3344        };
3345        let inner = &trimmed[open + 2..open + 2 + close_rel];
3346        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3347
3348        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3349        let after = &trimmed[open + 2 + close_rel + 2..];
3350        let summary_text = extract_index_entry_summary(after);
3351
3352        out.push(IndexEntry {
3353            target,
3354            summary_text,
3355            line: (idx + 1) as u32,
3356        });
3357    }
3358    out
3359}
3360
3361/// Pull the summary portion out of the text trailing an index entry's
3362/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3363/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3364/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3365/// renderer's tag separator).
3366fn extract_index_entry_summary(after: &str) -> Option<String> {
3367    let mut s = after.trim();
3368    // Drop a leading "(N ...)" count segment, if present.
3369    if s.starts_with('(') {
3370        if let Some(close) = s.find(')') {
3371            s = s[close + 1..].trim_start();
3372        }
3373    }
3374    // Require an em dash or hyphen separator before the summary.
3375    let s = if let Some(rest) = s.strip_prefix('—') {
3376        rest.trim()
3377    } else if let Some(rest) = s.strip_prefix('-') {
3378        rest.trim()
3379    } else {
3380        return None;
3381    };
3382    if s.is_empty() {
3383        return None;
3384    }
3385    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3386    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3387    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3388    // the file has no tags. The previous code also accepted a *single*-spaced
3389    // ` · ` separator, which collided with a legal summary whose own text ends
3390    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3391    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3392    // summary verbatim (no tag block, since there are no tags), but the loose
3393    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3394    // `"Standup notes"` against the file's full summary, and emitted a spurious
3395    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3396    // (rebuild regenerates the identical line). Matching the renderer's exact
3397    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3398    // matches from the right so only the real trailing tag block is considered.
3399    let s = match s.rsplit_once("  ·  ") {
3400        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3401        _ => s,
3402    };
3403    Some(s.to_string())
3404}
3405
/// Whether `s` is a non-empty tag block: at least one whitespace-separated
/// token, and every token is a `#` followed by at least one more byte. That is
/// the shape the index renderer appends after the `·` separator
/// (`crate::index::format_md_entry`), which lets callers tell the renderer's
/// `  ·  #tag` suffix from a literal `·` inside summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    tokens.peek().is_some() && tokens.all(|tok| tok.len() >= 2 && tok.starts_with('#'))
}
3420
3421/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3422/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3423/// or the header isn't well-formed.
3424fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3425    let rest = line.strip_prefix("## [")?;
3426    let close = rest.find(']')?;
3427    let ts_str = &rest[..close];
3428    let tail = rest[close + 1..].trim();
3429
3430    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3431    // attach a zero offset — the log header carries minute precision, no zone.
3432    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3433    let offset = FixedOffset::east_opt(0)?;
3434    let ts = naive.and_local_timezone(offset).single()?;
3435
3436    // kind | object
3437    let (kind, object) = match tail.split_once('|') {
3438        Some((k, o)) => {
3439            let o = o.trim();
3440            (
3441                k.trim().to_string(),
3442                if o.is_empty() {
3443                    None
3444                } else {
3445                    Some(o.to_string())
3446                },
3447            )
3448        }
3449        None => (tail.to_string(), None),
3450    };
3451    if kind.is_empty() {
3452        return None;
3453    }
3454    Some((ts, kind, object))
3455}
3456
3457/// Every log file that holds entries for the working-set scan: the active
3458/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3459/// strictly-prior-month entries into the archives, so the active file alone is
3460/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3461/// unvalidated object can live in an archive after a month rollover. Reading the
3462/// archives here keeps the working-set readers in sync with the rest of the log
3463/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3464/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3465/// only log headers, never the content store, so the loop budget is preserved.
3466fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3467    let mut files = vec![store.root.join("log.md")];
3468    let archive_dir = store.root.join("log");
3469    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3470        let mut archives: Vec<PathBuf> = entries
3471            .flatten()
3472            .map(|e| e.path())
3473            .filter(|p| {
3474                p.is_file()
3475                    && p.file_name()
3476                        .and_then(|s| s.to_str())
3477                        .and_then(|n| n.strip_suffix(".md"))
3478                        .is_some_and(is_year_month_archive)
3479            })
3480            .collect();
3481        // Deterministic order (oldest month first); the callers fold across all
3482        // files so order doesn't affect the result, but a stable order keeps the
3483        // scan reproducible.
3484        archives.sort();
3485        files.extend(archives);
3486    }
3487    files
3488}
3489
/// Reports whether `s` has the shape of a `YYYY-MM` archive stem: exactly four
/// ASCII digits, a `-`, then two ASCII digits. This is the `log/<YYYY-MM>.md`
/// naming the rotation in [`crate::log`] emits.
///
/// Shape only: the month is not range-checked, so `9999-99` matches.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        // Seven bytes with the dash in the fifth position; the rest must be digits.
        [y0, y1, y2, y3, b'-', m0, m1] => [y0, y1, y2, y3, m0, m1]
            .iter()
            .all(|byte| byte.is_ascii_digit()),
        _ => false,
    }
}
3499
3500/// The timestamp of the most recent `validate` entry across the active `log.md`
3501/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3502/// Reads only headers; never the whole store. Archive-aware so a `validate`
3503/// entry that rotated into an archive after a month rollover still anchors the
3504/// cutoff (without this, the cutoff silently resets to `None`).
3505fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3506    let mut latest: Option<DateTime<FixedOffset>> = None;
3507    for file in log_files_for_working_set(store) {
3508        let Ok(text) = std::fs::read_to_string(&file) else {
3509            continue;
3510        };
3511        for line in text.lines() {
3512            if !line.starts_with("## [") {
3513                continue;
3514            }
3515            if let Some((ts, kind, _)) = parse_log_header(line) {
3516                if kind == "validate" {
3517                    latest = Some(match latest {
3518                        Some(p) if p >= ts => p,
3519                        _ => ts,
3520                    });
3521                }
3522            }
3523        }
3524    }
3525    latest
3526}
3527
3528/// The set of content objects changed since `cutoff`, read from log entries
3529/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3530/// counts (no prior validate window). Returns store-relative `.md` paths.
3531///
3532/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3533/// month rollover [`Log::append`] rotates prior-month entries out of the active
3534/// file, so an object changed-but-never-validated in a prior month lives only in
3535/// an archive. Reading the archives here is what keeps `dbmd validate` from
3536/// silently skipping those files. Reads only log headers, never the content
3537/// store.
3538fn changed_objects_since(
3539    store: &Store,
3540    cutoff: Option<DateTime<FixedOffset>>,
3541) -> BTreeSet<PathBuf> {
3542    let mut out = BTreeSet::new();
3543    for file in log_files_for_working_set(store) {
3544        let Ok(text) = std::fs::read_to_string(&file) else {
3545            continue;
3546        };
3547        for line in text.lines() {
3548            if !line.starts_with("## [") {
3549                continue;
3550            }
3551            let Some((ts, kind, object)) = parse_log_header(line) else {
3552                continue;
3553            };
3554            if let Some(c) = cutoff {
3555                if ts < c {
3556                    continue;
3557                }
3558            }
3559            if !matches!(
3560                kind.as_str(),
3561                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3562            ) {
3563                continue;
3564            }
3565            if let Some(obj) = object {
3566                // The object slot is a store-relative path (or a wiki-link target).
3567                let bare = obj
3568                    .trim()
3569                    .trim_start_matches("[[")
3570                    .trim_end_matches("]]")
3571                    .split('|')
3572                    .next()
3573                    .unwrap_or("")
3574                    .trim()
3575                    .trim_end_matches(".md")
3576                    .to_string();
3577                if bare.is_empty() {
3578                    continue;
3579                }
3580                // Containment: the object slot is a log-header field that can
3581                // carry a `..`/absolute/prefix path (a hand-edited or
3582                // merge-malformed log line). Route it through the same safety gate
3583                // every other disk-touching validator path uses
3584                // (`safe_md_target_rel`, which `link_target_type` already applies)
3585                // so a `records/../../leaky` object cannot make
3586                // `validate_working_set` read + frontmatter-report on a file
3587                // OUTSIDE the store root. An unsafe object is dropped from the
3588                // changed set rather than probed.
3589                if let Some(rel) = safe_md_target_rel(&bare) {
3590                    out.insert(rel);
3591                }
3592            }
3593        }
3594    }
3595    out
3596}
3597
/// A positive result of the [`derived_from_ignored_type`] policy check.
///
/// It names the `derived_from` target that resolved to an ignored-type record
/// and that record's type, which is exactly what the validate finding and the
/// write-time warning each need to render their message.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DerivedFromIgnored {
    /// The offending `derived_from` wiki-link target, as written: a bare
    /// store-relative path without the `.md` extension.
    pub target: String,
    /// The `type` the target resolved to; it is a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3611
3612/// **The single authoritative `### Ignored types` derivation check.** Decides
3613/// whether a conclusion record derives from an ignored-type record: the
3614/// `meta-type` must be `conclusion`, `### Ignored types` must be non-empty, and
3615/// some `derived_from` target must resolve to a record whose `type` is in
3616/// `ignored_types`. Returns the first such target (and its type), or `None`.
3617///
3618/// Both surfaces call this so the policy lives in exactly one place:
3619/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3620/// `derived_from` targets it scanned from the raw frontmatter, and the write
3621/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3622/// The link *extraction* differs per surface (text-scan with line numbers vs.
3623/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3624/// resolution, and `ignored_types` membership — does not.
3625pub fn derived_from_ignored_type<I, S>(
3626    store: &Store,
3627    meta_type: &str,
3628    derived_from_targets: I,
3629) -> Option<DerivedFromIgnored>
3630where
3631    I: IntoIterator<Item = S>,
3632    S: AsRef<str>,
3633{
3634    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3635        return None;
3636    }
3637    for target in derived_from_targets {
3638        let target = target.as_ref();
3639        if let Some(target_type) = link_target_type(store, target) {
3640            if store.config.ignored_types.contains(&target_type) {
3641                return Some(DerivedFromIgnored {
3642                    target: target.to_string(),
3643                    target_type,
3644                });
3645            }
3646        }
3647    }
3648    None
3649}
3650
3651/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3652fn link_target_type(store: &Store, target: &str) -> Option<String> {
3653    let bare = target.trim_end_matches(".md");
3654    let abs = store.root.join(safe_md_target_rel(bare)?);
3655    let text = std::fs::read_to_string(&abs).ok()?;
3656    let (yaml, _, _) = split_frontmatter(&text)?;
3657    let value: Value = serde_norway::from_str(&yaml).ok()?;
3658    if let Value::Mapping(m) = value {
3659        m.get(Value::String("type".into())).and_then(scalar_string)
3660    } else {
3661        None
3662    }
3663}
3664
3665// ── Shape validators ─────────────────────────────────────────────────────────
3666
3667/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3668/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3669fn is_iso8601(s: &str) -> bool {
3670    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3671}
3672
3673/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3674/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3675/// accept the date-only form per the SPEC's worked example.
3676fn is_iso8601_date_or_datetime(s: &str) -> bool {
3677    let s = s.trim();
3678    if DateTime::parse_from_rfc3339(s).is_ok() {
3679        return true;
3680    }
3681    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3682}
3683
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Rules, applied after trimming surrounding whitespace:
///
/// - There must be exactly one `@`: a domain that still contains an `@` after
///   the split (the common double-`@` typo `sarah@@acme.com`, or `a@b@c.com`)
///   is rejected — without this the domain `@acme.com` passed every other
///   check.
/// - Neither part may contain whitespace of any kind. Tabs and newlines are
///   rejected as well as plain spaces, so a multi-line or tab-separated value
///   cannot slip through.
/// - The domain must have at least two labels and no empty label, which rules
///   out a leading dot, a trailing dot, and doubled dots (`a@b..com`).
///
/// This is a shape check, not a full RFC address parser.
fn is_email(s: &str) -> bool {
    let Some((local, domain)) = s.trim().split_once('@') else {
        return false;
    };
    if local.is_empty() || local.contains(char::is_whitespace) {
        return false;
    }
    if domain.contains('@') || domain.contains(char::is_whitespace) {
        return false;
    }
    // "Dotted" means at least one `.`; every label around the dots must be
    // non-empty, which also covers a leading or trailing dot.
    domain.contains('.') && domain.split('.').all(|label| !label.is_empty())
}
3701
/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
/// plain decimal number with optional thousands separators and ≤ 2 decimals.
///
/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
/// rejected, and the ≤ 2-decimal rule is actually enforced.
///
/// Thousands separators are only honoured where they can plausibly be one: in
/// the integer part, with a digit on each side. Stray commas (`,5`, `5,`,
/// `1,,000`, `1.2,3`) are rejected instead of being silently dropped. Group
/// width is deliberately not enforced, so non-Western groupings still pass.
fn is_currency(s: &str) -> bool {
    let mut t = s.trim();
    // Strip a leading currency symbol …
    for sym in ["$", "€", "£", "¥"] {
        if let Some(rest) = t.strip_prefix(sym) {
            t = rest.trim_start();
            break;
        }
    }
    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
    // code must be exactly three ASCII letters and separated from the number by
    // whitespace, so a bare `USD` with no amount still fails.
    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
            t = rest.trim_start();
        }
    }

    // Check comma placement BEFORE stripping: once the commas are gone a
    // misplaced one can no longer be told apart from a valid separator.
    if !commas_well_placed(t) {
        return false;
    }
    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
    is_plain_amount(&cleaned)
}

/// True if every `,` in `amount` can be a thousands separator: none after the
/// decimal point, and each one directly between two ASCII digits. A string
/// without commas trivially passes.
fn commas_well_placed(amount: &str) -> bool {
    let (int_part, frac_part) = amount.split_once('.').unwrap_or((amount, ""));
    if frac_part.contains(',') {
        return false;
    }
    let bytes = int_part.as_bytes();
    bytes.iter().enumerate().all(|(i, &b)| {
        b != b','
            || (i > 0
                && bytes[i - 1].is_ascii_digit()
                && bytes.get(i + 1).is_some_and(u8::is_ascii_digit))
    })
}

/// True for a bare decimal amount: optional sign, ≥ 1 digit, an optional
/// fractional part of 1–2 digits. No exponents, no `inf`/`NaN`, no empty string.
fn is_plain_amount(s: &str) -> bool {
    let digits = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match digits.split_once('.') {
        Some((i, f)) => (i, Some(f)),
        None => (digits, None),
    };
    // The integer part is mandatory: `.5` is not accepted.
    if int_part.is_empty() || !int_part.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }
    match frac_part {
        None => true,
        // A decimal point must be followed by one or two digits (`1.` fails).
        Some(f) => (1..=2).contains(&f.len()) && f.bytes().all(|b| b.is_ascii_digit()),
    }
}
3746
/// Reports whether `s` (ignoring surrounding whitespace) is an http(s) URL: it
/// starts with `http://` or `https://` and has at least one character after
/// the scheme.
///
/// The check is on what remains after the *matched* scheme, so a
/// single-character host on the shorter scheme (`http://x`, 8 bytes — e.g. an
/// intranet/container hostname) is accepted, while a bare scheme with nothing
/// after it (`http://`, `https://`) is rejected. The scheme match is
/// case-sensitive.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    candidate
        .strip_prefix("http://")
        .or_else(|| candidate.strip_prefix("https://"))
        .is_some_and(|after_scheme| !after_scheme.is_empty())
}
3761
3762/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3763fn shape_suggestion(shape: Shape) -> String {
3764    match shape {
3765        Shape::String => "use a scalar string".into(),
3766        Shape::Int => "use an integer".into(),
3767        Shape::Bool => "use `true` or `false`".into(),
3768        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3769        Shape::Email => "use a `<local>@<domain>` address".into(),
3770        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3771        Shape::Url => "use an http(s) URL".into(),
3772    }
3773}
3774
3775/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3776/// can't know the folder, so the suggestion is generic but actionable.
3777fn short_form_suggestion(bare: &str) -> Option<String> {
3778    Some(format!(
3779        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3780        slugish(bare)
3781    ))
3782}
3783
/// Turns a plain string into a filesystem-ish leaf: trimmed, lowercased, each
/// whitespace character replaced by a hyphen, and everything that is not
/// alphanumeric, `-`, `/` or `_` dropped.
///
/// Runs of whitespace are not collapsed (`a  b` becomes `a--b`), and
/// "alphanumeric" is the Unicode notion, so non-ASCII letters survive.
fn slugish(s: &str) -> String {
    let mut slug = String::new();
    for ch in s.trim().to_lowercase().chars() {
        // Whitespace becomes a separator before the keep/drop decision.
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3793
3794/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3795/// hashes a byte or reads an asset file's contents — byte presence and hash
3796/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3797/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3798/// content file's `asset`/`assets` declarations.
3799fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3800    use crate::assets;
3801
3802    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3803    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3804
3805    // Lenient manifest read: a malformed line is reported, not fatal.
3806    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3807    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3808        for (i, line) in text.lines().enumerate() {
3809            if line.trim().is_empty() {
3810                continue;
3811            }
3812            match serde_json::from_str::<assets::AssetRecord>(line) {
3813                Ok(rec) => {
3814                    manifest.insert(rec.path.clone(), rec);
3815                }
3816                Err(e) => push(
3817                    issues,
3818                    Severity::Error,
3819                    codes::ASSET_MANIFEST_MALFORMED,
3820                    manifest_rel,
3821                    Some((i as u32) + 1),
3822                    None,
3823                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3824                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3825                    vec![],
3826                ),
3827            }
3828        }
3829    }
3830
3831    // Per-wrapper declarations: every declared asset must be in the manifest and
3832    // must not point at a markdown content file.
3833    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3834    for (rel, p) in parsed {
3835        let Some(map) = &p.fm else {
3836            continue;
3837        };
3838        for decl in assets::declarations_from_yaml_map(map) {
3839            let norm = match assets::normalize_asset_path(&decl.path) {
3840                Ok(n) => n,
3841                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3842            };
3843            declared.insert(norm.clone());
3844            let is_md = Path::new(&norm)
3845                .extension()
3846                .and_then(|e| e.to_str())
3847                .map(|e| e.eq_ignore_ascii_case("md"))
3848                .unwrap_or(false);
3849            if is_md {
3850                push(
3851                    issues,
3852                    Severity::Warning,
3853                    codes::ASSET_PATH_IS_CONTENT,
3854                    rel,
3855                    None,
3856                    Some("asset".to_string()),
3857                    format!("asset path `{norm}` points at a markdown content file"),
3858                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3859                    vec![PathBuf::from(&norm)],
3860                );
3861            }
3862            if !manifest.contains_key(&norm) {
3863                push(
3864                    issues,
3865                    Severity::Error,
3866                    codes::ASSET_UNDECLARED,
3867                    rel,
3868                    None,
3869                    Some("asset".to_string()),
3870                    format!(
3871                        "references asset `{norm}` with no record in {}",
3872                        assets::MANIFEST_FILE
3873                    ),
3874                    Some("run `dbmd assets scan` to catalog it".to_string()),
3875                    vec![PathBuf::from(&norm)],
3876                );
3877            }
3878        }
3879    }
3880
3881    // Per-record: wrapper existence + orphan detection.
3882    for (path, rec) in &manifest {
3883        for w in &rec.wrappers {
3884            if !store.root.join(w).is_file() {
3885                push(
3886                    issues,
3887                    Severity::Error,
3888                    codes::ASSET_WRAPPER_BROKEN,
3889                    Path::new(path),
3890                    None,
3891                    None,
3892                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3893                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3894                    vec![PathBuf::from(w)],
3895                );
3896            }
3897        }
3898        if !declared.contains(path) {
3899            push(
3900                issues,
3901                Severity::Warning,
3902                codes::ASSET_MANIFEST_ORPHAN,
3903                Path::new(path),
3904                None,
3905                None,
3906                format!(
3907                    "`{path}` is in {} but no wrapper references it",
3908                    assets::MANIFEST_FILE
3909                ),
3910                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3911                vec![],
3912            );
3913        }
3914    }
3915}
3916
3917/// Push a fully-formed [`Issue`].
3918#[allow(clippy::too_many_arguments)]
3919fn push(
3920    issues: &mut Vec<Issue>,
3921    severity: Severity,
3922    code: &'static str,
3923    file: &Path,
3924    line: Option<u32>,
3925    key: Option<String>,
3926    message: String,
3927    suggestion: Option<String>,
3928    related: Vec<PathBuf>,
3929) {
3930    issues.push(Issue {
3931        severity,
3932        code,
3933        file: file.to_path_buf(),
3934        line,
3935        key,
3936        message,
3937        suggestion,
3938        related,
3939    });
3940}
3941
/// Finds the 1-based *file* line of a top-level frontmatter key.
///
/// `fm_yaml` is the YAML block without its fences, so YAML line 0 is file line
/// 2 (file line 1 is the opening `---`). A line matches when it starts with
/// `key` at column 0 and a `:` follows immediately; indented or list-item
/// occurrences and longer keys that merely share the prefix do not match. The
/// first match wins. Returns `None` if the key is not found.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let yaml_index = fm_yaml.lines().position(|line| {
        // Top-level only: the key must begin the line, with no indent or dash.
        line.starts_with(key)
            && line
                .trim_start()
                .strip_prefix(key)
                .is_some_and(|rest| rest.starts_with(':'))
    })?;
    // +2: file line 1 is the opening `---`, YAML line 0 → file line 2.
    Some((yaml_index as u32) + 2)
}
3957
3958/// The line a *field-absence* issue (a required key that is missing entirely)
3959/// anchors to: the key's line when present, else line `1` — the frontmatter
3960/// block's opening `---`. A missing key has no line of its own; anchoring it to
3961/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3962/// line to point at instead of an unhelpful `null`.
3963fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3964    fm_key_line(fm_yaml, key).or(Some(1))
3965}
3966
3967/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3968/// output deterministic across runs.
3969fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3970    a.file
3971        .cmp(&b.file)
3972        .then(a.line.cmp(&b.line))
3973        .then(a.code.cmp(b.code))
3974        .then(a.key.cmp(&b.key))
3975}
3976
3977// ═════════════════════════════════════════════════════════════════════════════
3978//  Tests
3979// ═════════════════════════════════════════════════════════════════════════════
3980
3981#[cfg(test)]
3982mod tests {
3983    use super::*;
3984    use crate::parser::{Config, FieldSpec};
3985    use std::fs;
3986    use tempfile::TempDir;
3987
3988    #[test]
3989    fn split_frontmatter_tolerates_leading_bom() {
3990        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3991        // fence must not make validate treat the file as frontmatter-less while
3992        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3993        // for `\u{feff}---` and the function returned None.
3994        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3995        let parsed = split_frontmatter(text);
3996        assert!(
3997            parsed.is_some(),
3998            "a leading BOM must not hide frontmatter from validate"
3999        );
4000        let (yaml, body, close_line) = parsed.unwrap();
4001        assert_eq!(yaml, "type: contact\nsummary: hi\n");
4002        assert_eq!(body, "body");
4003        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
4004    }
4005
4006    /// A test store builder over a real tempdir. Every helper writes real files
4007    /// so the assertions exercise real behavior, not mocks.
4008    struct Fixture {
4009        dir: TempDir,
4010        config: Config,
4011    }
4012
4013    impl Fixture {
4014        /// A fresh store with a **valid** `DB.md` (the identity contract:
4015        /// `type: db-md` + `scope` + `owner`) and the two layer dirs. A valid
4016        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
4017        /// clean; tests that want a broken DB.md write their own via `write`.
4018        fn new() -> Self {
4019            let dir = TempDir::new().unwrap();
4020            fs::write(
4021                dir.path().join("DB.md"),
4022                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
4023            )
4024            .unwrap();
4025            for layer in ["sources", "records"] {
4026                fs::create_dir_all(dir.path().join(layer)).unwrap();
4027            }
4028            Fixture {
4029                dir,
4030                config: Config::default(),
4031            }
4032        }
4033
4034        /// A store with no `DB.md` marker.
4035        fn bare() -> Self {
4036            let dir = TempDir::new().unwrap();
4037            Fixture {
4038                dir,
4039                config: Config::default(),
4040            }
4041        }
4042
4043        /// Write a file at a store-relative path, creating parent dirs.
4044        fn write(&self, rel: &str, contents: &str) {
4045            let abs = self.dir.path().join(rel);
4046            fs::create_dir_all(abs.parent().unwrap()).unwrap();
4047            fs::write(abs, contents).unwrap();
4048        }
4049
4050        fn store(&self) -> Store {
4051            Store {
4052                root: self.dir.path().to_path_buf(),
4053                config: self.config.clone(),
4054            }
4055        }
4056
4057        fn store_all(&self) -> Vec<Issue> {
4058            validate_all(&self.store()).unwrap()
4059        }
4060
4061        /// Write the canonical `index.md` + `index.jsonl` at every level via the
4062        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
4063        /// projection a `dbmd index rebuild` produces. Use this (rather than a
4064        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
4065        /// the sidecar carries the COMPLETE per-field projection and the fixture
4066        /// can't silently drift from what the index writer emits.
4067        fn rebuild_indexes(&self) {
4068            crate::index::Index::rebuild_all(&self.store()).unwrap();
4069        }
4070    }
4071
4072    /// True if any issue has this code.
4073    fn has(issues: &[Issue], code: &str) -> bool {
4074        issues.iter().any(|i| i.code == code)
4075    }
4076
4077    /// Count issues with a code.
4078    fn count(issues: &[Issue], code: &str) -> usize {
4079        issues.iter().filter(|i| i.code == code).count()
4080    }
4081
4082    /// The first issue with a code, or panic.
4083    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
4084        issues
4085            .iter()
4086            .find(|i| i.code == code)
4087            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
4088    }
4089
4090    /// A minimal valid `contact` body for reuse.
4091    fn valid_contact(summary: &str) -> String {
4092        format!(
4093            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
4094        )
4095    }
4096
4097    // ── store marker ──────────────────────────────────────────────────────────
4098
4099    #[test]
4100    fn not_a_store_when_db_md_absent() {
4101        let fx = Fixture::bare();
4102        let issues = fx.store_all();
4103        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
4104        assert_eq!(issues[0].code, codes::NOT_A_STORE);
4105        assert!(issues[0].is_error());
4106    }
4107
4108    #[test]
4109    fn working_set_also_reports_not_a_store() {
4110        let fx = Fixture::bare();
4111        let issues = validate_working_set(&fx.store(), None).unwrap();
4112        assert!(has(&issues, codes::NOT_A_STORE));
4113    }
4114
4115    #[test]
4116    fn clean_store_has_no_issues() {
4117        let fx = Fixture::new();
4118        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4119        // Build the canonical indexes (complete per-field jsonl included) the
4120        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
4121        // proven clean across every projected field, not just summary/type.
4122        fx.rebuild_indexes();
4123        let issues = fx.store_all();
4124        assert!(
4125            issues.is_empty(),
4126            "expected a clean store, got: {issues:#?}"
4127        );
4128    }
4129
4130    // ── meta-type closed enum ─────────────────────────────────────────────────
4131
4132    /// Regression (adversarial review): a NON-SCALAR `meta-type` (a YAML list or
4133    /// mapping) must be rejected with `FM_BAD_META_TYPE`, not silently slip past
4134    /// the enum check (and then get reclassified as the default `fact`). Pre-fix
4135    /// the check was gated on `and_then(scalar_string)`, which returned `None`
4136    /// for a sequence/mapping and short-circuited the whole branch.
4137    #[test]
4138    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
4139        let fx = Fixture::new();
4140        let body = |mt: &str| {
4141            format!(
4142                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
4143            )
4144        };
4145
4146        // Valid enum members + absent (default fact) → no FM_BAD_META_TYPE.
4147        for ok in ["fact", "operational", "conclusion"] {
4148            fx.write("records/profiles/ok.md", &body(ok));
4149            let issues = validate_working_set(&fx.store(), None).unwrap();
4150            assert!(
4151                !has(&issues, codes::FM_BAD_META_TYPE),
4152                "`meta-type: {ok}` must be accepted; got {issues:#?}"
4153            );
4154        }
4155        fx.write(
4156            "records/profiles/absent.md",
4157            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
4158        );
4159        assert!(
4160            !has(
4161                &validate_working_set(&fx.store(), None).unwrap(),
4162                codes::FM_BAD_META_TYPE
4163            ),
4164            "an absent meta-type is the default `fact` and must be accepted"
4165        );
4166
4167        // Scalar-but-wrong, AND non-scalar (list / mapping) → FM_BAD_META_TYPE.
4168        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
4169            let fx2 = Fixture::new();
4170            fx2.write("records/profiles/bad.md", &body(bad));
4171            let issues = validate_working_set(&fx2.store(), None).unwrap();
4172            assert!(
4173                has(&issues, codes::FM_BAD_META_TYPE),
4174                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
4175            );
4176        }
4177    }
4178
4179    // ── DB.md structure ───────────────────────────────────────────────────────
4180
4181    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
4182    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
4183    /// would fail here).
4184    #[test]
4185    fn valid_db_md_emits_no_structure_issue() {
4186        let fx = Fixture::new();
4187        let issues = fx.store_all();
4188        assert!(
4189            !has(&issues, codes::DB_MD_BAD_TYPE)
4190                && !has(&issues, codes::DB_MD_MISSING_FIELD)
4191                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
4192            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
4193        );
4194    }
4195
4196    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
4197    /// anchored to the `type:` line (file line 2). Failing to read the type, or
4198    /// accepting a non-`db-md` type, breaks this.
4199    #[test]
4200    fn db_md_wrong_type_is_error() {
4201        let fx = Fixture::new();
4202        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
4203        let issues = fx.store_all();
4204        let i = find(&issues, codes::DB_MD_BAD_TYPE);
4205        assert!(i.is_error());
4206        assert_eq!(i.file, PathBuf::from("DB.md"));
4207        assert_eq!(i.key.as_deref(), Some("type"));
4208        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
4209    }
4210
4211    /// With only `type: db-md` in the frontmatter, `scope` and `owner` each get
4212    /// their own error-severity `DB_MD_MISSING_FIELD`, keyed on the field, at line 1.
4213    #[test]
4214    fn db_md_missing_scope_and_owner_each_report() {
4215        let fixture = Fixture::new();
4216        fixture.write("DB.md", "---\ntype: db-md\n---\n");
4217        let issues = fixture.store_all();
4218        assert_eq!(
4219            count(&issues, codes::DB_MD_MISSING_FIELD),
4220            2,
4221            "both scope and owner absent → two issues: {issues:#?}"
4222        );
        // Collect the keys as a set so the comparison ignores report order.
4223        let reported_keys: BTreeSet<Option<String>> = issues
4224            .iter()
4225            .filter(|issue| issue.code == codes::DB_MD_MISSING_FIELD)
4226            .map(|issue| issue.key.clone())
4227            .collect();
4228        assert_eq!(
4229            reported_keys,
4230            BTreeSet::from([Some(String::from("scope")), Some(String::from("owner"))]),
4231            "one issue keyed on each missing field"
4232        );
4233        for missing in issues
4234            .iter()
4235            .filter(|issue| issue.code == codes::DB_MD_MISSING_FIELD)
4236        {
4237            assert!(missing.is_error());
4238            assert_eq!(missing.line, Some(1), "absent field anchors to the block top");
4239        }
4240    }
4241
4242    /// A required field that is present but blank (`owner: ""`) still counts as
4243    /// missing: exactly one `DB_MD_MISSING_FIELD`, keyed on `owner` and anchored
4244    /// to the field's own line (file line 4). Guards against an "is the key
    /// textually present?" shortcut that would accept an empty value.
4245    #[test]
4246    fn db_md_blank_required_field_is_missing() {
4247        let fx = Fixture::new();
4248        fx.write(
4249            "DB.md",
4250            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
4251        );
4252        let issues = fx.store_all();
4253        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
4254        assert_eq!(i.key.as_deref(), Some("owner"));
4255        assert_eq!(
4256            i.line,
4257            Some(4),
4258            "a present-but-empty field anchors to its line"
4259        );
        // `assert_eq!` plus the issue dump reports the actual count on failure,
        // which a bare `assert!(.. == 1)` would hide.
4260        assert_eq!(
4261            count(&issues, codes::DB_MD_MISSING_FIELD),
            1,
4262            "scope is present and non-empty → only owner reported: {issues:#?}"
4263        );
4264    }
4265
4266    /// A `##` heading outside the recognized set (`## Glossary` here) yields one
4267    /// warning-severity `DB_MD_UNKNOWN_SECTION` at the heading's file line, while
    /// the `## Agent instructions` heading in the same file is not flagged.
4268    #[test]
4269    fn db_md_unknown_section_is_warning() {
4270        let fixture = Fixture::new();
4271        fixture.write(
4272            "DB.md",
4273            // File lines: 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
4274            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
4275            // 11 `## Glossary`, 12 blank, 13 prose.
4276            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
4277        );
4278        let issues = fixture.store_all();
4279        let flagged = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
4280        assert!(!flagged.is_error(), "unknown section is a warning, not an error");
4281        assert_eq!(flagged.severity, Severity::Warning);
4282        assert_eq!(
4283            flagged.line,
4284            Some(11),
4285            "anchors to the `## Glossary` heading line"
4286        );
4287        assert!(
4288            flagged.message.contains("Glossary"),
4289            "the message names the offending section: {}",
4290            flagged.message
4291        );
4292        // Exactly one hit means `## Agent instructions` was accepted silently.
4293        assert_eq!(
4294            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
4295            1,
4296            "only the unrecognized section is flagged: {issues:#?}"
4297        );
4298    }
4299
4300    /// `DB.md` that is just a heading (no frontmatter block at all) is reported
4301    /// as `DB_MD_BAD_TYPE` together with two `DB_MD_MISSING_FIELD` issues.
4302    #[test]
4303    fn db_md_no_frontmatter_reports_type_and_both_fields() {
4304        let fixture = Fixture::new();
4305        fixture.write("DB.md", "# just a heading, no frontmatter\n");
4306        let found = fixture.store_all();
4307        assert!(has(&found, codes::DB_MD_BAD_TYPE));
4308        assert_eq!(count(&found, codes::DB_MD_MISSING_FIELD), 2);
4309    }
4310
4311    // ── frontmatter ─────────────────────────────────────────────────────────
4312
4313    #[test]
4314    fn missing_type_is_error() {
        // The frontmatter has created/updated/summary but no `type:` key, so the
        // file must be flagged with an error-severity `FM_MISSING_TYPE`.
4315        let fixture = Fixture::new();
4316        fixture.write(
4317            "records/contacts/a.md",
4318            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
4319        );
4320        let found = fixture.store_all();
4321        assert!(has(&found, codes::FM_MISSING_TYPE));
4322        assert!(find(&found, codes::FM_MISSING_TYPE).is_error());
4323    }
4324
4325    #[test]
4326    fn missing_universal_timestamps_are_errors_on_content_files() {
        // A contact file with neither `created` nor `updated` must get one
        // error per absent timestamp, each keyed on the field it names.
4327        let fixture = Fixture::new();
4328        fixture.write(
4329            "records/contacts/a.md",
4330            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
4331        );
4332        let found = fixture.store_all();
4333
4334        let created_issue = find(&found, codes::FM_MISSING_CREATED);
4335        assert_eq!(created_issue.key.as_deref(), Some("created"));
4336        assert!(created_issue.is_error());
4337
4338        let updated_issue = find(&found, codes::FM_MISSING_UPDATED);
4339        assert_eq!(updated_issue.key.as_deref(), Some("updated"));
4340        assert!(updated_issue.is_error());
4341    }
4342
4343    #[test]
4344    fn meta_files_do_not_require_universal_timestamps() {
        // Nothing is written here: only what `Fixture::new()` itself seeds gets
        // validated. NOTE(review): the assertion messages imply that seed holds
        // DB.md/log/index meta files — confirm, otherwise this passes vacuously.
4345        let fixture = Fixture::new();
4346        let issues = fixture.store_all();
4347
4348        assert!(
4349            !has(&issues, codes::FM_MISSING_CREATED),
4350            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4351        );
4352        assert!(
4353            !has(&issues, codes::FM_MISSING_UPDATED),
4354            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4355        );
4356    }
4357
4358    #[test]
4359    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
        // A content file that opens with a heading (no `---` block) must be
        // reported as lacking both its type and its summary.
4360        let fixture = Fixture::new();
4361        fixture.write(
4362            "records/profiles/a.md",
4363            "# Just a heading\n\nNo frontmatter here.\n",
4364        );
4365        let issues = fixture.store_all();
4366        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4367        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4368    }
4369
4370    #[test]
4371    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
        // An empty `---`/`---` block has no keys at all, so both the type and
        // the summary must be reported as missing.
4372        let fixture = Fixture::new();
4373        fixture.write("records/profiles/a.md", "---\n---\n\nbody\n");
4374        let issues = fixture.store_all();
4375        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4376        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4377    }
4378
4379    #[test]
4380    fn malformed_yaml_is_error_and_suppresses_field_checks() {
        // Unparseable frontmatter must surface as an error-severity
        // `FM_MALFORMED_YAML` carrying a non-empty suggestion, and must not
        // also produce `SUMMARY_MISSING` for the same file.
4381        let fx = Fixture::new();
4382        // The block is deliberately broken (no tab is involved): an over-indented
        // `bad: : : :` entry followed by a line that starts with `: : nope`.
4383        fx.write(
4384            "records/contacts/a.md",
4385            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4386        );
4387        let issues = fx.store_all();
4388        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4389        assert!(issue.is_error());
4390        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4391        // When YAML doesn't parse we don't *also* claim the summary is missing;
4392        // the agent fixes the YAML first.
4393        assert!(
4394            !has(&issues, codes::SUMMARY_MISSING),
4395            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4396        );
4397    }
4398
4399    #[test]
4400    fn bad_created_timestamp_is_error() {
        // `created: not-a-date` must be rejected with an error-severity
        // `FM_BAD_TIMESTAMP` keyed on `created`; `updated` is well-formed here.
4401        let fixture = Fixture::new();
4402        fixture.write(
4403            "records/contacts/a.md",
4404            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4405        );
4406        let found = fixture.store_all();
4407        let bad_stamp = find(&found, codes::FM_BAD_TIMESTAMP);
4408        assert_eq!(bad_stamp.key.as_deref(), Some("created"));
4409        assert!(bad_stamp.is_error());
4410    }
4411
4412    #[test]
4413    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4414        let fixture = Fixture::new();
4415        // Same date-only value in two fields: `created` needs a full RFC3339
4416        // datetime and must fail; the type-specific `last_touch` may be date-only.
4417        fixture.write(
4418            "records/contacts/a.md",
4419            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4420        );
4421        let issues = fixture.store_all();
4422        let created_hits = issues
4423            .iter()
4424            .filter(|it| it.code == codes::FM_BAD_TIMESTAMP && it.key.as_deref() == Some("created"))
4425            .count();
4426        assert_eq!(
4427            created_hits,
4428            1,
4429            "date-only `created` must fail: {issues:#?}"
4430        );
4431        assert!(
4432            issues.iter().all(
4433                |it| !(it.code == codes::FM_BAD_TIMESTAMP && it.key.as_deref() == Some("last_touch"))
4434            ),
4435            "date-only `last_touch` is valid: {issues:#?}"
4436        );
4437    }
4438
4439    // ── summary ─────────────────────────────────────────────────────────────
4440
4441    #[test]
4442    fn summary_missing_empty_multiline_toolong() {
        // Three profiles, one per summary defect: no `summary` key, a
        // whitespace-only summary, and a 201-character summary. Despite the
        // name, no multi-line summary is written in this test.
4443        let fixture = Fixture::new();
4444        fixture.write(
4445            "records/profiles/missing.md",
4446            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4447        );
4448        fixture.write(
4449            "records/profiles/empty.md",
4450            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4451        );
4452        let long = "x".repeat(201);
4453        fixture.write(
4454            "records/profiles/long.md",
4455            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4456        );
4457        let found = fixture.store_all();
4458        assert!(has(&found, codes::SUMMARY_MISSING));
4459        assert_eq!(
4460            find(&found, codes::SUMMARY_MISSING).file,
4461            PathBuf::from("records/profiles/missing.md")
4462        );
4463        assert!(has(&found, codes::SUMMARY_EMPTY));
4464        assert!(has(&found, codes::SUMMARY_TOO_LONG));
        // Over-length is only a warning, unlike the other summary defects' codes.
4465        assert_eq!(
4466            find(&found, codes::SUMMARY_TOO_LONG).severity,
4467            Severity::Warning
4468        );
4469    }
4470
4471    #[test]
4472    fn summary_multiline_via_yaml_block_scalar() {
4473        let fixture = Fixture::new();
4474        // `summary: |` is a literal block scalar, so the two indented lines
        // become one value containing a newline → `SUMMARY_MULTILINE`.
4475        fixture.write(
4476            "records/profiles/a.md",
4477            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4478        );
4479        let issues = fixture.store_all();
4480        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4481    }
4482
4483    #[test]
4484    fn summary_exactly_200_chars_is_ok() {
        // Boundary case: a summary of exactly 200 characters must not trigger
        // `SUMMARY_TOO_LONG` (the 201-character case is covered separately).
4485        let fixture = Fixture::new();
4486        let s = "y".repeat(200);
4487        fixture.write(
4488            "records/profiles/a.md",
4489            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4490        );
4491        let issues = fixture.store_all();
4492        assert!(
4493            !has(&issues, codes::SUMMARY_TOO_LONG),
4494            "200 is the bound, inclusive: {issues:#?}"
4495        );
4496    }
4497
4498    #[test]
4499    fn meta_files_need_no_summary() {
4500        let fixture = Fixture::new();
4501        // One contact plus the root, layer and type-folder indexes, the JSONL
4502        // index and `log.md`. None of the meta files has a `summary`, and none
        // may be reported as `SUMMARY_MISSING`.
4503        fixture.write("records/contacts/a.md", &valid_contact("A contact"));
4504        fixture.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4505        fixture.write(
4506            "records/index.md",
4507            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4508        );
4509        fixture.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4510        fixture.write(
4511            "records/contacts/index.jsonl",
4512            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4513        );
4514        fixture.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4515        let issues = fixture.store_all();
4516        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4517    }
4518
4519    // ── tags ────────────────────────────────────────────────────────────────
4520
4521    #[test]
4522    fn nested_tags_warns_flat_tags_ok() {
        // `nested.md` puts a list inside its `tags` list; `flat.md` uses a flat
        // flow list. Only the nested one may raise `TAGS_MALFORMED` (a warning).
4523        let fixture = Fixture::new();
4524        fixture.write(
4525            "records/contacts/nested.md",
4526            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4527        );
4528        fixture.write(
4529            "records/contacts/flat.md",
4530            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4531        );
4532        let issues = fixture.store_all();
4533        let tag_warnings: Vec<_> = issues
4534            .iter()
4535            .filter(|it| it.code == codes::TAGS_MALFORMED)
4536            .collect();
4537        assert_eq!(
4538            tag_warnings.len(),
4539            1,
4540            "only the nested-tags file should warn: {issues:#?}"
4541        );
4542        assert_eq!(
4543            tag_warnings[0].file,
4544            PathBuf::from("records/contacts/nested.md")
4545        );
4546        assert_eq!(tag_warnings[0].severity, Severity::Warning);
4547    }
4548
4549    // ── wiki-links ────────────────────────────────────────────────────────────
4550
4551    #[test]
4552    fn short_form_wiki_link_is_error() {
        // A body link with no path (`[[sarah-chen]]`) is an error-severity
        // `WIKI_LINK_SHORT_FORM` whose message names the target.
4553        let fixture = Fixture::new();
4554        let text = valid_contact("links to a short form")
4555            + "\nSee [[sarah-chen]] for details.\n";
4556        fixture.write("records/contacts/a.md", &text);
4557        let issues = fixture.store_all();
4558        let short_form = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4559        assert!(short_form.is_error());
4560        assert!(short_form.message.contains("sarah-chen"));
4561        // The same link must not be reported a second time as broken.
4562        assert!(
4563            issues
4564                .iter()
4565                .all(|it| !(it.code == codes::WIKI_LINK_BROKEN && it.message.contains("sarah-chen"))),
4566            "short-form should suppress broken: {issues:#?}"
4567        );
4568    }
4569
4570    #[test]
4571    fn broken_full_path_wiki_link_is_error() {
        // A full-path link to a file that was never written must be an
        // error-severity `WIKI_LINK_BROKEN` naming the target, with a suggestion.
4572        let fixture = Fixture::new();
4573        let text = valid_contact("links to a missing file")
4574            + "\nSee [[records/contacts/ghost]].\n";
4575        fixture.write("records/contacts/a.md", &text);
4576        let found = fixture.store_all();
4577        let broken = find(&found, codes::WIKI_LINK_BROKEN);
4578        assert!(broken.is_error());
4579        assert!(broken.message.contains("records/contacts/ghost"));
4580        assert!(broken.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4581    }
4582
4583    #[test]
4584    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
        // A link target containing `..` segments is reported as
        // `WIKI_LINK_BROKEN` with the "not a safe store-relative path" message.
4585        let fixture = Fixture::new();
4586        let text = valid_contact("links with traversal")
4587            + "\nSee [[records/contacts/../../ghost]].\n";
4588        fixture.write("records/contacts/a.md", &text);
4589        let found = fixture.store_all();
4590        let rejected = find(&found, codes::WIKI_LINK_BROKEN);
4591        assert!(rejected.message.contains("not a safe store-relative path"));
4592        assert!(rejected.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4593    }
4594
4595    #[test]
4596    fn valid_full_path_wiki_link_passes() {
        // Positive control: a full-path, extensionless link to a file that
        // exists raises neither the broken nor the short-form code.
4597        let fixture = Fixture::new();
4598        fixture.write("records/contacts/target.md", &valid_contact("target"));
4599        let text = valid_contact("links to target")
4600            + "\nSee [[records/contacts/target]].\n";
4601        fixture.write("records/contacts/a.md", &text);
4602        let issues = fixture.store_all();
4603        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4604        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4605    }
4606
4607    #[test]
4608    fn md_extension_wiki_link_warns_and_resolves() {
        // A link written with a trailing `.md` gets a `WIKI_LINK_HAS_EXTENSION`
        // warning whose suggestion is the extensionless form of the same link.
4609        let fixture = Fixture::new();
4610        fixture.write("records/contacts/target.md", &valid_contact("target"));
4611        let text = valid_contact("links with extension")
4612            + "\nSee [[records/contacts/target.md]].\n";
4613        fixture.write("records/contacts/a.md", &text);
4614        let issues = fixture.store_all();
4615        let with_ext = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4616        assert_eq!(with_ext.severity, Severity::Warning);
4617        assert_eq!(
4618            with_ext.suggestion.as_deref(),
4619            Some("drop the extension: [[records/contacts/target]]")
4620        );
4621        // `target.md` was written above, so the link must not count as broken.
4622        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4623    }
4624
4625    #[test]
4626    fn wiki_links_in_code_fences_are_ignored() {
        // The only short-form link sits inside a ``` fence, so it is example
        // text and must not raise `WIKI_LINK_SHORT_FORM`.
4627        let fixture = Fixture::new();
4628        let text = valid_contact("has a fenced example")
4629            + "\n```\n[[sarah-chen]]\n```\n";
4630        fixture.write("records/contacts/a.md", &text);
4631        let issues = fixture.store_all();
4632        assert!(
4633            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4634            "fenced wiki-links must be ignored: {issues:#?}"
4635        );
4636    }
4637
4638    #[test]
4639    fn flow_form_link_list_in_frontmatter_is_error() {
        // `attendees` lists its wiki-links inline as `[ [[..]], [[..]] ]`; that
        // form is an error-severity `WIKI_LINK_FLOW_FORM_LIST` keyed on the field.
4640        let fixture = Fixture::new();
4641        fixture.write(
4642            "records/meetings/m.md",
4643            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4644        );
4645        let found = fixture.store_all();
4646        let flow_form = find(&found, codes::WIKI_LINK_FLOW_FORM_LIST);
4647        assert!(flow_form.is_error());
4648        assert_eq!(flow_form.key.as_deref(), Some("attendees"));
4649    }
4650
4651    #[test]
4652    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
        // Counterpart of the flow-form test: the same two attendees written as
        // a `- item` block list, with both link targets present in the store.
4653        let fixture = Fixture::new();
4654        fixture.write("records/contacts/a.md", &valid_contact("a"));
4655        fixture.write("records/contacts/b.md", &valid_contact("b"));
4656        fixture.write(
4657            "records/meetings/m.md",
4658            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4659        );
4660        let issues = fixture.store_all();
4661        assert!(
4662            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4663            "{issues:#?}"
4664        );
4665        // Both targets were written above, so nothing may be reported broken.
4666        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4667    }
4668
4669    #[test]
4670    fn frontmatter_short_form_link_field_is_error() {
4671        let fixture = Fixture::new();
4672        // No schema is registered here, so `related` is a custom wiki-link field
4673        // and its quoted short-form value must be reported as WIKI_LINK_SHORT_FORM.
4674        fixture.write(
4675            "records/synthesis/a.md",
4676            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4677        );
4678        let found = fixture.store_all();
4679        let short_form = find(&found, codes::WIKI_LINK_SHORT_FORM);
4680        assert!(short_form.is_error());
4681        assert_eq!(short_form.key.as_deref(), Some("related"));
4682    }
4683
4684    #[test]
4685    fn unquoted_frontmatter_link_is_recognized() {
4686        // Without quotes, YAML reads `[[...]]` as a nested sequence rather than
4687        // a string, yet the validator must still treat it as a wiki-link.
4688        // `short.md` carries a short-form value (expects SHORT_FORM on `related`);
4689        // `broken.md` carries a full path to a missing file (expects BROKEN).
4690        let fixture = Fixture::new();
4691        fixture.write(
4692            "records/synthesis/short.md",
4693            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4694        );
4695        fixture.write(
4696            "records/synthesis/broken.md",
4697            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4698        );
4699        let issues = fixture.store_all();
4700        assert!(
4701            issues.iter().any(|it| it.code == codes::WIKI_LINK_SHORT_FORM
4702                && it.file == Path::new("records/synthesis/short.md")
4703                && it.key.as_deref() == Some("related")),
4704            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4705        );
4706        assert!(
4707            issues.iter().any(|it| it.code == codes::WIKI_LINK_BROKEN
4708                && it.file == Path::new("records/synthesis/broken.md")),
4709            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4710        );
4711    }
4712
4713    #[test]
4714    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4715        // `company` is declared on the `contact` schema as a link field with
4716        // prefix `records/companies`. A short-form value there is reported once,
4717        // as SCHEMA_LINK_PREFIX_MISMATCH keyed on `company`, and must not also
4718        // surface as a generic WIKI_LINK_SHORT_FORM for the same field.
4719        let mut fixture = Fixture::new();
4720        fixture.config.schemas.insert(
4721            "contact".into(),
4722            Schema {
4723                fields: vec![FieldSpec {
4724                    name: "company".into(),
4725                    link_prefix: Some(PathBuf::from("records/companies")),
4726                    ..Default::default()
4727                }],
4728                ..Default::default()
4729            },
4730        );
4731        fixture.write(
4732            "records/contacts/a.md",
4733            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4734        );
4735        let issues = fixture.store_all();
4736        let mismatch = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4737        assert_eq!(mismatch.key.as_deref(), Some("company"));
4738        // No second report for `company` through the schema-less link check.
4739        assert!(
4740            issues
4741                .iter()
4742                .all(|it| !(it.code == codes::WIKI_LINK_SHORT_FORM
4743                    && it.key.as_deref() == Some("company"))),
4744            "schema link fields are checked once, by the schema path: {issues:#?}"
4745        );
4746    }
4747
4748    #[test]
4749    fn schema_link_field_with_md_extension_still_warns() {
        // A declared link field whose value ends in `.md` still gets the
        // `WIKI_LINK_HAS_EXTENSION` warning, and the existing target
        // (`acme.md`) must not be reported as broken for that field.
4750        let mut fixture = Fixture::new();
4751        fixture.config.schemas.insert(
4752            "contact".into(),
4753            Schema {
4754                fields: vec![FieldSpec {
4755                    name: "company".into(),
4756                    link_prefix: Some(PathBuf::from("records/companies")),
4757                    ..Default::default()
4758                }],
4759                ..Default::default()
4760            },
4761        );
4762        fixture.write(
4763            "records/companies/acme.md",
4764            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4765        );
4766        fixture.write(
4767            "records/contacts/a.md",
4768            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4769        );
4770        let issues = fixture.store_all();
4771        let ext_warning = issues
4772            .iter()
4773            .find(|it| {
4774                it.code == codes::WIKI_LINK_HAS_EXTENSION && it.key.as_deref() == Some("company")
4775            })
4776            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4777        assert_eq!(ext_warning.severity, Severity::Warning);
4778        assert!(
4779            issues
4780                .iter()
4781                .all(|it| !(it.code == codes::WIKI_LINK_BROKEN && it.key.as_deref() == Some("company"))),
4782            "extensionless existence check should still find acme.md: {issues:#?}"
4783        );
4784    }
4785
4786    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4787
4788    #[test]
4789    fn explicit_schema_required_shape_enum() {
        // One contact file violates all three explicit-schema rules at once:
        // a required field is absent, an email is malformed, and an enum field
        // holds a value outside its list.
4790        let fixture = {
4791            let mut seeded = Fixture::new();
4792            // `contact` schema under test: `name` required; `email` required
4793            // and email-shaped; `status` restricted to active|inactive.
4794            let contact_schema = Schema {
4795                fields: vec![
4796                    FieldSpec {
4797                        name: "name".into(),
4798                        required: true,
4799                        ..Default::default()
4800                    },
4801                    FieldSpec {
4802                        name: "email".into(),
4803                        required: true,
4804                        shape: Some(Shape::Email),
4805                        ..Default::default()
4806                    },
4807                    FieldSpec {
4808                        name: "status".into(),
4809                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4810                        ..Default::default()
4811                    },
4812                ],
4813                ..Default::default()
4814            };
4815            seeded.config.schemas.insert("contact".into(), contact_schema);
4816            seeded
4817        };
4818        fixture.write(
4819            "records/contacts/a.md",
4820            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4821        );
4822        let issues = fixture.store_all();
4823        // `name` is not in the file at all → SCHEMA_MISSING_REQUIRED.
4824        assert!(
4825            issues
4826                .iter()
4827                .any(|it| it.code == codes::SCHEMA_MISSING_REQUIRED
4828                    && it.key.as_deref() == Some("name")),
4829            "{issues:#?}"
4830        );
4831        // `email: not-an-email` → SCHEMA_SHAPE_MISMATCH.
4832        assert!(
4833            issues.iter().any(
4834                |it| it.code == codes::SCHEMA_SHAPE_MISMATCH && it.key.as_deref() == Some("email")
4835            ),
4836            "{issues:#?}"
4837        );
4838        // `status: archived` is outside active|inactive → SCHEMA_ENUM_VIOLATION.
4839        assert!(
4840            issues
4841                .iter()
4842                .any(|it| it.code == codes::SCHEMA_ENUM_VIOLATION
4843                    && it.key.as_deref() == Some("status")),
4844            "{issues:#?}"
4845        );
4846    }
4847
4848    #[test]
4849    fn schema_without_link_field_allows_plain_value() {
4850        // The `contact` schema registered here declares only `name`, so the
4851        // plain-string `company: "Acme Co"` has no link rule to violate and
4852        // must not raise SCHEMA_LINK_PREFIX_MISMATCH.
4853        let mut fixture = Fixture::new();
4854        fixture.config.schemas.insert(
4855            "contact".into(),
4856            Schema {
4857                fields: vec![FieldSpec {
4858                    name: "name".into(),
4859                    required: true,
4860                    ..Default::default()
4861                }],
4862                ..Default::default()
4863            },
4864        );
4865        fixture.write(
4866            "records/contacts/a.md",
4867            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4868        );
4869        let issues = fixture.store_all();
4870        assert!(
4871            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4872            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4873        );
4874    }
4875
4876    #[test]
4877    fn schema_link_field_plain_value_is_prefix_mismatch() {
4878        // `company` is declared as a link field with prefix `records/companies`;
4879        // a plain-string value there must be reported as
        // SCHEMA_LINK_PREFIX_MISMATCH keyed on `company`, and the suggestion
        // must mention the declared `records/companies/` prefix.
4880        let mut fx = Fixture::new();
4881        fx.config.schemas.insert(
4882            "contact".into(),
4883            Schema {
4884                fields: vec![FieldSpec {
4885                    name: "company".into(),
4886                    link_prefix: Some(PathBuf::from("records/companies")),
4887                    ..Default::default()
4888                }],
4889                ..Default::default()
4890            },
4891        );
4892        fx.write(
4893            "records/contacts/a.md",
4894            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4895        );
4896        let issues = fx.store_all();
4897        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4898        assert_eq!(issue.key.as_deref(), Some("company"));
        // `is_some_and` turns a missing suggestion into an assertion failure
        // that prints the actual value, instead of a bare `unwrap()` panic.
4899        assert!(
4900            issue
4901                .suggestion
4902                .as_deref()
4903                .is_some_and(|s| s.contains("records/companies/")),
            "suggestion should name the declared prefix: {:?}",
            issue.suggestion
        );
4904    }
4905
4906    #[test]
4907    fn schema_shape_int_and_url_and_currency() {
        // Exercises the `Int`, `Url` and `Currency` shapes on a `widget` schema:
        // `ok.md` holds a conforming value for each field and must stay silent,
        // `bad.md` holds a violating value for each and must raise one
        // SCHEMA_SHAPE_MISMATCH per field.
4908        let mut fx = Fixture::new();
4909        fx.config.schemas.insert(
4910            "widget".into(),
4911            Schema {
4912                fields: vec![
4913                    FieldSpec {
4914                        name: "qty".into(),
4915                        shape: Some(Shape::Int),
4916                        ..Default::default()
4917                    },
4918                    FieldSpec {
4919                        name: "site".into(),
4920                        shape: Some(Shape::Url),
4921                        ..Default::default()
4922                    },
4923                    FieldSpec {
4924                        name: "price".into(),
4925                        shape: Some(Shape::Currency),
4926                        ..Default::default()
4927                    },
4928                ],
4929                ..Default::default()
4930            },
4931        );
4932        // `USD 1,234.50` is the corpus-realistic shape (an `expense.currency`-style
4933        // ISO code + amount). It must pass — it used to spuriously fail.
4934        fx.write(
4935            "records/widgets/ok.md",
4936            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4937        );
4938        // `qty: five` and `site: ftp://nope` are plain shape violations.
4939        // `price: inf` guards the under-rejection half of the finding:
4940        // `inf`/`NaN`/3-decimal used to slip through because the old impl
        // leaned on `f64::parse`.
4941        fx.write(
4942            "records/widgets/bad.md",
4943            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4944        );
4945        let issues = fx.store_all();
        // Keys of every shape mismatch reported against `bad.md`.
4946        let bad_shape: Vec<_> = issues
4947            .iter()
4948            .filter(|i| {
4949                i.code == codes::SCHEMA_SHAPE_MISMATCH
4950                    && i.file == Path::new("records/widgets/bad.md")
4951            })
4952            .map(|i| i.key.clone().unwrap_or_default())
4953            .collect();
4954        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4955        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4956        assert!(
4957            bad_shape.contains(&"price".to_string()),
4958            "inf must be rejected as currency: {issues:#?}"
4959        );
4960        assert!(
4961            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4962                && i.file == Path::new("records/widgets/ok.md")),
4963            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4964        );
4965    }
4966
4967    #[test]
4968    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4969        let mut fx = Fixture::new();
4970        fx.config.schemas.insert(
4971            "contact".into(),
4972            Schema {
4973                fields: vec![
4974                    FieldSpec {
4975                        name: "email".into(),
4976                        required: true,
4977                        shape: Some(Shape::Email),
4978                        ..Default::default()
4979                    },
4980                    FieldSpec {
4981                        name: "status".into(),
4982                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4983                        ..Default::default()
4984                    },
4985                ],
4986                ..Default::default()
4987            },
4988        );
4989        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4990        // used to slip through entirely (`scalar_string` → None → the shape and
4991        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4992        fx.write(
4993            "records/contacts/bad.md",
4994            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4995        );
4996        let issues = fx.store_all();
4997        let mismatched: Vec<_> = issues
4998            .iter()
4999            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
5000            .map(|i| i.key.clone().unwrap_or_default())
5001            .collect();
5002        assert!(
5003            mismatched.contains(&"email".to_string()),
5004            "list-valued required email must flag: {issues:#?}"
5005        );
5006        assert!(
5007            mismatched.contains(&"status".to_string()),
5008            "list-valued enum must flag: {issues:#?}"
5009        );
5010    }
5011
5012    #[test]
5013    fn is_currency_accepts_codes_and_rejects_non_numeric() {
5014        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
5015        for ok in [
5016            "100",
5017            "1234.56",
5018            "$1,234.50",
5019            "USD 100", // the finding's headline probe — used to be false
5020            "usd 100", // case-insensitive code
5021            "EUR 9.50",
5022            "£12",
5023            "¥1000",
5024            "-5.00", // signed amounts are real (refunds)
5025            "+5",
5026            "1,000,000",
5027        ] {
5028            assert!(is_currency(ok), "expected currency: {ok:?}");
5029        }
5030        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
5031        // bare-code / exponent cases the docstring forbids.
5032        for bad in [
5033            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
5034            "12.999", // 3 decimals
5035            "1.2345", // 4 decimals
5036            "USD",    // bare code, no amount
5037            "$",      // bare symbol
5038            "free", "", " ", "1e3",      // exponent form
5039            "1.",       // trailing dot, no fractional digits
5040            ".5",       // leading dot, no integer digits
5041            "1 000",    // space as separator is not a thousands separator
5042            "USDD 100", // 4-letter "code" must not strip
5043        ] {
5044            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
5045        }
5046    }
5047
5048    // ── policies ───────────────────────────────────────────────────────────
5049
5050    #[test]
5051    fn ignored_type_present_is_info() {
5052        let mut fx = Fixture::new();
5053        fx.config.ignored_types.push("temp".into());
5054        fx.write(
5055            "records/temps/x.md",
5056            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
5057        );
5058        let issues = fx.store_all();
5059        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
5060        assert_eq!(issue.severity, Severity::Info);
5061        assert!(!issue.is_error());
5062        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
5063    }
5064
5065    #[test]
5066    fn conclusion_record_derived_from_ignored_type_warns() {
5067        let mut fx = Fixture::new();
5068        fx.config.ignored_types.push("temp".into());
5069        fx.write(
5070            "records/temps/x.md",
5071            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
5072        );
5073        // The policy now gates on `meta-type: conclusion` (not the retired
5074        // `type: wiki-page`): a conclusion record that derives from an
5075        // ignored-type record warns.
5076        fx.write(
5077            "records/synthesis/t.md",
5078            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
5079        );
5080        let issues = fx.store_all();
5081        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
5082        assert_eq!(issue.severity, Severity::Warning);
5083        assert_eq!(issue.key.as_deref(), Some("derived_from"));
5084        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
5085    }
5086
5087    /// The shared `derived_from_ignored_type` entry point — the single
5088    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
5089    /// warning) now route through, so they cannot diverge. This pins its
5090    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
5091    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
5092    /// match carrying the resolved target type, and a non-ignored target
5093    /// rejected.
5094    #[test]
5095    fn derived_from_ignored_type_is_the_shared_policy_decision() {
5096        let mut fx = Fixture::new();
5097        fx.config.ignored_types.push("secret".into());
5098        // An ignored-type record …
5099        fx.write(
5100            "records/secrets/s.md",
5101            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
5102        );
5103        // … and a non-ignored record.
5104        fx.write(
5105            "records/contacts/c.md",
5106            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
5107        );
5108        let store = fx.store();
5109
5110        // Positive: a conclusion record deriving from the ignored-type record
5111        // matches, and the hit carries both the target (as written) and its
5112        // resolved type.
5113        let hit =
5114            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
5115                .expect("conclusion → ignored-type record must match");
5116        assert_eq!(hit.target, "records/secrets/s");
5117        assert_eq!(hit.target_type, "secret");
5118
5119        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
5120        // the same ignored-type target.
5121        assert_eq!(
5122            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
5123            None,
5124            "only conclusion derivation is policed"
5125        );
5126
5127        // Target gate: a conclusion deriving from a non-ignored record is fine.
5128        assert_eq!(
5129            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
5130            None,
5131            "deriving from a non-ignored type is allowed"
5132        );
5133
5134        // First match wins across multiple targets (here the second is the hit).
5135        let hit = derived_from_ignored_type(
5136            &store,
5137            "conclusion",
5138            ["records/contacts/c", "records/secrets/s"],
5139        )
5140        .expect("a later ignored-type target must still be found");
5141        assert_eq!(hit.target, "records/secrets/s");
5142
5143        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
5144        fx.config.ignored_types.clear();
5145        let store = fx.store();
5146        assert_eq!(
5147            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
5148            None,
5149            "an empty ignored-types policy short-circuits"
5150        );
5151    }
5152
5153    // ── duplicates ───────────────────────────────────────────────────────────
5154
5155    #[test]
5156    fn dup_id_is_hard_error_with_related() {
5157        let fx = Fixture::new();
5158        fx.write(
5159            "records/contacts/a.md",
5160            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5161        );
5162        fx.write(
5163            "records/contacts/b.md",
5164            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5165        );
5166        let issues = fx.store_all();
5167        // Reporting rule #1: ONE issue per collision group, keyed on the
5168        // lexicographically smallest path (`a.md`), partner in `related`.
5169        assert_eq!(
5170            count(&issues, codes::DUP_ID),
5171            1,
5172            "one issue per group: {issues:#?}"
5173        );
5174        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
5175        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
5176        assert!(a.is_error());
5177        assert_eq!(a.key.as_deref(), Some("id"));
5178        assert_eq!(
5179            a.line,
5180            Some(3),
5181            "anchors to the `id` line on the reported file"
5182        );
5183        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
5184    }
5185
5186    #[test]
5187    fn dup_id_not_fired_in_working_set() {
5188        // DUP_* is an --all-only cross-file check; the working set must not run it.
5189        let fx = Fixture::new();
5190        fx.write(
5191            "records/contacts/a.md",
5192            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5193        );
5194        fx.write(
5195            "records/contacts/b.md",
5196            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5197        );
5198        // Log says both changed since epoch, so they're in the working set.
5199        fx.write(
5200            "log.md",
5201            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
5202        );
5203        let issues = validate_working_set(&fx.store(), None).unwrap();
5204        assert!(
5205            !has(&issues, codes::DUP_ID),
5206            "DUP_ID is --all only: {issues:#?}"
5207        );
5208    }
5209
5210    #[test]
5211    fn dup_unique_key_single_field_is_warning() {
5212        let mut fx = Fixture::new();
5213        // contact declares `- unique: email`.
5214        fx.config.schemas.insert(
5215            "contact".into(),
5216            Schema {
5217                unique_keys: vec![vec!["email".into()]],
5218                ..Default::default()
5219            },
5220        );
5221        for (f, name) in [("a", "A"), ("b", "B")] {
5222            fx.write(
5223                &format!("records/contacts/{f}.md"),
5224                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
5225            );
5226        }
5227        let issues = fx.store_all();
5228        // One issue per group (rule #1), keyed on the smallest path, anchored to
5229        // the single `email` field.
5230        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
5231        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5232        assert_eq!(dup.severity, Severity::Warning);
5233        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
5234        assert_eq!(dup.key.as_deref(), Some("email"));
5235        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
5236    }
5237
5238    #[test]
5239    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
5240        let mut fx = Fixture::new();
5241        // expense declares `- unique: date, amount, vendor` (a compound key).
5242        fx.config.schemas.insert(
5243            "expense".into(),
5244            Schema {
5245                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
5246                ..Default::default()
5247            },
5248        );
5249        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
5250        let exp = |f: &str, amount: &str| {
5251            format!(
5252            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
5253        )
5254        };
5255        fx.write("records/expenses/e1.md", &exp("e1", "100"));
5256        fx.write("records/expenses/e2.md", &exp("e2", "100"));
5257        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
5258        let issues = fx.store_all();
5259        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
5260        // (e1) with e2 in `related`; e3 differs on amount and never appears.
5261        assert_eq!(
5262            count(&issues, codes::DUP_UNIQUE_KEY),
5263            1,
5264            "only e1+e2 collide, one issue: {issues:#?}"
5265        );
5266        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5267        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5268        assert_eq!(
5269            dup.line,
5270            Some(1),
5271            "compound-key collision anchors to line 1"
5272        );
5273        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5274        assert!(
5275            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5276                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5277            "e3 differs on amount and must not collide: {issues:#?}"
5278        );
5279    }
5280
5281    #[test]
5282    fn dup_unique_key_list_field_is_order_independent() {
5283        let mut fx = Fixture::new();
5284        // meeting declares `- unique: date, attendees`; the list field is a set.
5285        fx.config.schemas.insert(
5286            "meeting".into(),
5287            Schema {
5288                unique_keys: vec![vec!["date".into(), "attendees".into()]],
5289                ..Default::default()
5290            },
5291        );
5292        fx.write("records/contacts/a.md", &valid_contact("a"));
5293        fx.write("records/contacts/b.md", &valid_contact("b"));
5294        let m = |f: &str, order: &str| {
5295            let attendees = if order == "ab" {
5296                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
5297            } else {
5298                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
5299            };
5300            format!(
5301                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5302            )
5303        };
5304        fx.write("records/meetings/m1.md", &m("m1", "ab"));
5305        fx.write("records/meetings/m2.md", &m("m2", "ba"));
5306        let issues = fx.store_all();
5307        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
5308        // → a single issue on the smaller path.
5309        assert_eq!(
5310            count(&issues, codes::DUP_UNIQUE_KEY),
5311            1,
5312            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5313        );
5314        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5315        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5316        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5317    }
5318
5319    // ── indexes ───────────────────────────────────────────────────────────────
5320
5321    #[test]
5322    fn missing_indexes_at_all_three_levels() {
5323        let fx = Fixture::new();
5324        fx.write("records/contacts/a.md", &valid_contact("a"));
5325        let issues = fx.store_all();
5326        // root, layer (records), and type-folder (records/contacts) all missing.
5327        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
5328        // would-be index.md), per the field convention `EXPECTED` pins.
5329        let missing_files: BTreeSet<PathBuf> = issues
5330            .iter()
5331            .filter(|i| i.code == codes::INDEX_MISSING)
5332            .map(|i| i.file.clone())
5333            .collect();
5334        assert!(
5335            missing_files.contains(&PathBuf::from("index.md")),
5336            "{issues:#?}"
5337        );
5338        assert!(
5339            missing_files.contains(&PathBuf::from("records/index.md")),
5340            "{issues:#?}"
5341        );
5342        assert!(
5343            missing_files.contains(&PathBuf::from("records/contacts")),
5344            "{issues:#?}"
5345        );
5346        // When the index.md is entirely absent we do NOT additionally fire
5347        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5348        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5349    }
5350
5351    #[test]
5352    fn index_stale_entry_and_missing_entry() {
5353        let fx = Fixture::new();
5354        fx.write(
5355            "records/contacts/present.md",
5356            &valid_contact("present contact"),
5357        );
5358        // Indexes for the parents (root/layer) present so we isolate type-folder.
5359        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5360        fx.write(
5361            "records/index.md",
5362            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5363        );
5364        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5365        fx.write(
5366            "records/contacts/index.md",
5367            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5368        );
5369        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5370        let issues = fx.store_all();
5371        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5372        assert!(stale.message.contains("ghost"));
5373        assert!(stale.is_error());
5374        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5375        assert!(
5376            missing.message.contains("present.md"),
5377            "{}",
5378            missing.message
5379        );
5380    }
5381
5382    #[test]
5383    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5384        let fx = Fixture::new();
5385        fx.write("records/contacts/a.md", &valid_contact("a"));
5386        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5387        fx.write(
5388            "records/index.md",
5389            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5390        );
5391        fx.write(
5392            "records/contacts/index.md",
5393            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5394        );
5395        fx.write(
5396            "records/contacts/index.jsonl",
5397            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5398        );
5399        let issues = fx.store_all();
5400        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5401        assert!(stale.message.contains("not a safe store-relative path"));
5402    }
5403
5404    #[test]
5405    fn index_summary_mismatch() {
5406        let fx = Fixture::new();
5407        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5408        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5409        fx.write(
5410            "records/index.md",
5411            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5412        );
5413        fx.write(
5414            "records/contacts/index.md",
5415            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5416        );
5417        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5418        let issues = fx.store_all();
5419        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5420        assert!(issue.is_error());
5421        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5422    }
5423
5424    #[test]
5425    fn index_summary_match_passes() {
5426        let fx = Fixture::new();
5427        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5428        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5429        fx.write(
5430            "records/index.md",
5431            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5432        );
5433        fx.write(
5434            "records/contacts/index.md",
5435            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5436        );
5437        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5438        let issues = fx.store_all();
5439        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5440    }
5441
5442    #[test]
5443    fn index_entry_with_tag_suffix_matches_summary() {
5444        let fx = Fixture::new();
5445        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5446        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5447        fx.write(
5448            "records/index.md",
5449            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5450        );
5451        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5452        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5453        // which must be stripped before comparing against the file's summary.
5454        fx.write(
5455            "records/contacts/index.md",
5456            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5457        );
5458        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5459        let issues = fx.store_all();
5460        assert!(
5461            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5462            "tag suffix should be stripped: {issues:#?}"
5463        );
5464    }
5465
5466    #[test]
5467    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5468        // Regression (the finding): a tagless file whose `summary` legitimately
5469        // ends in a single-spaced ` · #word` tail round-trips through `index
5470        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5471        // file has no tags). The validator must NOT mistake that single-spaced
5472        // tail for the renderer's tag suffix, or it reports a spurious — and
5473        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5474        let fx = Fixture::new();
5475        fx.write(
5476            "records/contacts/a.md",
5477            &valid_contact("Standup notes · #standup"),
5478        );
5479        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5480        fx.write(
5481            "records/index.md",
5482            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5483        );
5484        fx.write(
5485            "records/contacts/index.md",
5486            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5487        );
5488        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5489        let issues = fx.store_all();
5490        assert!(
5491            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5492            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5493        );
5494    }
5495
5496    #[test]
5497    fn index_jsonl_desync_missing_file_in_jsonl() {
5498        let fx = Fixture::new();
5499        fx.write("records/contacts/a.md", &valid_contact("a"));
5500        fx.write("records/contacts/b.md", &valid_contact("b"));
5501        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5502        fx.write(
5503            "records/index.md",
5504            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5505        );
5506        fx.write(
5507            "records/contacts/index.md",
5508            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5509        );
5510        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5511        fx.write(
5512            "records/contacts/index.jsonl",
5513            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5514        );
5515        let issues = fx.store_all();
5516        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5517        assert!(desync.message.contains("b.md"), "{}", desync.message);
5518    }
5519
5520    #[test]
5521    fn index_jsonl_desync_record_points_at_missing_file() {
5522        let fx = Fixture::new();
5523        fx.write("records/contacts/a.md", &valid_contact("a"));
5524        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5525        fx.write(
5526            "records/index.md",
5527            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5528        );
5529        fx.write(
5530            "records/contacts/index.md",
5531            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5532        );
5533        fx.write(
5534            "records/contacts/index.jsonl",
5535            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5536        );
5537        let issues = fx.store_all();
5538        assert!(
5539            issues
5540                .iter()
5541                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5542            "{issues:#?}"
5543        );
5544    }
5545
5546    #[test]
5547    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5548        let fx = Fixture::new();
5549        fx.write("records/contacts/a.md", &valid_contact("a"));
5550        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5551        fx.write(
5552            "records/index.md",
5553            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5554        );
5555        fx.write(
5556            "records/contacts/index.md",
5557            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5558        );
5559        fx.write(
5560            "records/contacts/index.jsonl",
5561            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5562        );
5563        let issues = fx.store_all();
5564        assert!(
5565            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5566                && i.message.contains("not a safe store-relative path")),
5567            "{issues:#?}"
5568        );
5569    }
5570
5571    #[test]
5572    fn index_jsonl_stale_summary() {
5573        let fx = Fixture::new();
5574        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5575        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5576        fx.write(
5577            "records/index.md",
5578            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5579        );
5580        fx.write(
5581            "records/contacts/index.md",
5582            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5583        );
5584        // jsonl summary disagrees with the file frontmatter.
5585        fx.write(
5586            "records/contacts/index.jsonl",
5587            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5588        );
5589        let issues = fx.store_all();
5590        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5591        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5592        assert!(stale.key.as_deref().unwrap().contains("summary"));
5593    }
5594
5595    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5596    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5597    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5598    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5599    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5600    /// `email` validated clean and answered `--where email=…` with a phantom
5601    /// value present in no file. This is the direct regression guard.
5602    #[test]
5603    fn index_jsonl_stale_queryable_field_email() {
5604        let fx = Fixture::new();
5605        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5606        fx.write("records/contacts/a.md", contact);
5607        // Start from the canonical, fully-correct sidecar set …
5608        fx.rebuild_indexes();
5609        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5610        let good = fs::read_to_string(&jsonl_path).unwrap();
5611        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5612        assert!(
5613            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5614            "freshly-rebuilt sidecar must not be stale"
5615        );
5616        // … then desync ONLY the email so it's the single differing field.
5617        assert!(
5618            good.contains("real@correct.com"),
5619            "sidecar projects email: {good}"
5620        );
5621        fx.write(
5622            "records/contacts/index.jsonl",
5623            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5624        );
5625
5626        let issues = fx.store_all();
5627        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5628        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5629        // The mismatch is reported precisely on `email`, and summary/type — which
5630        // still match — are NOT named.
5631        let key = stale.key.as_deref().unwrap();
5632        assert!(
5633            key.contains("email"),
5634            "expected `email` in stale key, got {key:?}"
5635        );
5636        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5637        assert!(!key.contains("type"), "type still matches: {key:?}");
5638    }
5639
5640    /// Broaden the guard across the typed/list/timestamp projections at once:
5641    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5642    /// caught, with all three named in one issue.
5643    #[test]
5644    fn index_jsonl_stale_typed_and_list_fields() {
5645        let fx = Fixture::new();
5646        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5647        fx.write("records/expenses/e.md", expense);
5648        fx.rebuild_indexes();
5649        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5650        let good = fs::read_to_string(&jsonl_path).unwrap();
5651        assert!(
5652            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5653            "freshly-rebuilt sidecar must not be stale"
5654        );
5655        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5656        let stale_line = good
5657            .replace("\"q2\"", "\"WRONG-TAG\"")
5658            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5659            .replace("1299", "9999");
5660        fx.write("records/expenses/index.jsonl", &stale_line);
5661
5662        let issues = fx.store_all();
5663        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5664        let key = stale.key.as_deref().unwrap();
5665        for expected in ["amount", "tags", "updated"] {
5666            assert!(
5667                key.contains(expected),
5668                "expected `{expected}` in stale key, got {key:?}"
5669            );
5670        }
5671    }
5672
5673    #[test]
5674    fn index_orphan_in_noncanonical_folder() {
5675        let fx = Fixture::new();
5676        fx.write("records/contacts/a.md", &valid_contact("a"));
5677        // Build the canonical indexes so they aren't reported as orphans.
5678        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5679        fx.write(
5680            "records/index.md",
5681            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5682        );
5683        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5684        fx.write(
5685            "records/contacts/index.jsonl",
5686            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5687        );
5688        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5689        fx.write(
5690            "records/contacts/subfolder/index.md",
5691            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5692        );
5693        let issues = fx.store_all();
5694        let orphan = find(&issues, codes::INDEX_ORPHAN);
5695        assert_eq!(orphan.severity, Severity::Warning);
5696        assert_eq!(
5697            orphan.file,
5698            PathBuf::from("records/contacts/subfolder/index.md")
5699        );
5700    }
5701
5702    #[test]
5703    fn index_wrong_scope() {
5704        let fx = Fixture::new();
5705        fx.write("records/contacts/a.md", &valid_contact("a"));
5706        // Root index declares the wrong scope.
5707        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5708        fx.write(
5709            "records/index.md",
5710            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5711        );
5712        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5713        fx.write(
5714            "records/contacts/index.jsonl",
5715            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5716        );
5717        let issues = fx.store_all();
5718        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5719        assert_eq!(issue.severity, Severity::Warning);
5720        assert_eq!(issue.file, PathBuf::from("index.md"));
5721    }
5722
5723    #[test]
5724    fn capped_type_folder_index_does_not_flag_missing_entries() {
5725        // Over the 500-entry cap, omitted entries are expected, not an error.
5726        let fx = Fixture::new();
5727        for i in 0..501 {
5728            fx.write(
5729                &format!("records/contacts/c{i:04}.md"),
5730                &valid_contact(&format!("contact {i}")),
5731            );
5732        }
5733        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5734        fx.write(
5735            "records/index.md",
5736            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5737        );
5738        // Type-folder index lists only ONE entry + a More footer.
5739        fx.write(
5740            "records/contacts/index.md",
5741            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5742        );
5743        // jsonl must still be complete — write all 501 lines.
5744        let mut jsonl = String::new();
5745        for i in 0..501 {
5746            jsonl.push_str(&format!(
5747                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5748            ));
5749        }
5750        fx.write("records/contacts/index.jsonl", &jsonl);
5751        let issues = fx.store_all();
5752        assert!(
5753            !has(&issues, codes::INDEX_MISSING_ENTRY),
5754            "over the cap, missing browse entries are expected: {issues:#?}"
5755        );
5756        // But the jsonl is complete → no desync.
5757        assert!(
5758            !has(&issues, codes::INDEX_JSONL_DESYNC),
5759            "{:#?}",
5760            issues
5761                .iter()
5762                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5763                .collect::<Vec<_>>()
5764        );
5765    }
5766
5767    // ── log ────────────────────────────────────────────────────────────────
5768
5769    #[test]
5770    fn log_bad_timestamp_unknown_kind_out_of_order() {
5771        let fx = Fixture::new();
5772        fx.write(
5773            "log.md",
5774            concat!(
5775                "---\ntype: log\n---\n\n# Log\n\n",
5776                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5777                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5778                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5779                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5780            ),
5781        );
5782        let issues = fx.store_all();
5783        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5784        assert_eq!(
5785            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5786            Severity::Warning
5787        );
5788        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5789        assert_eq!(unknown.severity, Severity::Warning);
5790        assert!(unknown.message.contains("frobnicate"));
5791        assert!(unknown
5792            .suggestion
5793            .as_deref()
5794            .is_some_and(|s| s.contains("create")));
5795        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5796        assert!(bad.is_error());
5797    }
5798
5799    #[test]
5800    fn log_validate_entry_without_object_is_well_formed() {
5801        let fx = Fixture::new();
5802        fx.write(
5803            "log.md",
5804            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5805        );
5806        let issues = fx.store_all();
5807        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5808        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5809    }
5810
5811    #[test]
5812    fn log_in_order_is_clean() {
5813        let fx = Fixture::new();
5814        fx.write(
5815            "log.md",
5816            concat!(
5817                "---\ntype: log\n---\n\n",
5818                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5819                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5820            ),
5821        );
5822        let issues = fx.store_all();
5823        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5824    }
5825
5826    #[test]
5827    fn log_not_checked_in_working_set() {
5828        // log.md ordering is an --all-only check.
5829        let fx = Fixture::new();
5830        fx.write(
5831            "log.md",
5832            concat!(
5833                "---\ntype: log\n---\n\n",
5834                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5835                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5836            ),
5837        );
5838        let issues = validate_working_set(&fx.store(), None).unwrap();
5839        assert!(
5840            !has(&issues, codes::LOG_OUT_OF_ORDER),
5841            "log ordering is --all only: {issues:#?}"
5842        );
5843    }
5844
5845    // ── working-set scoping ───────────────────────────────────────────────────
5846
5847    #[test]
5848    fn working_set_validates_only_changed_files() {
5849        let fx = Fixture::new();
5850        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5851        // in the log → working set must skip it.
5852        fx.write(
5853            "records/contacts/dirty.md",
5854            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5855        );
5856        fx.write(
5857            "records/contacts/unlogged.md",
5858            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5859        );
5860        fx.write(
5861            "log.md",
5862            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5863        );
5864        let issues = validate_working_set(&fx.store(), None).unwrap();
5865        assert!(
5866            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5867                && i.file == Path::new("records/contacts/dirty.md")),
5868            "{issues:#?}"
5869        );
5870        assert!(
5871            !issues
5872                .iter()
5873                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5874            "unlogged file must not be in the working set: {issues:#?}"
5875        );
5876    }
5877
5878    #[test]
5879    fn working_set_includes_incoming_linkers_to_changed_path() {
5880        let fx = Fixture::new();
5881        // `changed` was renamed/removed (logged). `linker` points at it with a
5882        // now-broken link and was NOT itself logged — but must be pulled in.
5883        fx.write(
5884            "records/profiles/linker.md",
5885            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5886        );
5887        // `changed.md` does NOT exist on disk (removed).
5888        fx.write(
5889            "log.md",
5890            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5891        );
5892        let issues = validate_working_set(&fx.store(), None).unwrap();
5893        assert!(
5894            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5895                && i.file == Path::new("records/profiles/linker.md")),
5896            "incoming linker to a removed path must be validated: {issues:#?}"
5897        );
5898    }
5899
5900    #[test]
5901    fn working_set_respects_explicit_since_cutoff() {
5902        let fx = Fixture::new();
5903        fx.write(
5904            "records/contacts/old.md",
5905            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5906        );
5907        fx.write(
5908            "records/contacts/new.md",
5909            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5910        );
5911        fx.write(
5912            "log.md",
5913            concat!(
5914                "---\ntype: log\n---\n\n",
5915                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5916                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5917            ),
5918        );
5919        // Cutoff after `old` but before `new`.
5920        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5921        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5922        assert!(
5923            issues
5924                .iter()
5925                .any(|i| i.file == Path::new("records/contacts/new.md")),
5926            "{issues:#?}"
5927        );
5928        assert!(
5929            !issues
5930                .iter()
5931                .any(|i| i.file == Path::new("records/contacts/old.md")),
5932            "old change is before the cutoff: {issues:#?}"
5933        );
5934    }
5935
5936    #[test]
5937    fn working_set_default_since_is_last_validate_entry() {
5938        let fx = Fixture::new();
5939        // `before` changed before the last validate; `after` changed after.
5940        fx.write(
5941            "records/contacts/before.md",
5942            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5943        );
5944        fx.write(
5945            "records/contacts/after.md",
5946            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5947        );
5948        fx.write(
5949            "log.md",
5950            concat!(
5951                "---\ntype: log\n---\n\n",
5952                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5953                "## [2026-05-21 10:00] validate\nPASS\n\n",
5954                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5955            ),
5956        );
5957        let issues = validate_working_set(&fx.store(), None).unwrap();
5958        assert!(
5959            issues
5960                .iter()
5961                .any(|i| i.file == Path::new("records/contacts/after.md")),
5962            "{issues:#?}"
5963        );
5964        assert!(
5965            !issues
5966                .iter()
5967                .any(|i| i.file == Path::new("records/contacts/before.md")),
5968            "change before the last validate entry is outside the default window: {issues:#?}"
5969        );
5970    }
5971
5972    // ── ordering / determinism ────────────────────────────────────────────────
5973
5974    #[test]
5975    fn issues_are_sorted_by_file_then_line() {
5976        let fx = Fixture::new();
5977        fx.write("records/profiles/z.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5978        fx.write("records/profiles/a.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5979        let issues = fx.store_all();
5980        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5981        let mut sorted = files.clone();
5982        sorted.sort();
5983        assert_eq!(
5984            files, sorted,
5985            "issues must be emitted in a stable file order"
5986        );
5987    }
5988
5989    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5990
5991    #[test]
5992    fn frozen_page_is_not_a_validate_error() {
5993        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5994        // A clean file listed in `### Frozen pages` must validate clean.
5995        let mut fx = Fixture::new();
5996        fx.config
5997            .frozen_pages
5998            .push(PathBuf::from("records/decisions/d.md"));
5999        fx.write(
6000            "records/decisions/d.md",
6001            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
6002        );
6003        let issues = fx.store_all();
6004        assert!(
6005            !has(&issues, codes::POLICY_FROZEN_PAGE),
6006            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
6007        );
6008    }
6009
6010    #[test]
6011    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
6012        // The full-path doctrine makes ambiguity impossible; the defensive code
6013        // must never fire on a normal store.
6014        let fx = Fixture::new();
6015        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
6016        let mut body = valid_contact("links to sarah");
6017        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
6018        fx.write("records/contacts/p.md", &body);
6019        let issues = fx.store_all();
6020        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
6021    }
6022
6023    // ── unknown-type / unknown-field passthrough ──────────────────────────────
6024
6025    #[test]
6026    fn unknown_type_passes_through() {
6027        // A custom type is ambient context: it has a `type`, so no
6028        // FM_MISSING_TYPE, and with no matching schema there are no schema
6029        // errors. Only the universal contract (summary, timestamps) applies.
6030        let fx = Fixture::new();
6031        fx.write(
6032            "records/proposals/x.md",
6033            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
6034        );
6035        let issues = fx.store_all();
6036        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
6037        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
6038        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
6039        // The unknown fields don't trip anything.
6040        assert!(
6041            !issues
6042                .iter()
6043                .any(|i| i.key.as_deref() == Some("custom_field")
6044                    || i.key.as_deref() == Some("budget")),
6045            "unknown fields are ambient context: {issues:#?}"
6046        );
6047    }
6048
6049    // ── find_links_to prefix-collision safety (working set) ───────────────────
6050
6051    #[test]
6052    fn incoming_linker_scan_does_not_prefix_match() {
6053        // A changed `records/contacts/sarah` must NOT pull in a file that only
6054        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
6055        let fx = Fixture::new();
6056        fx.write(
6057            "records/profiles/only-sarah-chen.md",
6058            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6059        );
6060        // The log says `records/contacts/sarah` (the shorter path) changed.
6061        fx.write(
6062            "log.md",
6063            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
6064        );
6065        let issues = validate_working_set(&fx.store(), None).unwrap();
6066        assert!(
6067            !issues
6068                .iter()
6069                .any(|i| i.file == Path::new("records/profiles/only-sarah-chen.md")),
6070            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
6071        );
6072    }
6073
6074    #[test]
6075    fn working_set_does_not_flag_stale_catalog_index_as_wiki_link_broken() {
6076        // The working-set incoming-linker scan rides embedded-ripgrep
6077        // `Store::find_links_to`, which scans EVERY `.md` — so a type-folder
6078        // `index.md` listing a now-deleted target IS pulled into the working set.
6079        // But its entries are GENERATED catalog entries, not authored body links:
6080        // a dangling one is an `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"),
6081        // the job of `check_indexes` under `--all` — NOT a `WIKI_LINK_BROKEN`
6082        // ("create the target"), whose remedy would steer an agent to recreate
6083        // the very data it just deleted. The loop default must therefore NOT
6084        // body-link-check the derived catalog (index integrity is an O(store)
6085        // sweep concern, not an O(changed) loop concern). Adversarial review #11:
6086        // the prior behavior gave WIKI_LINK_BROKEN here while `--all` gave
6087        // INDEX_STALE_ENTRY for the identical condition — two codes, opposite
6088        // remedies, across the loop default vs the sweep.
6089        let fx = Fixture::new();
6090        // A catalog that still lists the deleted contact (a real, common stale
6091        // state after an out-of-band `delete`).
6092        fx.write(
6093            "records/contacts/index.md",
6094            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
6095        );
6096        // The log says `records/contacts/sarah-chen` was deleted.
6097        fx.write(
6098            "log.md",
6099            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
6100        );
6101        let issues = validate_working_set(&fx.store(), None).unwrap();
6102        assert!(
6103            !issues
6104                .iter()
6105                .any(|i| i.file == Path::new("records/contacts/index.md")
6106                    && i.code == codes::WIKI_LINK_BROKEN),
6107            "a stale catalog `index.md` entry must NOT be WIKI_LINK_BROKEN in the \
6108             working set (it is an INDEX_STALE_ENTRY under `--all`): {issues:#?}"
6109        );
6110    }
6111
6112    #[test]
6113    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
6114        // CONTRACT (the O(changed × store) fix): the working-set scan finds
6115        // incoming linkers for EVERY changed object, and does so via the single
6116        // batch pass `Store::find_links_to_any` — not one full store read per
6117        // changed object. This test pins the behavior that makes the single-pass
6118        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
6119        // into the working set and flagged. A regression that scanned for only the
6120        // first/last changed object, or that dropped the batch union, would leave
6121        // one of the two broken links unreported and fail here.
6122        let fx = Fixture::new();
6123        // Linker A → deleted target #1 (in the body).
6124        fx.write(
6125            "records/profiles/refers-sarah.md",
6126            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6127        );
6128        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
6129        // sidecar `links` projection would miss, which is why this must be a
6130        // content scan, not a sidecar read).
6131        fx.write(
6132            "records/meetings/2026/05/kickoff.md",
6133            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
6134        );
6135        // The log says BOTH targets were deleted in this window.
6136        fx.write(
6137            "log.md",
6138            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
6139        );
6140
6141        let issues = validate_working_set(&fx.store(), None).unwrap();
6142        assert!(
6143            issues
6144                .iter()
6145                .any(|i| i.file == Path::new("records/profiles/refers-sarah.md")
6146                    && i.code == codes::WIKI_LINK_BROKEN),
6147            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
6148        );
6149        assert!(
6150            issues.iter().any(
6151                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
6152                    && i.code == codes::WIKI_LINK_BROKEN
6153            ),
6154            "linker to the SECOND deleted target (typed-field edge) must also be \
6155             pulled in and flagged — proves the scan covers the whole changed set, \
6156             not just one object: {issues:#?}"
6157        );
6158    }
6159
6160    #[test]
6161    fn frontmatter_block_sequence_links_each_get_their_own_line() {
6162        // Each block-sequence wiki-link reports on its own source line.
6163        let fx = Fixture::new();
6164        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
6165        fx.write(
6166            "records/meetings/m.md",
6167            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
6168        );
6169        let issues = fx.store_all();
6170        let broken_lines: BTreeSet<Option<u32>> = issues
6171            .iter()
6172            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
6173            .map(|i| i.line)
6174            .collect();
6175        assert_eq!(
6176            broken_lines.len(),
6177            2,
6178            "two distinct broken-link lines: {issues:#?}"
6179        );
6180    }
6181
6182    // ── Regression: null / non-scalar created/updated ────────────────────────
6183
6184    #[test]
6185    fn null_created_is_missing_not_silently_passed() {
6186        // Regression: a present-but-`null` `created:` previously slipped past
6187        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
6188        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
6189        let fx = Fixture::new();
6190        fx.write(
6191            "records/contacts/a.md",
6192            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6193        );
6194        let issues = fx.store_all();
6195        assert!(
6196            has(&issues, codes::FM_MISSING_CREATED),
6197            "null `created:` must read as missing: {issues:#?}"
6198        );
6199    }
6200
6201    #[test]
6202    fn sequence_created_is_bad_timestamp() {
6203        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
6204        let fx = Fixture::new();
6205        fx.write(
6206            "records/contacts/a.md",
6207            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6208        );
6209        let issues = fx.store_all();
6210        assert!(
6211            issues
6212                .iter()
6213                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
6214            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
6215        );
6216    }
6217
6218    // ── Regression: schema required null / empty-collection ──────────────────
6219
6220    #[test]
6221    fn required_field_null_or_empty_collection_is_missing() {
6222        // Regression: a plain required field (no shape/enum) holding YAML null
6223        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
6224        // previously validated with 0 issues — `scalar_string` returned None and
6225        // `.unwrap_or(false)` treated the value as non-empty.
6226        for value in ["", " []", " {}"] {
6227            let mut fx = Fixture::new();
6228            fx.config.schemas.insert(
6229                "contact".into(),
6230                Schema {
6231                    fields: vec![FieldSpec {
6232                        name: "name".into(),
6233                        required: true,
6234                        ..Default::default()
6235                    }],
6236                    ..Default::default()
6237                },
6238            );
6239            fx.write(
6240                "records/contacts/a.md",
6241                &format!(
6242                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
6243                ),
6244            );
6245            let issues = fx.store_all();
6246            assert!(
6247                issues
6248                    .iter()
6249                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
6250                        && i.key.as_deref() == Some("name")),
6251                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
6252            );
6253        }
6254    }
6255
6256    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
6257
6258    #[test]
6259    fn wiki_link_to_raw_source_file_resolves() {
6260        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
6261        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
6262        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
6263        let fx = Fixture::new();
6264        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6265        fx.write(
6266            "records/contacts/a.md",
6267            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
6268        );
6269        let issues = fx.store_all();
6270        assert!(
6271            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
6272            "a link to an existing raw source file must not be broken: {issues:#?}"
6273        );
6274    }
6275
6276    // ── Regression: wrong-case wiki-link must be platform-independent ─────────
6277
6278    #[test]
6279    fn wrong_case_wiki_link_is_broken_exact_case() {
6280        // Regression (cross-platform false-negative): on case-insensitive
6281        // APFS/macOS, `Path::is_file()` resolves `[[records/contacts/BOB]]` to the
6282        // on-disk `bob.md`, so validate passed — but on case-sensitive Linux that
6283        // file does not exist (WIKI_LINK_BROKEN). Existence resolution is now
6284        // exact-case, so a wrong-case target is flagged on every platform.
6285        let fx = Fixture::new();
6286        fx.write("records/contacts/bob.md", &valid_contact("Bob"));
6287        let mut body = valid_contact("links with the wrong case");
6288        body.push_str("\nKnows [[records/contacts/BOB]].\n");
6289        fx.write("records/contacts/alice.md", &body);
6290        let issues = fx.store_all();
6291        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
6292        assert!(issue.is_error());
6293        assert!(
6294            issue.message.contains("records/contacts/BOB"),
6295            "the wrong-case target must be named in the issue: {issues:#?}"
6296        );
6297    }
6298
6299    #[test]
6300    fn correct_case_wiki_link_still_resolves() {
6301        // The companion to the exact-case fix: a *correct*-case lowercase link to
6302        // the same on-disk file must STILL resolve clean. Only a genuine case
6303        // mismatch is newly flagged; correct case is never a false positive.
6304        let fx = Fixture::new();
6305        fx.write("records/contacts/bob.md", &valid_contact("Bob"));
6306        let mut body = valid_contact("links with the right case");
6307        body.push_str("\nKnows [[records/contacts/bob]].\n");
6308        fx.write("records/contacts/alice.md", &body);
6309        let issues = fx.store_all();
6310        assert!(
6311            !issues
6312                .iter()
6313                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("contacts/bob")),
6314            "a correct-case link must resolve clean: {issues:#?}"
6315        );
6316    }
6317
6318    #[test]
6319    fn wrong_case_raw_source_wiki_link_is_broken() {
6320        // The literal-path candidate (raw `.eml`/`.pdf` sources kept verbatim)
6321        // gets the same exact-case treatment as the `.md`-appended candidate: a
6322        // wrong-case link to a raw source is broken on a case-sensitive host, so
6323        // it must flag on macOS too.
6324        let fx = Fixture::new();
6325        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6326        fx.write(
6327            "records/contacts/a.md",
6328            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-ELENA.eml]] for context.\n",
6329        );
6330        let issues = fx.store_all();
6331        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
6332        assert!(issue.is_error());
6333        assert!(
6334            issue.message.contains("2026-05-22-ELENA.eml"),
6335            "the wrong-case raw-source target must be flagged: {issues:#?}"
6336        );
6337    }
6338
6339    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
6340
6341    #[test]
6342    fn non_utf8_content_file_is_reported() {
6343        // Regression: a content file with invalid UTF-8 bytes made
6344        // check_content_file return None silently, so the store passed with exit
6345        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
6346        let fx = Fixture::new();
6347        let abs = fx.dir.path().join("records/notes/corrupt.md");
6348        fs::create_dir_all(abs.parent().unwrap()).unwrap();
6349        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
6350        let issues = validate_working_set(&fx.store(), None).unwrap();
6351        assert!(
6352            has(&issues, codes::FM_UNREADABLE),
6353            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
6354        );
6355    }
6356
6357    // ── Regression: code-fence char/run tracking ─────────────────────────────
6358
6359    #[test]
6360    fn tilde_fence_containing_backtick_fence_does_not_invert() {
6361        // Regression: a `~~~` block legally contains ``` lines (documenting a
6362        // backtick fence); a naive toggle inverted `in_fence` and checked the
6363        // demo `[[fake]]` inside the code block as a live link. The link inside
6364        // BOTH fences must be skipped.
6365        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
6366        let links = extract_wiki_links(body);
6367        assert!(
6368            links.is_empty(),
6369            "wiki-link inside a nested code fence must be skipped: {links:?}"
6370        );
6371    }
6372
6373    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
6374
6375    #[test]
6376    fn all_sweep_visits_in_layer_log_folder() {
6377        // Regression: `validate --all` pruned every dir named `log`, so a real
6378        // content folder like `records/log/` was invisible to the full sweep —
6379        // reporting FEWER errors than the default scope. A frontmatter-less file
6380        // there must still surface FM_MISSING_TYPE under --all.
6381        let fx = Fixture::new();
6382        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
6383        let issues = fx.store_all();
6384        assert!(
6385            has(&issues, codes::FM_MISSING_TYPE),
6386            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
6387        );
6388    }
6389
6390    // ── Regression: flow-form list with whitespace ───────────────────────────
6391
6392    #[test]
6393    fn flow_form_link_list_with_spaces_is_flagged() {
6394        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
6395        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
6396        // text test. The value-based detector must catch the whitespace variant.
6397        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
6398        assert!(
6399            keys.iter().any(|k| k == "attendees"),
6400            "spaced flow-form list must be detected: {keys:?}"
6401        );
6402    }
6403
6404    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6405
6406    #[test]
6407    fn middot_hashtag_summary_tail_round_trips() {
6408        // Regression: a tagless summary that legitimately ends in a single-spaced
6409        // ` · #word` tail round-trips through the renderer verbatim, but the loose
6410        // ` · ` strip mistook it for the tag block and reported a spurious,
6411        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
6412        // exact double-spaced `  ·  ` delimiter.
6413        assert_eq!(
6414            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
6415            Some("Standup notes · #standup"),
6416            "a single-spaced middot tail is part of the summary, not a tag block"
6417        );
6418        // The renderer's real double-spaced tag suffix IS still stripped.
6419        assert_eq!(
6420            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
6421            Some("Renewal champion"),
6422            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
6423        );
6424    }
6425
6426    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6427
6428    #[test]
6429    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
6430        assert!(is_url("http://x"), "an 8-char http URL is valid");
6431        assert!(is_url("https://x"), "a 9-char https URL is valid");
6432        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
6433        assert!(!is_url("https://"), "a bare https scheme is rejected");
6434    }
6435
6436    #[test]
6437    fn email_shape_rejects_double_at() {
6438        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6439        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6440        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6441    }
6442
6443    // ── Regression: working-set vs --all agree on log.md links ───────────────
6444
6445    #[test]
6446    fn working_set_does_not_flag_log_md_body_links() {
6447        // Regression: the working-set incoming-linker scan runs root `log.md`
6448        // through the body wiki-link check, flagging a historical `[[deleted]]`
6449        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6450        // the append-only log can't have "fixed". The root meta files must be
6451        // excluded from the body link check, matching --all.
6452        let fx = Fixture::new();
6453        fx.write("records/contacts/a.md", &valid_contact("A"));
6454        fx.write(
6455            "log.md",
6456            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6457        );
6458        let issues = validate_working_set(&fx.store(), None).unwrap();
6459        assert!(
6460            !issues
6461                .iter()
6462                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6463                    && i.file == std::path::Path::new("log.md")),
6464            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6465        );
6466    }
6467
6468    // ── Regression: DB.md schema field lint ──────────────────────────────────
6469
6470    #[test]
6471    fn schema_duplicate_field_name_is_flagged() {
6472        let mut fx = Fixture::new();
6473        fx.config.schemas.insert(
6474            "contact".into(),
6475            Schema {
6476                fields: vec![
6477                    FieldSpec {
6478                        name: "name".into(),
6479                        required: true,
6480                        ..Default::default()
6481                    },
6482                    FieldSpec {
6483                        name: "name".into(),
6484                        ..Default::default()
6485                    },
6486                ],
6487                ..Default::default()
6488            },
6489        );
6490        let issues = fx.store_all();
6491        assert!(
6492            issues
6493                .iter()
6494                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6495            "a duplicate schema field name must be flagged: {issues:#?}"
6496        );
6497    }
6498
6499    #[test]
6500    fn schema_unknown_modifier_is_info() {
6501        let mut fx = Fixture::new();
6502        fx.config.schemas.insert(
6503            "contact".into(),
6504            Schema {
6505                fields: vec![FieldSpec {
6506                    name: "name".into(),
6507                    unknown_modifiers: vec!["requierd".into()],
6508                    ..Default::default()
6509                }],
6510                ..Default::default()
6511            },
6512        );
6513        let issues = fx.store_all();
6514        assert!(
6515            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6516                && i.severity == Severity::Info
6517                && i.key.as_deref() == Some("name")),
6518            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6519        );
6520    }
6521
6522    /// A `unique:` key naming a declared-but-optional field silently skips
6523    /// every record missing that field (an incomplete key never collides), so
6524    /// the declaration itself must warn. The dogfood case: `unique: date,
6525    /// amount, vendor` with `vendor` optional — a vendorless re-entered
6526    /// expense sails past the check.
6527    #[test]
6528    fn schema_unique_key_optional_field_is_warning() {
6529        let mut fx = Fixture::new();
6530        fx.config.schemas.insert(
6531            "expense".into(),
6532            Schema {
6533                fields: vec![
6534                    FieldSpec {
6535                        name: "date".into(),
6536                        required: true,
6537                        ..Default::default()
6538                    },
6539                    FieldSpec {
6540                        name: "amount".into(),
6541                        required: true,
6542                        ..Default::default()
6543                    },
6544                    FieldSpec {
6545                        name: "vendor".into(),
6546                        ..Default::default()
6547                    },
6548                ],
6549                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
6550                ..Default::default()
6551            },
6552        );
6553        let issues = fx.store_all();
6554        assert!(
6555            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6556                && i.severity == Severity::Warning
6557                && i.key.as_deref() == Some("vendor")
6558                && i.message.contains("unique")),
6559            "a `unique:` key field not marked required must warn: {issues:#?}"
6560        );
6561        // The required key fields are fine — no warning for them.
6562        assert!(
6563            !issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6564                && matches!(i.key.as_deref(), Some("date") | Some("amount"))),
6565            "required key fields must not warn: {issues:#?}"
6566        );
6567    }
6568
6569    /// A `unique:` key naming a field the schema never declares can also never
6570    /// be `required` — same silent skip, same warning.
6571    #[test]
6572    fn schema_unique_key_undeclared_field_is_warning() {
6573        let mut fx = Fixture::new();
6574        fx.config.schemas.insert(
6575            "expense".into(),
6576            Schema {
6577                fields: vec![FieldSpec {
6578                    name: "date".into(),
6579                    required: true,
6580                    ..Default::default()
6581                }],
6582                unique_keys: vec![vec!["date".into(), "vendor".into()]],
6583                ..Default::default()
6584            },
6585        );
6586        let issues = fx.store_all();
6587        assert!(
6588            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6589                && i.severity == Severity::Warning
6590                && i.key.as_deref() == Some("vendor")
6591                && i.message.contains("not declared")),
6592            "a `unique:` key field absent from the schema must warn: {issues:#?}"
6593        );
6594    }
6595
6596    /// The clean shape — every key field `required` — stays silent.
6597    #[test]
6598    fn schema_unique_key_all_required_is_clean() {
6599        let mut fx = Fixture::new();
6600        fx.config.schemas.insert(
6601            "expense".into(),
6602            Schema {
6603                fields: vec![
6604                    FieldSpec {
6605                        name: "date".into(),
6606                        required: true,
6607                        ..Default::default()
6608                    },
6609                    FieldSpec {
6610                        name: "amount".into(),
6611                        required: true,
6612                        ..Default::default()
6613                    },
6614                ],
6615                unique_keys: vec![vec!["date".into(), "amount".into()]],
6616                ..Default::default()
6617            },
6618        );
6619        let issues = fx.store_all();
6620        assert!(
6621            !issues
6622                .iter()
6623                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.message.contains("unique")),
6624            "an all-required unique key must not warn: {issues:#?}"
6625        );
6626    }
6627
6628    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6629    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6630    /// and the module doc-comment promises this code implements "exactly those
6631    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6632    /// new validation code is added to the engine but never documented.
6633    #[test]
6634    fn every_code_constant_is_documented_in_spec() {
6635        // Parse the canonical constant *values* straight out of this module's
6636        // source, so a future `pub const X: &str = "X";` is covered with no test
6637        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6638        let this_src = include_str!("validate.rs");
6639        let mut codes_in_module: Vec<String> = Vec::new();
6640        let mut in_codes_mod = false;
6641        for line in this_src.lines() {
6642            let t = line.trim();
6643            if t.starts_with("pub mod codes") {
6644                in_codes_mod = true;
6645                continue;
6646            }
6647            // The `mod codes` block ends at its closing brace at column 0.
6648            if in_codes_mod && line == "}" {
6649                break;
6650            }
6651            if in_codes_mod {
6652                if let Some(rest) = t.strip_prefix("pub const ") {
6653                    // rest = `NAME: &str = "VALUE";`
6654                    let value = rest
6655                        .split_once('=')
6656                        .map(|(_, v)| v.trim())
6657                        .and_then(|v| v.strip_prefix('"'))
6658                        .and_then(|v| v.strip_suffix("\";"))
6659                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6660                    codes_in_module.push(value.to_string());
6661                }
6662            }
6663        }
6664        assert!(
6665            codes_in_module.len() >= 36,
6666            "parsed only {} code constants from `mod codes`; the parser likely \
6667             broke against a source-format change",
6668            codes_in_module.len()
6669        );
6670
6671        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6672        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6673        let spec = fs::read_to_string(&spec_path)
6674            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6675
6676        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6677        let missing: Vec<&String> = codes_in_module
6678            .iter()
6679            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6680            .collect();
6681        assert!(
6682            missing.is_empty(),
6683            "validation codes emitted by the engine but absent from SPEC.md \
6684             § Validation (the declared complete vocabulary): {missing:?}"
6685        );
6686    }
6687
6688    // ── loose files (directly at a layer root, no type-folder) ───────────────
6689
6690    const LOOSE_ALICE: &str = "---\ntype: contact\nid: alice\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Alice\n---\nbody\n";
6691    const LOOSE_BOB: &str = "---\ntype: contact\nid: bob\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Bob loose\n---\nbody\n";
6692
6693    #[test]
6694    fn loose_file_catalogued_in_layer_jsonl_validates_clean() {
6695        let fx = Fixture::new();
6696        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6697        fx.write("records/bob.md", LOOSE_BOB); // loose, directly under records/
6698        fx.rebuild_indexes();
6699        let issues = fx.store_all();
6700        assert!(
6701            issues.is_empty(),
6702            "a rebuilt store with a catalogued loose file must validate clean, got: {issues:?}"
6703        );
6704    }
6705
6706    #[test]
6707    fn loose_file_with_missing_layer_jsonl_is_index_jsonl_missing() {
6708        let fx = Fixture::new();
6709        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6710        fx.write("records/bob.md", LOOSE_BOB);
6711        fx.rebuild_indexes();
6712        // Simulate the layer sidecar going missing (a hand-deletion / bad sync).
6713        fs::remove_file(fx.dir.path().join("records/index.jsonl")).unwrap();
6714        let issues = fx.store_all();
6715        assert!(
6716            has(&issues, codes::INDEX_JSONL_MISSING),
6717            "a loose file with no layer index.jsonl must raise INDEX_JSONL_MISSING, got: {issues:?}"
6718        );
6719    }
6720}