Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and `--all` file walk here, reading the two public,
//! caller-populated fields of a [`Store`] — [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! The exceptions are the working-set path's two store-level scans,
//! [`Store::find_links_to_any`] (incoming linkers) and [`Store::walk`] (the
//! vacuous-pass fallback sweep), and the [`Schema`] / [`Shape`] types imported
//! from [`crate::parser`].
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is. Validation fails (non-zero exit) as
/// soon as any issue is a [`Severity::Error`]; warnings and info are reported
/// but never fail the run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point left to the agent's discretion.
    Warning,
    /// Informational only; has no effect on the exit status.
    Info,
}
61
62/// A single structured validation finding. Agent-primary and machine-parseable
63/// via `--json`; `suggestion` is a deterministic remediation hint the agent
64/// applies without guessing.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Issue {
67    /// The severity; only [`Severity::Error`] fails validation.
68    pub severity: Severity,
69    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
70    pub code: &'static str,
71    /// The file the issue is about.
72    pub file: PathBuf,
73    /// The 1-based line, when applicable.
74    pub line: Option<u32>,
75    /// The frontmatter key, when the issue is about a specific field.
76    pub key: Option<String>,
77    /// A human-readable message.
78    pub message: String,
79    /// A deterministic remediation hint, when one exists.
80    pub suggestion: Option<String>,
81    /// Other files involved (e.g. the duplicate partner in a collision).
82    pub related: Vec<PathBuf>,
83}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// Canonical validation issue codes: one constant for each row of the SPEC.md
/// § Validation table. Call sites use these constants rather than bare string
/// literals so the code and the SPEC table cannot drift apart unnoticed.
pub mod codes {
    // ── Store identity (`DB.md`) ─────────────────────────────────────────────

    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` does not declare `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter is missing `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` contains an `##` section that is not one of the three
    /// recognized sections.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A field declaration under `DB.md ## Schemas` is malformed (empty or
    /// duplicate field name) or has an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";

    // ── Frontmatter ──────────────────────────────────────────────────────────

    /// A content file lacks `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file lacks `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file lacks `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// A content file cannot be read (invalid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// The frontmatter block is not valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` is not ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// `meta-type` is set to something other than fact / operational /
    /// conclusion.
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";

    // ── Summary ──────────────────────────────────────────────────────────────

    /// A content file lacks `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` spans more than one line.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";

    // ── Wiki-links ───────────────────────────────────────────────────────────

    /// A wiki-link target is not a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link points at a file that does not exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches more than one file (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target includes a `.md` extension, which should be dropped.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list is written inline as `[[[a]], [[b]]]`; block form is
    /// required.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";

    // ── Entity dedup ─────────────────────────────────────────────────────────

    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of one type collide on a `unique:` key declared under
    /// `DB.md ## Schemas`.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";

    // ── Schema conformance ───────────────────────────────────────────────────

    /// A field required by a `DB.md` schema is absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value does not match the shape modifier of its schema.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field holds a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value is outside the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";

    // ── Policy ───────────────────────────────────────────────────────────────

    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file exists whose type is listed under `### Ignored types`.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `meta-type: conclusion` record derives from a record of an ignored
    /// type.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";

    // ── `log.md` ─────────────────────────────────────────────────────────────

    /// The timestamp in a `log.md` entry header cannot be parsed.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// The kind of a `log.md` entry is not recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries are not in non-decreasing time order (possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";

    // ── Indexes (`index.md` / `index.jsonl`) ─────────────────────────────────

    /// A non-empty canonical folder has no `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file is absent from its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` lives in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// The `scope:` of an index disagrees with its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// The text of an index entry differs from the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// The `index.jsonl` twin of a type-folder is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file is absent from the `index.jsonl`, or a jsonl record points at a
    /// file that is missing.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// The fields of an `index.jsonl` record disagree with the file's
    /// frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";

    // ── Tags ─────────────────────────────────────────────────────────────────

    /// `tags` is not a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";

    // ── Assets (`assets.jsonl`) ──────────────────────────────────────────────

    /// A line of `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// A content file references an `asset`/`assets` path that has no record
    /// in `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// An `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// No wrapper references the path of an `assets.jsonl` record.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// An `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
197
/// Upper bound on `summary` length, in chars, as set by the SPEC. A longer
/// summary is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;

/// The `log.md` entry kinds the SPEC (§ `log.md`) recognizes. Any other kind
/// is reported as `LOG_UNKNOWN_KIND`, which is a warning rather than an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    // Object lifecycle.
    "ingest", "create", "update", "delete", "rename", "link",
    // Maintenance and review.
    "validate", "index-rebuild", "contradiction",
];
214
215// ─────────────────────────────────────────────────────────────────────────────
216//  Public entrypoints
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// **Loop default.** Validate the working set: content files changed since
220/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
221/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
222/// none of the cross-file global passes (entity-dedup, every-index sync,
223/// `log.md` ordering) that `--all` adds. If the default call finds no logged
224/// changed objects, it falls back to a per-file content sweep so an externally
225/// edited or freshly copied store cannot pass vacuously.
226///
227/// **Cost.** The changed set is read from `log.md` — O(changed): every
228/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
229/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
230/// over that set plus its incoming linkers — also O(changed). The one part that
231/// is *not* O(changed) is discovering those incoming linkers: a link to a
232/// changed path can live in the body or a typed frontmatter field of any file,
233/// so it is found by a **single** embedded-ripgrep pass over the store
234/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
235/// scan, flat in the changed-set size. (It was previously a full store read
236/// *per* changed object — `O(changed × store)`; that is the blow-up this path
237/// no longer pays.) The unavoidable single content scan is the same shape as
238/// free-text `dbmd search`; the sidecar `links` projection can't replace it
239/// because it omits body/typed-field edges.
240pub fn validate_working_set(
241    store: &Store,
242    since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244    if !store_marker_present(store) {
245        return Ok(vec![not_a_store_issue(store)]);
246    }
247
248    let cutoff = match since {
249        Some(ts) => Some(ts),
250        None => last_validate_at(store),
251    };
252
253    // 1. Changed objects, straight from the log (O(changed) — never a walk).
254    let changed = changed_objects_since(store, cutoff);
255    if changed.is_empty() && since.is_none() {
256        return validate_content_sweep(store);
257    }
258
259    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
260    //    path (the linker may now be stale even though it didn't change). The
261    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
262    //    pass over the store for the WHOLE changed set (one `.md` walk, one
263    //    presence-only/early-exit scan per file), not one walk per object. This
264    //    is the fix for the `O(changed × store)` blow-up that calling
265    //    `find_links_to` in a loop produced (a full store read per changed
266    //    object); the cost is now a single store scan regardless of how many
267    //    objects changed. A returned self-link is harmlessly deduped by the set
268    //    (the object is already inserted below).
269    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270    let mut working: BTreeSet<PathBuf> = changed;
271    for linker in store.find_links_to_any(&changed_targets)? {
272        working.insert(linker);
273    }
274
275    let mut issues = Vec::new();
276    for rel in &working {
277        let abs = store.root.join(rel);
278        // A changed path can be a *deletion* — skip files that no longer exist;
279        // the incoming-linker scan above already flagged links into them.
280        if !abs.is_file() {
281            continue;
282        }
283        // `None` basename index: the working-set pass does not build the
284        // store-wide basename map (that is a `--all`-only structure), so a bare
285        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
286        // `--all` sweep does the ambiguity upgrade.
287        check_content_file(store, rel, &abs, None, &mut issues);
288    }
289    issues.sort_by(issue_order);
290    Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294    let mut issues = Vec::new();
295    for rel in store.walk()? {
296        let abs = store.root.join(&rel);
297        check_content_file(store, &rel, &abs, None, &mut issues);
298    }
299    issues.sort_by(issue_order);
300    Ok(issues)
301}
302
303/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
304/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
305/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
306/// loop.
307pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308    if !store_marker_present(store) {
309        return Ok(vec![not_a_store_issue(store)]);
310    }
311
312    let mut issues = Vec::new();
313
314    // Store-identity file: `DB.md` shape (type / required fields / section
315    // headers). A single root file, checked once in the sweep — not a content
316    // file (it carries no `summary`), so it is not part of `walk_content_files`.
317    check_db_md(store, &mut issues);
318
319    let files = walk_content_files(&store.root);
320
321    // The basename index makes the short-form wiki-link check able to upgrade a
322    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
323    // Built once from the already-gathered sweep list (no extra walk); only the
324    // `--all` path has it (the working-set path stays O(changed)).
325    let basenames = build_basename_index(&files);
326
327    // Per-file checks over the whole store.
328    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329    for rel in &files {
330        let abs = store.root.join(rel);
331        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332            parsed.push((rel.clone(), p));
333        }
334    }
335
336    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
337    check_duplicates(store, &parsed, &mut issues);
338
339    // Cross-file: hierarchical index.md + index.jsonl sync.
340    check_indexes(store, &files, &mut issues);
341
342    // Cross-file: log.md well-formedness + ordering.
343    check_log(store, &mut issues);
344
345    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
346    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
347    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
348    // a fresh clone with no restored bytes still passes here.
349    check_assets(store, &parsed, &mut issues);
350
351    issues.sort_by(issue_order);
352    Ok(issues)
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356//  Per-file content checks (shared by both scopes)
357// ─────────────────────────────────────────────────────────────────────────────
358
359/// What `validate_all`'s cross-file pass needs from a per-file parse: the
360/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
361/// text-based wiki-link extraction). The body and fence-line are consumed
362/// inline during the per-file pass and not carried here.
363struct Parsed {
364    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
365    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
366    fm: Option<BTreeMap<String, Value>>,
367    /// The raw frontmatter YAML text (between the fences) — the source for
368    /// text-based wiki-link extraction in dedup.
369    fm_yaml: String,
370}
371
372/// Run every per-file check on one content file, pushing issues. Returns the
373/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
374/// `None` only when the file is unreadable or has no frontmatter block at all
375/// (which for a content file is itself reported).
376fn check_content_file(
377    store: &Store,
378    rel: &Path,
379    abs: &Path,
380    basenames: Option<&BasenameIndex>,
381    issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383    let text = match std::fs::read_to_string(abs) {
384        Ok(t) => t,
385        Err(e) => {
386            // The file exists in the walk but can't be read as UTF-8 text
387            // (invalid bytes) or hit an I/O error. Returning `None` silently
388            // here let a store whose only content file was binary garbage pass
389            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
390            // sweep exists to prevent. Report it so the agent gets an actionable
391            // diagnostic naming the unreadable file (and `index rebuild`, which
392            // hard-fails on the same file, isn't the only signal).
393            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394                "file is not valid UTF-8 text".to_string()
395            } else {
396                format!("file could not be read: {e}")
397            };
398            push(
399                issues,
400                Severity::Error,
401                codes::FM_UNREADABLE,
402                rel,
403                None,
404                None,
405                format!("content file is unreadable: {detail}"),
406                Some(
407                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408                        .into(),
409                ),
410                vec![],
411            );
412            return None;
413        }
414    };
415
416    let is_content = is_content_file(rel);
417
418    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419        Some(split) => split,
420        None => {
421            // No frontmatter at all. For a content file that means there's no
422            // `type:` and no `summary:` — report both the way a parsed-but-empty
423            // file would, so the agent gets the same actionable codes.
424            if is_content {
425                push(
426                    issues,
427                    Severity::Error,
428                    codes::FM_MISSING_TYPE,
429                    rel,
430                    None,
431                    Some("type".into()),
432                    "content file has no frontmatter `type:`".into(),
433                    Some("add a YAML frontmatter block with `type:`".into()),
434                    vec![],
435                );
436                push(
437                    issues,
438                    Severity::Error,
439                    codes::SUMMARY_MISSING,
440                    rel,
441                    None,
442                    Some("summary".into()),
443                    "content file has no `summary`".into(),
444                    Some("run `dbmd fm init`".into()),
445                    vec![],
446                );
447            }
448            return None;
449        }
450    };
451
452    // Parse the YAML block.
453    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455        // An empty frontmatter block parses as Null; treat as an empty mapping.
456        Ok(Value::Null) => Some(BTreeMap::new()),
457        Ok(_) => {
458            // A scalar / sequence at the top level isn't a frontmatter mapping.
459            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
460            // block is opaque, so there is no single offending field line.
461            push(
462                issues,
463                Severity::Error,
464                codes::FM_MALFORMED_YAML,
465                rel,
466                Some(1),
467                None,
468                "frontmatter is not a YAML mapping".into(),
469                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470                vec![],
471            );
472            None
473        }
474        Err(e) => {
475            // Anchor to line 1 (the opening `---`): an unparseable block has no
476            // single offending field line; the agent re-reads the whole block.
477            push(
478                issues,
479                Severity::Error,
480                codes::FM_MALFORMED_YAML,
481                rel,
482                Some(1),
483                None,
484                format!("frontmatter block isn't valid YAML: {e}"),
485                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486                vec![],
487            );
488            None
489        }
490    };
491
492    if let Some(map) = &fm {
493        // The detailed frontmatter checks only run when the YAML parsed.
494        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495    }
496
497    // Wiki-link doctrine checks run on the body of content files. They are NOT
498    // run on:
499    //   - the root append-only meta files `log.md`/`DB.md` — they reach this
500    //     function only via the working-set incoming-linker scan (`walk_all_md`
501    //     includes them), and `validate --all` never link-checks their bodies. A
502    //     historical `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]`
503    //     in DB.md's `## Agent instructions`, must not be `WIKI_LINK_BROKEN`; the
504    //     log is append-only, so "fix the link" can't even be applied.
505    //   - the derived catalogs `index.md`/`index.jsonl` — their "links" are
506    //     GENERATED catalog entries, not authored body wiki-links. A folder's
507    //     `index.md` is pulled into the working set as an incoming linker (an
508    //     entry `[[records/contacts/a]]` IS a wiki-link to a member, so touching
509    //     or deleting any member drags its folder `index.md` in). Its integrity
510    //     is the job of `check_indexes` under `--all`, which reports a dangling
511    //     entry as `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"). Body-link-
512    //     checking it here instead emitted `WIKI_LINK_BROKEN` ("create the
513    //     target") for the SAME condition — a different code with the OPPOSITE
514    //     remedy across the loop default vs the sweep, steering an agent to
515    //     recreate deleted data. `walk_content_files` skips `index.md` under
516    //     `--all` for exactly this reason; the working-set scope must match.
517    // Without these guards the two scopes disagree on the same store.
518    if !is_root_meta_file(rel) && !is_index_catalog_file(rel) {
519        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
520    }
521
522    Some(Parsed { fm, fm_yaml })
523}
524
525/// All frontmatter-level checks for a content file with valid YAML.
526fn check_frontmatter(
527    store: &Store,
528    rel: &Path,
529    fm: &BTreeMap<String, Value>,
530    fm_yaml: &str,
531    basenames: Option<&BasenameIndex>,
532    issues: &mut Vec<Issue>,
533    is_content: bool,
534) {
535    let type_ = fm.get("type").and_then(scalar_string);
536
537    // ── type ────────────────────────────────────────────────────────────────
538    if is_content && type_.is_none() {
539        push(
540            issues,
541            Severity::Error,
542            codes::FM_MISSING_TYPE,
543            rel,
544            fm_key_line_or_top(fm_yaml, "type"),
545            Some("type".into()),
546            "content file has no `type:`".into(),
547            Some("add a `type:` field (e.g. `type: contact`)".into()),
548            vec![],
549        );
550    }
551
552    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
553    // Present-but-out-of-enum is an error; absent is fine (effective default
554    // `fact`). Sources don't normally carry one, but validating the value when
555    // present is layer-agnostic and harmless.
556    if is_content {
557        // Branch on the raw value, NOT `and_then(scalar_string)`. Pre-filtering
558        // through `scalar_string` made a list/mapping value (which returns `None`)
559        // short-circuit the whole check, so a structurally-wrong `meta-type`
560        // slipped through clean AND was silently reclassified as the default
561        // `fact` by the rest of the toolkit. Absent or explicit-`null` is fine
562        // (effective default `fact`); a present non-null value must be a scalar in
563        // the closed enum. This mirrors the sibling timestamp check below, which
564        // was already hardened against the same non-scalar escape.
565        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
566            match scalar_string(v) {
567                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
568                Some(mt) => push(
569                    issues,
570                    Severity::Error,
571                    codes::FM_BAD_META_TYPE,
572                    rel,
573                    fm_key_line_or_top(fm_yaml, "meta-type"),
574                    Some("meta-type".into()),
575                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
576                    Some(
577                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
578                            .into(),
579                    ),
580                    vec![],
581                ),
582                None => push(
583                    issues,
584                    Severity::Error,
585                    codes::FM_BAD_META_TYPE,
586                    rel,
587                    fm_key_line_or_top(fm_yaml, "meta-type"),
588                    Some("meta-type".into()),
589                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
590                     string, found a list or mapping"
591                        .to_string(),
592                    Some(
593                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
594                            .into(),
595                    ),
596                    vec![],
597                ),
598            }
599        }
600    }
601
602    // ── summary (universal on content files) ──────────────────────────────────
603    if is_content {
604        check_summary(rel, fm, fm_yaml, issues);
605    }
606
607    // ── timestamps: created / updated ─────────────────────────────────────────
608    // The `created`/`updated` contract is content-file-only; meta files
609    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
610    if is_content {
611        for (key, missing_code) in [
612            ("created", codes::FM_MISSING_CREATED),
613            ("updated", codes::FM_MISSING_UPDATED),
614        ] {
615            // A key that is absent, or present-but-`null`, has *no* timestamp →
616            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
617            // "no timestamp", so a null `created:` must read as missing, not
618            // silently pass.
619            let value = fm.get(key);
620            let missing = value.is_none() || value.is_some_and(Value::is_null);
621            if missing {
622                push(
623                    issues,
624                    Severity::Error,
625                    missing_code,
626                    rel,
627                    fm_key_line_or_top(fm_yaml, key),
628                    Some(key.into()),
629                    format!("content file has no `{key}:` timestamp"),
630                    Some(format!(
631                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
632                    )),
633                    vec![],
634                );
635            } else if let Some(v) = value {
636                // Present and non-null. A scalar is checked for ISO-8601; a
637                // sequence/mapping is not a timestamp string at all and so
638                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
639                // through the way it did when `scalar_string` returned `None`
640                // and the branch silently no-oped).
641                match scalar_string(v) {
642                    Some(s) if is_iso8601(&s) => {}
643                    Some(s) => push(
644                        issues,
645                        Severity::Error,
646                        codes::FM_BAD_TIMESTAMP,
647                        rel,
648                        fm_key_line(fm_yaml, key),
649                        Some(key.into()),
650                        format!("`{key}` is not ISO-8601: {s:?}"),
651                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
652                        vec![],
653                    ),
654                    None => push(
655                        issues,
656                        Severity::Error,
657                        codes::FM_BAD_TIMESTAMP,
658                        rel,
659                        fm_key_line(fm_yaml, key),
660                        Some(key.into()),
661                        format!(
662                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
663                        ),
664                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
665                        vec![],
666                    ),
667                }
668            }
669        }
670    }
671    // ── tags shape ────────────────────────────────────────────────────────────
672    if let Some(tags) = fm.get("tags") {
673        if !is_flat_scalar_list(tags) {
674            push(
675                issues,
676                Severity::Warning,
677                codes::TAGS_MALFORMED,
678                rel,
679                fm_key_line(fm_yaml, "tags"),
680                Some("tags".into()),
681                "`tags` must be a flat YAML list of short scalar labels".into(),
682                Some("use block form: one `- <tag>` per line".into()),
683                vec![],
684            );
685        }
686    }
687
688    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
689    for key in detect_flow_form_link_lists(fm_yaml) {
690        push(
691            issues,
692            Severity::Error,
693            codes::WIKI_LINK_FLOW_FORM_LIST,
694            rel,
695            fm_key_line(fm_yaml, &key),
696            Some(key.clone()),
697            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
698            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
699            vec![],
700        );
701    }
702
703    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
704    // Skip keys that have an explicit `link to` schema spec — those are checked
705    // (with prefix enforcement) in `check_schema`, and double-reporting the same
706    // link via two paths would be noise.
707    let schema_link_keys: BTreeSet<String> =
708        effective_schema(store, type_.as_deref().unwrap_or(""))
709            .map(|s| {
710                s.fields
711                    .iter()
712                    .filter(|f| f.link_prefix.is_some())
713                    .map(|f| f.name.clone())
714                    .collect()
715            })
716            .unwrap_or_default();
717    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
718        if schema_link_keys.contains(&key) {
719            continue;
720        }
721        check_wiki_link(
722            store,
723            rel,
724            &link,
725            Some(link.line),
726            Some(&key),
727            basenames,
728            issues,
729        );
730    }
731
732    // ── policies: ignored types ──────────────────────────────────────────────
733    if let Some(t) = &type_ {
734        if store.config.ignored_types.iter().any(|it| it == t) {
735            push(
736                issues,
737                Severity::Info,
738                codes::POLICY_IGNORED_TYPE_PRESENT,
739                rel,
740                fm_key_line(fm_yaml, "type"),
741                Some("type".into()),
742                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
743                Some(
744                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
745                        .into(),
746                ),
747                // The policy source: `DB.md` declares the ignored type.
748                vec![PathBuf::from("DB.md")],
749            );
750        }
751        // A conclusion record (`meta-type: conclusion`) deriving from an
752        // ignored-type record → warning. The decision lives in the shared
753        // `derived_from_ignored_type` entry point; this side only supplies the
754        // `derived_from` targets (with their line, which the issue carries) and
755        // renders the finding.
756        let meta_type = fm
757            .get("meta-type")
758            .and_then(scalar_string)
759            .unwrap_or_else(|| "fact".to_string());
760        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
761            if let Some(hit) =
762                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
763            {
764                push(
765                    issues,
766                    Severity::Warning,
767                    codes::POLICY_IGNORED_TYPE_DERIVED,
768                    rel,
769                    Some(link.line),
770                    Some("derived_from".into()),
771                    format!(
772                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
773                        hit.target, hit.target_type
774                    ),
775                    Some(
776                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
777                            .into(),
778                    ),
779                    // The ignored-type source record, plus `DB.md` (the policy
780                    // source that lists the ignored type).
781                    vec![
782                        PathBuf::from(format!("{}.md", hit.target)),
783                        PathBuf::from("DB.md"),
784                    ],
785                );
786            }
787        }
788    }
789
790    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
791    if let Some(t) = &type_ {
792        if let Some(schema) = effective_schema(store, t) {
793            check_schema(store, rel, fm, fm_yaml, &schema, issues);
794        }
795    }
796}
797
798/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
799fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
800    let line = fm_key_line(fm_yaml, "summary");
801    match fm.get("summary") {
802        None => push(
803            issues,
804            Severity::Error,
805            codes::SUMMARY_MISSING,
806            rel,
807            // A missing `summary` key has no line of its own → anchor to the
808            // frontmatter block top (line 1), the EXPECTED field-absence rule.
809            fm_key_line_or_top(fm_yaml, "summary"),
810            Some("summary".into()),
811            "content file has no `summary`".into(),
812            Some("run `dbmd fm init`".into()),
813            vec![],
814        ),
815        Some(v) => {
816            let s = scalar_string(v).unwrap_or_default();
817            if s.trim().is_empty() {
818                push(
819                    issues,
820                    Severity::Error,
821                    codes::SUMMARY_EMPTY,
822                    rel,
823                    line,
824                    Some("summary".into()),
825                    "`summary` is present but empty".into(),
826                    Some("write a one-line summary, or run `dbmd fm init`".into()),
827                    vec![],
828                );
829            } else if s.contains('\n') {
830                push(
831                    issues,
832                    Severity::Error,
833                    codes::SUMMARY_MULTILINE,
834                    rel,
835                    line,
836                    Some("summary".into()),
837                    "`summary` must be one line (contains a newline)".into(),
838                    Some("collapse the summary to a single line".into()),
839                    vec![],
840                );
841            } else if s.chars().count() > MAX_SUMMARY_LEN {
842                push(
843                    issues,
844                    Severity::Warning,
845                    codes::SUMMARY_TOO_LONG,
846                    rel,
847                    line,
848                    Some("summary".into()),
849                    format!(
850                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
851                        s.chars().count()
852                    ),
853                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
854                    vec![],
855                );
856            }
857        }
858    }
859}
860
861/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
862fn check_body_wiki_links(
863    store: &Store,
864    rel: &Path,
865    body: &str,
866    fm_end_line: u32,
867    basenames: Option<&BasenameIndex>,
868    issues: &mut Vec<Issue>,
869) {
870    for link in extract_wiki_links(body) {
871        // Body lines are offset past the frontmatter block. `link.line` is
872        // 1-based within `body`; the body starts at `fm_end_line + 1`.
873        let abs_line = fm_end_line + link.line;
874        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
875    }
876}
877
/// Maps a file's bare basename (its stem, without the extension) to every
/// store-relative path that carries that basename.
///
/// The `validate --all` sweep builds this once so the short-form wiki-link
/// check can tell a merely short-form target (`WIKI_LINK_SHORT_FORM`) from an
/// *ambiguous* one whose bare basename matches two or more files
/// (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set path passes
/// `None` instead — that loop is O(changed) and never walks the store, so it
/// reports the plain short-form error without the scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the already-gathered `files` by file stem into a [`BasenameIndex`].
///
/// Paths without a stem, or whose stem is not valid UTF-8, are left out. Within
/// one stem the paths keep the order they have in `files`. No filesystem access
/// happens here; the caller supplies the swept file list.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files
        .iter()
        .filter_map(|path| {
            let stem = path.file_stem()?.to_str()?;
            Some((stem.to_string(), path.clone()))
        })
        .fold(BasenameIndex::new(), |mut index, (stem, path)| {
            index.entry(stem).or_default().push(path);
            index
        })
}
898
899/// The shared per-wiki-link doctrine + integrity check used by both body links
900/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
901/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
902/// when its bare basename matches ≥2 files.
903fn check_wiki_link(
904    store: &Store,
905    rel: &Path,
906    link: &Link,
907    line: Option<u32>,
908    key: Option<&str>,
909    basenames: Option<&BasenameIndex>,
910    issues: &mut Vec<Issue>,
911) {
912    let bare = link.target.trim_end_matches(".md");
913
914    // Short-form: not a full store-relative path (no `/`, or first segment isn't
915    // a known layer).
916    if !is_full_store_path(bare) {
917        // Ambiguous (defensive) takes precedence over plain short-form when the
918        // target is a bare basename (no `/`) that matches ≥2 files in the store.
919        // Only computable in the sweep (where `basenames` is populated); the
920        // working-set path falls through to the plain short-form error.
921        if !bare.contains('/') {
922            if let Some(idx) = basenames {
923                if let Some(matches) = idx.get(bare) {
924                    if matches.len() >= 2 {
925                        let mut related = matches.clone();
926                        related.sort();
927                        push(
928                            issues,
929                            Severity::Error,
930                            codes::WIKI_LINK_AMBIGUOUS,
931                            rel,
932                            line,
933                            key.map(str::to_string),
934                            format!(
935                                "short-form wiki-link `[[{}]]` matches multiple files",
936                                link.target
937                            ),
938                            Some("use the full store-relative path to disambiguate".into()),
939                            related,
940                        );
941                        return;
942                    }
943                }
944            }
945        }
946        push(
947            issues,
948            Severity::Error,
949            codes::WIKI_LINK_SHORT_FORM,
950            rel,
951            line,
952            key.map(str::to_string),
953            format!(
954                "wiki-link `[[{}]]` is not a full store-relative path",
955                link.target
956            ),
957            short_form_suggestion(bare),
958            vec![],
959        );
960        // Don't also report broken; the agent must fix the form first.
961        return;
962    }
963
964    // `.md` extension → warning, then still check existence.
965    if link.target.ends_with(".md") {
966        push(
967            issues,
968            Severity::Warning,
969            codes::WIKI_LINK_HAS_EXTENSION,
970            rel,
971            line,
972            key.map(str::to_string),
973            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
974            Some(format!("drop the extension: [[{bare}]]")),
975            vec![],
976        );
977    }
978
979    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
980    // same way the graph engine does — the literal path first (so a link to a
981    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
982    // the `.md`-appended path.
983    match resolve_wiki_target(store, bare) {
984        TargetResolution::Exists => {}
985        TargetResolution::Missing => push(
986            issues,
987            Severity::Error,
988            codes::WIKI_LINK_BROKEN,
989            rel,
990            line,
991            key.map(str::to_string),
992            format!("wiki-link target `{bare}` doesn't exist"),
993            Some(format!(
994                "create `{bare}.md`, or point the link at an existing file"
995            )),
996            vec![],
997        ),
998        TargetResolution::Unsafe => push(
999            issues,
1000            Severity::Error,
1001            codes::WIKI_LINK_BROKEN,
1002            rel,
1003            line,
1004            key.map(str::to_string),
1005            format!("wiki-link target `{bare}` is not a safe store-relative path"),
1006            Some("use a full store-relative path under sources/ or records/".into()),
1007            vec![],
1008        ),
1009    }
1010}
1011
1012// ─────────────────────────────────────────────────────────────────────────────
1013//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
1014// ─────────────────────────────────────────────────────────────────────────────
1015
1016/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
1017/// block, or `None`. This is the **only** source of schema enforcement — the
1018/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
1019/// store that wants its `contact` / `expense` / etc. fields enforced declares
1020/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
1021/// copy-in starting point.
1022fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1023    store.config.schemas.get(type_).cloned()
1024}
1025
1026/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
1027fn check_schema(
1028    store: &Store,
1029    rel: &Path,
1030    fm: &BTreeMap<String, Value>,
1031    fm_yaml: &str,
1032    schema: &Schema,
1033    issues: &mut Vec<Issue>,
1034) {
1035    for spec in &schema.fields {
1036        let present = fm.get(&spec.name);
1037        let line = fm_key_line(fm_yaml, &spec.name);
1038
1039        // Required. "Empty" means: the key is absent, or its value carries no
1040        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1041        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1042        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1043        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1044        // a required field with a null or empty-collection value pass silently;
1045        // route them through `is_empty_value` instead.
1046        let is_empty = match present {
1047            None => true,
1048            Some(v) => is_empty_value(v),
1049        };
1050        if spec.required && is_empty {
1051            push(
1052                issues,
1053                Severity::Error,
1054                codes::SCHEMA_MISSING_REQUIRED,
1055                rel,
1056                // Absent key → anchor to the frontmatter top (line 1); a
1057                // present-but-empty value keeps its own line.
1058                fm_key_line_or_top(fm_yaml, &spec.name),
1059                Some(spec.name.clone()),
1060                format!("required field `{}` is absent or empty", spec.name),
1061                Some(format!("set `{}` to a non-empty value", spec.name)),
1062                vec![],
1063            );
1064            continue;
1065        }
1066        let Some(value) = present else { continue };
1067
1068        // An OPTIONAL field that is `null` or empty is simply unset — there is
1069        // no value to shape/enum/link-check. (The required+empty case already
1070        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1071        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1072        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1073        let value_empty = value.is_null()
1074            || scalar_string(value)
1075                .map(|s| s.trim().is_empty())
1076                .unwrap_or(false);
1077        if !spec.required && value_empty {
1078            continue;
1079        }
1080
1081        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1082        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1083        if let Some(prefix) = &spec.link_prefix {
1084            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1085            continue; // a link field is never also shape/enum-checked
1086        }
1087
1088        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1089        // or mapping satisfies neither, and would otherwise slip through both
1090        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1091        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1092        // than let a structurally-wrong value validate clean. (Link fields,
1093        // which legitimately take block-form sequences, already `continue`d.)
1094        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1095            push(
1096                issues,
1097                Severity::Error,
1098                codes::SCHEMA_SHAPE_MISMATCH,
1099                rel,
1100                line,
1101                Some(spec.name.clone()),
1102                format!(
1103                    "`{}` must be a scalar value, found a list or mapping",
1104                    spec.name
1105                ),
1106                Some(format!("set `{}` to a single scalar value", spec.name)),
1107                vec![],
1108            );
1109            continue;
1110        }
1111
1112        // enum
1113        if let Some(allowed) = &spec.enum_values {
1114            if let Some(s) = scalar_string(value) {
1115                if !allowed.iter().any(|a| a == &s) {
1116                    push(
1117                        issues,
1118                        Severity::Error,
1119                        codes::SCHEMA_ENUM_VIOLATION,
1120                        rel,
1121                        line,
1122                        Some(spec.name.clone()),
1123                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1124                        Some(format!("use one of: {}", allowed.join(", "))),
1125                        vec![],
1126                    );
1127                }
1128            }
1129            continue;
1130        }
1131
1132        // shape
1133        if let Some(shape) = spec.shape {
1134            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1135        }
1136    }
1137}
1138
1139/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1140/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1141/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1142/// recognized exactly like the quoted form.
1143fn check_schema_link(
1144    store: &Store,
1145    rel: &Path,
1146    field: &str,
1147    fm_yaml: &str,
1148    prefix: &Path,
1149    line: Option<u32>,
1150    issues: &mut Vec<Issue>,
1151) {
1152    let prefix_str = prefix.to_string_lossy();
1153    let prefix_str = prefix_str.trim_end_matches('/');
1154    let suggestion = |target_leaf: &str| {
1155        Some(format!(
1156            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1157        ))
1158    };
1159
1160    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1161    if links.is_empty() {
1162        // No wiki-link in the field's value → it's a plain string.
1163        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1164        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1165        let leaf = slugish(raw);
1166        push(
1167            issues,
1168            Severity::Error,
1169            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1170            rel,
1171            line,
1172            Some(field.to_string()),
1173            format!(
1174                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1175            ),
1176            suggestion(&leaf),
1177            vec![],
1178        );
1179        return;
1180    }
1181
1182    for link in links {
1183        if link.target.ends_with(".md") {
1184            let bare = link.target.trim_end_matches(".md");
1185            push(
1186                issues,
1187                Severity::Warning,
1188                codes::WIKI_LINK_HAS_EXTENSION,
1189                rel,
1190                Some(link.line),
1191                Some(field.to_string()),
1192                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1193                Some(format!("drop the extension: [[{bare}]]")),
1194                vec![],
1195            );
1196        }
1197        let bare = link.target.trim_end_matches(".md");
1198        if !path_under_prefix(bare, prefix_str) {
1199            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1200            push(
1201                issues,
1202                Severity::Error,
1203                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1204                rel,
1205                line,
1206                Some(field.to_string()),
1207                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1208                suggestion(leaf),
1209                vec![],
1210            );
1211        } else {
1212            // Correct prefix — still surface a broken target so the agent sees
1213            // one consistent vocabulary. Resolve like the graph engine (literal
1214            // path first, then `.md`) so a `link to sources/` field pointing at a
1215            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1216            match resolve_wiki_target(store, bare) {
1217                TargetResolution::Exists => {}
1218                TargetResolution::Missing => push(
1219                    issues,
1220                    Severity::Error,
1221                    codes::WIKI_LINK_BROKEN,
1222                    rel,
1223                    line,
1224                    Some(field.to_string()),
1225                    format!("wiki-link target `{bare}` doesn't exist"),
1226                    Some(format!(
1227                        "create `{bare}.md`, or point the link at an existing file"
1228                    )),
1229                    vec![],
1230                ),
1231                TargetResolution::Unsafe => push(
1232                    issues,
1233                    Severity::Error,
1234                    codes::WIKI_LINK_BROKEN,
1235                    rel,
1236                    line,
1237                    Some(field.to_string()),
1238                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1239                    Some("use a full store-relative path under sources/ or records/".into()),
1240                    vec![],
1241                ),
1242            }
1243        }
1244    }
1245}
1246
1247/// Shape enforcement for a non-link, non-enum schema field.
1248fn check_schema_shape(
1249    rel: &Path,
1250    field: &str,
1251    value: &Value,
1252    shape: Shape,
1253    line: Option<u32>,
1254    issues: &mut Vec<Issue>,
1255) {
1256    let s = scalar_string(value).unwrap_or_default();
1257    let ok = match shape {
1258        Shape::String => true, // any scalar string
1259        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1260        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1261        Shape::Date => is_iso8601_date_or_datetime(&s),
1262        Shape::Email => is_email(&s),
1263        Shape::Currency => is_currency(&s),
1264        Shape::Url => is_url(&s),
1265    };
1266    if !ok {
1267        push(
1268            issues,
1269            Severity::Error,
1270            codes::SCHEMA_SHAPE_MISMATCH,
1271            rel,
1272            line,
1273            Some(field.to_string()),
1274            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1275            Some(shape_suggestion(shape)),
1276            vec![],
1277        );
1278    }
1279}
1280
1281// ─────────────────────────────────────────────────────────────────────────────
1282//  Cross-file: entity-dedup collisions (validate_all only)
1283// ─────────────────────────────────────────────────────────────────────────────
1284
1285/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1286///
1287/// `DUP_ID` is universal (two files with the same explicit `id`).
1288/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1289/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1290/// uniqueness constraint, and two records of that type whose declared values
1291/// collide warn. No type carries a built-in dedup key — the store opts in.
1292///
1293/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1294/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1295/// lexicographically smallest store-relative path in the group (a total order →
1296/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1297/// that field's line on the reported file and carries it as `key`; a multi-field
1298/// key anchors to line 1 with a null key.
1299fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1300    // Path → frontmatter YAML, for resolving the anchor field's line on the
1301    // reported (smallest-path) member.
1302    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1303        .iter()
1304        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1305        .collect();
1306
1307    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1308    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1309    for (rel, p) in parsed {
1310        if let Some(map) = &p.fm {
1311            if let Some(id) = map.get("id").and_then(scalar_string) {
1312                if !id.trim().is_empty() {
1313                    by_id.entry(id).or_default().push(rel.clone());
1314                }
1315            }
1316        }
1317    }
1318    for (id, files) in &by_id {
1319        if files.len() > 1 {
1320            let (reported, related) = canonical_and_related(files);
1321            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1322            push(
1323                issues,
1324                Severity::Error,
1325                codes::DUP_ID,
1326                &reported,
1327                line,
1328                Some("id".into()),
1329                format!("id {id:?} is declared by more than one file"),
1330                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1331                related,
1332            );
1333        }
1334    }
1335
1336    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1337    // Every constraint comes from the store's `## Schemas`; a type with no
1338    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1339    // key-ordered, so emitted issues are deterministic across runs.
1340    for (type_name, schema) in &store.config.schemas {
1341        for key_fields in &schema.unique_keys {
1342            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1343        }
1344    }
1345}
1346
1347/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1348/// declared `key_fields` render to the same token tuple. Files missing any key
1349/// field are skipped — an incomplete key is never a collision.
1350///
1351/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1352/// store-relative path; `related` is the rest. A single-field key anchors to
1353/// that field's line on the reported file and carries it as `key`; a multi-field
1354/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1355fn soft_dup(
1356    parsed: &[(PathBuf, Parsed)],
1357    issues: &mut Vec<Issue>,
1358    type_: &str,
1359    key_fields: &[String],
1360    fm_yaml_of: &HashMap<&PathBuf, &str>,
1361) {
1362    if key_fields.is_empty() {
1363        return;
1364    }
1365    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1366    for (rel, p) in parsed {
1367        let is_type =
1368            p.fm.as_ref()
1369                .and_then(|m| m.get("type"))
1370                .and_then(scalar_string)
1371                .map(|t| t == type_)
1372                .unwrap_or(false);
1373        if !is_type {
1374            continue;
1375        }
1376        if let Some(key) = dedup_key(p, key_fields) {
1377            groups.entry(key).or_default().push(rel.clone());
1378        }
1379    }
1380    // HashMap iteration is nondeterministic; sort by reported member so the
1381    // emitted issue order is stable across runs.
1382    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1383        .values()
1384        .filter(|files| files.len() > 1)
1385        .map(|files| canonical_and_related(files))
1386        .collect();
1387    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1388
1389    let fields_disp = key_fields.join(", ");
1390    for (reported, related) in collisions {
1391        // Single-field keys anchor to the field's line + carry the key; multi-
1392        // field keys anchor to line 1 with a null key.
1393        let (line, key) = if key_fields.len() == 1 {
1394            (
1395                fm_yaml_of
1396                    .get(&reported)
1397                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1398                Some(key_fields[0].clone()),
1399            )
1400        } else {
1401            (Some(1), None)
1402        };
1403        let n = related.len();
1404        push(
1405            issues,
1406            Severity::Warning,
1407            codes::DUP_UNIQUE_KEY,
1408            &reported,
1409            line,
1410            key,
1411            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1412            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1413            related,
1414        );
1415    }
1416}
1417
1418/// Render a type's `unique:` key for one file: each field's dedup token in
1419/// order, or `None` if any field is absent/empty (an incomplete key never
1420/// collides).
1421fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1422    let mut out = Vec::with_capacity(key_fields.len());
1423    for f in key_fields {
1424        out.push(dedup_token(p, f)?);
1425    }
1426    Some(out)
1427}
1428
1429/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1430/// values (single or block-sequence list) reduce to their lower-cased target
1431/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1432/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1433fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1434    // Wiki-links first — read from the raw frontmatter text so the unquoted
1435    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1436    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1437    if !links.is_empty() {
1438        let set: BTreeSet<String> = links
1439            .into_iter()
1440            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1441            .filter(|t| !t.is_empty())
1442            .collect();
1443        return if set.is_empty() {
1444            None
1445        } else {
1446            Some(set.into_iter().collect::<Vec<_>>().join(","))
1447        };
1448    }
1449    match p.fm.as_ref()?.get(field) {
1450        Some(Value::Sequence(items)) => {
1451            let set: BTreeSet<String> = items
1452                .iter()
1453                .filter_map(scalar_string)
1454                .map(|s| s.trim().to_lowercase())
1455                .filter(|t| !t.is_empty())
1456                .collect();
1457            if set.is_empty() {
1458                None
1459            } else {
1460                Some(set.into_iter().collect::<Vec<_>>().join(","))
1461            }
1462        }
1463        Some(v) => {
1464            let s = scalar_string(v)?.trim().to_lowercase();
1465            if s.is_empty() {
1466                None
1467            } else {
1468                Some(s)
1469            }
1470        }
1471        None => None,
1472    }
1473}
1474
/// Partition a non-empty collision group into `(reported, related)`.
///
/// The group is sorted by store-relative path; the smallest path is the
/// reported member and everything after it, still in ascending order, is
/// `related`. Path ordering is total, so the split is deterministic, which is
/// the property reporting rule #1 relies on.
///
/// # Panics
///
/// Panics if `files` is empty.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered = files.to_vec();
    ordered.sort();
    let mut members = ordered.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1486
1487// ─────────────────────────────────────────────────────────────────────────────
1488//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1489// ─────────────────────────────────────────────────────────────────────────────
1490
1491/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1492fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1493    // Group content files by their immediate parent folder (the type-folder,
1494    // *across date shards* — a sharded file's "type folder" is the folder right
1495    // under the layer). We key on the type-folder so shards roll up correctly.
1496    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1497    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1498    for rel in files {
1499        // The layer is the first path component — recorded independently of the
1500        // type-folder so a layer containing only loose files still requires an
1501        // `index.md`.
1502        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1503            match layer {
1504                "sources" => layers_present.insert("sources"),
1505                "records" => layers_present.insert("records"),
1506                _ => false,
1507            };
1508        }
1509        if let Some(tf) = type_folder_of(rel) {
1510            type_folders.entry(tf).or_default().push(rel.clone());
1511        }
1512    }
1513
1514    // ── Root index.md ─────────────────────────────────────────────────────────
1515    // The root `index.md` is a TYPE-FOLDER rollup, so it is required only when
1516    // the store has type-folder content. A store whose only content is loose
1517    // files (directly at a layer root) is catalogued by its layer `index.jsonl`
1518    // and has nothing to roll up, so the absence of a root `index.md` is not a
1519    // defect — but if one exists, scope-check it.
1520    {
1521        let root_index = store.root.join("index.md");
1522        if root_index.is_file() {
1523            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1524        } else if !type_folders.is_empty() {
1525            push(
1526                issues,
1527                Severity::Error,
1528                codes::INDEX_MISSING,
1529                Path::new("index.md"),
1530                None,
1531                None,
1532                "store has files but no root `index.md`".into(),
1533                Some("run `dbmd index rebuild`".into()),
1534                vec![],
1535            );
1536        }
1537    }
1538
1539    // ── Layer index.md ────────────────────────────────────────────────────────
1540    // A layer `index.md` is the rollup of that layer's type-folders, so it is
1541    // required only when the layer HAS type-folders. A layer whose only content
1542    // is loose files is catalogued by its own `index.jsonl` (checked below) and
1543    // needs no rollup; demanding one there was a false `INDEX_MISSING`.
1544    for layer in &layers_present {
1545        let layer_index_rel = PathBuf::from(layer).join("index.md");
1546        let abs = store.root.join(&layer_index_rel);
1547        let layer_has_type_folders = type_folders.keys().any(|tf| tf.starts_with(layer));
1548        if abs.is_file() {
1549            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1550        } else if layer_has_type_folders {
1551            push(
1552                issues,
1553                Severity::Error,
1554                codes::INDEX_MISSING,
1555                &layer_index_rel,
1556                None,
1557                None,
1558                format!("layer `{layer}/` has files but no `index.md`"),
1559                Some("run `dbmd index rebuild`".into()),
1560                vec![],
1561            );
1562        }
1563    }
1564
1565    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1566    for (tf, members) in &type_folders {
1567        let index_md_rel = tf.join("index.md");
1568        let index_md_abs = store.root.join(&index_md_rel);
1569        let index_md_present = index_md_abs.is_file();
1570        if !index_md_present {
1571            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1572            // on the FOLDER (not the would-be `index.md` path). When the index is
1573            // entirely missing we do NOT additionally evaluate per-entry
1574            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1575            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1576            push(
1577                issues,
1578                Severity::Error,
1579                codes::INDEX_MISSING,
1580                tf,
1581                None,
1582                None,
1583                format!("non-empty folder `{}` has no index.md", tf.display()),
1584                Some(format!(
1585                    "run `dbmd index rebuild --folder {}`",
1586                    tf.display()
1587                )),
1588                vec![],
1589            );
1590            continue;
1591        }
1592
1593        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1594        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1595
1596        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1597        // when the `index.md` is present (above): a folder whose entire index is
1598        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1599        let jsonl_rel = tf.join("index.jsonl");
1600        let jsonl_abs = store.root.join(&jsonl_rel);
1601        if !jsonl_abs.is_file() {
1602            push(
1603                issues,
1604                Severity::Error,
1605                codes::INDEX_JSONL_MISSING,
1606                &jsonl_rel,
1607                None,
1608                None,
1609                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1610                Some("run `dbmd index rebuild`".into()),
1611                vec![],
1612            );
1613        } else {
1614            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1615        }
1616    }
1617
1618    // ── Loose files: content directly at a layer root (no type-folder). ──────
1619    // They are catalogued in the layer's own `index.jsonl` (the layer `index.md`
1620    // stays a type-folder rollup), so structured reads — `query`, dedup, `graph`
1621    // — see them the same way they see canonical files. Require that sidecar and
1622    // sync-check it, so a loose file is never silently absent from the catalog.
1623    // Only genuinely-loose files land here: `type_folder_of` already grouped
1624    // every file two-or-more levels under a layer into its type-folder above.
1625    let mut loose_by_layer: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1626    for rel in files {
1627        if !is_content_file(rel) || type_folder_of(rel).is_some() {
1628            continue;
1629        }
1630        if let Some(layer_dir) = loose_layer_dir(rel) {
1631            loose_by_layer
1632                .entry(layer_dir)
1633                .or_default()
1634                .push(rel.clone());
1635        }
1636    }
1637    for (layer_dir, members) in &loose_by_layer {
1638        let jsonl_rel = layer_dir.join("index.jsonl");
1639        if !store.root.join(&jsonl_rel).is_file() {
1640            push(
1641                issues,
1642                Severity::Error,
1643                codes::INDEX_JSONL_MISSING,
1644                &jsonl_rel,
1645                None,
1646                None,
1647                format!(
1648                    "loose files at `{}/` are not catalogued — the layer has no `index.jsonl`",
1649                    layer_dir.display()
1650                ),
1651                Some("run `dbmd index rebuild`".into()),
1652                members.clone(),
1653            );
1654        } else {
1655            // `check_type_folder_index_jsonl` ignores its `tf` arg (`let _ = tf`)
1656            // and only checks jsonl-vs-files-vs-frontmatter — exactly the layer
1657            // sidecar's contract, so it is reused verbatim.
1658            check_type_folder_index_jsonl(store, layer_dir, &jsonl_rel, members, issues);
1659        }
1660    }
1661
1662    // ── Orphan index.md: an index file in a folder with no content. ──────────
1663    for rel in walk_index_files(&store.root) {
1664        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1665        let parent_str = parent.to_string_lossy().to_string();
1666        let is_canonical = parent_str.is_empty() // root
1667            || matches!(parent_str.as_str(), "sources" | "records")
1668            || type_folders.contains_key(&parent);
1669        if !is_canonical {
1670            push(
1671                issues,
1672                Severity::Warning,
1673                codes::INDEX_ORPHAN,
1674                &rel,
1675                None,
1676                None,
1677                format!(
1678                    "`{}` sits in an empty or non-canonical folder",
1679                    rel.display()
1680                ),
1681                Some("remove it, or run `dbmd index rebuild`".into()),
1682                vec![],
1683            );
1684        }
1685    }
1686}
1687
1688/// Check a type-folder `index.md`'s entries against the folder's actual files:
1689/// stale entries (target gone), missing entries (file not listed), and
1690/// summary mismatches.
1691fn check_type_folder_index_md(
1692    store: &Store,
1693    tf: &Path,
1694    index_rel: &Path,
1695    members: &[PathBuf],
1696    issues: &mut Vec<Issue>,
1697) {
1698    let abs = store.root.join(index_rel);
1699    let Ok(text) = std::fs::read_to_string(&abs) else {
1700        return;
1701    };
1702    let entries = parse_index_entries(&text);
1703
1704    let listed: BTreeSet<PathBuf> = entries
1705        .iter()
1706        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1707        .collect();
1708
1709    // Stale entries + summary mismatch.
1710    for entry in &entries {
1711        let bare = entry.target.trim_end_matches(".md");
1712        // Resolve like the graph engine (literal path first, then `.md`) so an
1713        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1714        let target_abs = match resolved_target_abs(store, bare) {
1715            Some(abs) => abs,
1716            None => {
1717                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1718                    push(
1719                        issues,
1720                        Severity::Error,
1721                        codes::INDEX_STALE_ENTRY,
1722                        index_rel,
1723                        Some(entry.line),
1724                        None,
1725                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1726                        Some("run `dbmd index rebuild`".into()),
1727                        vec![],
1728                    );
1729                } else {
1730                    push(
1731                        issues,
1732                        Severity::Error,
1733                        codes::INDEX_STALE_ENTRY,
1734                        index_rel,
1735                        Some(entry.line),
1736                        None,
1737                        format!("index entry `[[{bare}]]` points at a missing file"),
1738                        Some("run `dbmd index rebuild`".into()),
1739                        // The stale target the entry names (the file that no
1740                        // longer exists) — so the agent can locate the dangling
1741                        // reference.
1742                        vec![PathBuf::from(format!("{bare}.md"))],
1743                    );
1744                }
1745                continue;
1746            }
1747        };
1748        // Summary mismatch: the entry text must equal the file's `summary`. A
1749        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1750        // summary is also a mismatch — the SPEC requires every type-folder index
1751        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1752        // missing quote can't validate clean just because there's nothing to
1753        // compare.
1754        if let Some(expected) = read_summary(&target_abs) {
1755            match &entry.summary_text {
1756                // Compare with the SAME whitespace normalization the renderer
1757                // applies when it writes the `index.md` browse line
1758                // (`format_md_entry` -> `collapse_whitespace`). `text_part` is the
1759                // already-collapsed text parsed back out of `index.md`; `expected`
1760                // is the RAW file summary. Comparing a collapsed value against a
1761                // raw one falsely flagged any valid one-line summary that carries
1762                // internal whitespace (a double space, a tab) — a permanent,
1763                // rebuild-immune INDEX_SUMMARY_MISMATCH that wedged the store, since
1764                // `index rebuild` regenerates the byte-identical collapsed line.
1765                // Normalizing both sides makes the check compare like with like.
1766                Some(text_part)
1767                    if crate::summary::collapse_whitespace(text_part)
1768                        != crate::summary::collapse_whitespace(&expected) =>
1769                {
1770                    push(
1771                        issues,
1772                        Severity::Error,
1773                        codes::INDEX_SUMMARY_MISMATCH,
1774                        index_rel,
1775                        Some(entry.line),
1776                        None,
1777                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1778                        Some("run `dbmd index rebuild`".into()),
1779                        vec![PathBuf::from(format!("{bare}.md"))],
1780                    );
1781                }
1782                None if !expected.trim().is_empty() => {
1783                    push(
1784                        issues,
1785                        Severity::Error,
1786                        codes::INDEX_SUMMARY_MISMATCH,
1787                        index_rel,
1788                        Some(entry.line),
1789                        None,
1790                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1791                        Some("run `dbmd index rebuild`".into()),
1792                        vec![PathBuf::from(format!("{bare}.md"))],
1793                    );
1794                }
1795                _ => {}
1796            }
1797        }
1798    }
1799
1800    // Missing entries: a member file not listed. Skip the index/log meta files.
1801    // The browse view caps at 500; only flag a missing entry when the folder is
1802    // under the cap (a capped folder legitimately omits older files).
1803    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1804    if content_members.len() <= 500 {
1805        for m in content_members {
1806            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1807            if !listed.contains(&bare) {
1808                push(
1809                    issues,
1810                    Severity::Error,
1811                    codes::INDEX_MISSING_ENTRY,
1812                    index_rel,
1813                    None,
1814                    None,
1815                    format!(
1816                        "file `{}` is not listed in its folder's `index.md`",
1817                        m.display()
1818                    ),
1819                    Some("run `dbmd index rebuild`".into()),
1820                    vec![(*m).clone()],
1821                );
1822            }
1823        }
1824    }
1825    let _ = tf;
1826}
1827
1828/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1829/// folder (uncapped), every record must point at a real file, and each record's
1830/// fields must match the file's frontmatter.
1831fn check_type_folder_index_jsonl(
1832    store: &Store,
1833    tf: &Path,
1834    jsonl_rel: &Path,
1835    members: &[PathBuf],
1836    issues: &mut Vec<Issue>,
1837) {
1838    let abs = store.root.join(jsonl_rel);
1839    let Ok(text) = std::fs::read_to_string(&abs) else {
1840        return;
1841    };
1842
1843    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1844    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1845    for (i, line) in text.lines().enumerate() {
1846        let line = line.trim();
1847        if line.is_empty() {
1848            continue;
1849        }
1850        let rec: serde_json::Value = match serde_json::from_str(line) {
1851            Ok(v) => v,
1852            Err(e) => {
1853                push(
1854                    issues,
1855                    Severity::Error,
1856                    codes::INDEX_JSONL_DESYNC,
1857                    jsonl_rel,
1858                    Some((i + 1) as u32),
1859                    None,
1860                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1861                    Some("run `dbmd index rebuild`".into()),
1862                    vec![],
1863                );
1864                continue;
1865            }
1866        };
1867        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1868            if !is_safe_store_relative_path(Path::new(path)) {
1869                push(
1870                    issues,
1871                    Severity::Error,
1872                    codes::INDEX_JSONL_DESYNC,
1873                    jsonl_rel,
1874                    Some((i + 1) as u32),
1875                    None,
1876                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1877                    Some("run `dbmd index rebuild`".into()),
1878                    vec![],
1879                );
1880                continue;
1881            }
1882            records.insert(PathBuf::from(path), rec);
1883        }
1884    }
1885
1886    let member_set: BTreeSet<PathBuf> = members
1887        .iter()
1888        .filter(|m| is_content_file(m))
1889        .cloned()
1890        .collect();
1891
1892    // jsonl record → missing file = desync.
1893    for path in records.keys() {
1894        let target_abs = store.root.join(path);
1895        if !target_abs.is_file() {
1896            push(
1897                issues,
1898                Severity::Error,
1899                codes::INDEX_JSONL_DESYNC,
1900                jsonl_rel,
1901                None,
1902                None,
1903                format!(
1904                    "`index.jsonl` record points at missing file `{}`",
1905                    path.display()
1906                ),
1907                Some("run `dbmd index rebuild`".into()),
1908                vec![],
1909            );
1910        }
1911    }
1912
1913    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1914    for m in &member_set {
1915        if !records.contains_key(m) {
1916            push(
1917                issues,
1918                Severity::Error,
1919                codes::INDEX_JSONL_DESYNC,
1920                jsonl_rel,
1921                None,
1922                None,
1923                format!(
1924                    "file `{}` is missing from the complete `index.jsonl`",
1925                    m.display()
1926                ),
1927                Some("run `dbmd index rebuild`".into()),
1928                vec![m.clone()],
1929            );
1930        }
1931    }
1932
1933    // Record fields stale vs. frontmatter. SPEC § Validation defines
1934    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1935    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1936    // search paths read every field straight from these sidecars (`tags`,
1937    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1938    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1939    // a stale value answer queries with data that exists in no `.md` file.
1940    //
1941    // Rather than re-list (and drift from) every projected key, rebuild the
1942    // record the canonical projection would write for this file
1943    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1944    // and diff the two as flat JSON maps. Every key the projection emits is
1945    // covered automatically; `path` is the join key and is skipped.
1946    for (path, rec) in &records {
1947        let target_abs = store.root.join(path);
1948        if !target_abs.is_file() {
1949            continue;
1950        }
1951        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1952        else {
1953            continue; // unreadable / unparseable frontmatter is reported elsewhere
1954        };
1955        let Ok(expected_json) = serde_json::to_value(&expected) else {
1956            continue;
1957        };
1958        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1959            continue;
1960        };
1961
1962        // Compare the union of keys present on either side; a key the file
1963        // projects but the sidecar omits is just as stale as a wrong value.
1964        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1965        for key in have.keys().chain(want.keys()) {
1966            if key == "path" {
1967                continue;
1968            }
1969            if have.get(key) != want.get(key) {
1970                mismatched_keys.insert(key);
1971            }
1972        }
1973
1974        if !mismatched_keys.is_empty() {
1975            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1976            push(
1977                issues,
1978                Severity::Error,
1979                codes::INDEX_JSONL_STALE,
1980                jsonl_rel,
1981                None,
1982                Some(keys.join(",")),
1983                format!(
1984                    "`index.jsonl` record for `{}` is stale ({})",
1985                    path.display(),
1986                    keys.join(", ")
1987                ),
1988                Some("run `dbmd index rebuild`".into()),
1989                vec![path.clone()],
1990            );
1991        }
1992    }
1993    let _ = tf;
1994}
1995
1996/// Check an index's `scope:` frontmatter against its filesystem location.
1997fn check_index_scope(
1998    store: &Store,
1999    index_rel: &Path,
2000    expected_scope: &str,
2001    expected_folder: Option<&str>,
2002    issues: &mut Vec<Issue>,
2003) {
2004    let abs = store.root.join(index_rel);
2005    let Ok(text) = std::fs::read_to_string(&abs) else {
2006        return;
2007    };
2008    let Some((yaml, _, _)) = split_frontmatter(&text) else {
2009        return;
2010    };
2011    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
2012        return;
2013    };
2014    let fm = yaml_map_to_btree(&map);
2015
2016    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
2017        // Accept "type-folder" and the SPEC example's looser "folder" alias.
2018        let scope_ok =
2019            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
2020        if !scope_ok {
2021            push(
2022                issues,
2023                Severity::Warning,
2024                codes::INDEX_WRONG_SCOPE,
2025                index_rel,
2026                fm_key_line(&yaml, "scope"),
2027                Some("scope".into()),
2028                format!(
2029                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
2030                ),
2031                Some(format!("set `scope: {expected_scope}`")),
2032                vec![],
2033            );
2034        }
2035    }
2036    // folder: must match for layer/type-folder indexes.
2037    if let Some(expected) = expected_folder {
2038        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
2039            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
2040                push(
2041                    issues,
2042                    Severity::Warning,
2043                    codes::INDEX_WRONG_SCOPE,
2044                    index_rel,
2045                    fm_key_line(&yaml, "folder"),
2046                    Some("folder".into()),
2047                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
2048                    Some(format!("set `folder: {expected}`")),
2049                    vec![],
2050                );
2051            }
2052        }
2053    }
2054}
2055
2056// ─────────────────────────────────────────────────────────────────────────────
2057//  Cross-file: log.md well-formedness + ordering (validate_all only)
2058// ─────────────────────────────────────────────────────────────────────────────
2059
2060/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
2061/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
2062///
2063/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
2064/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
2065/// LOG_* checks read only the active file, an entry `validate --all` flagged
2066/// while it lived in `log.md` would stop being flagged the moment a newer-month
2067/// append rotated it into an archive — even though the log readers still surface
2068/// that exact entry to the curator. Scanning the archives too keeps validate and
2069/// the readers in agreement after a rotation.
2070///
2071/// Order: archives oldest-month first, then the active `log.md` last — the true
2072/// chronological timeline — so the out-of-order check threads `prev` across the
2073/// rotation boundary the same way it does within a single file.
2074fn check_log(store: &Store, issues: &mut Vec<Issue>) {
2075    let mut prev: Option<DateTime<FixedOffset>> = None;
2076    for rel in log_files_chronological(store) {
2077        check_log_file(store, &rel, &mut prev, issues);
2078    }
2079}
2080
2081/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
2082/// archive oldest-month first, then the active `log.md` last. Missing files are
2083/// simply absent from the list.
2084fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2085    let mut files: Vec<PathBuf> = Vec::new();
2086    let archive_dir = store.root.join("log");
2087    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2088        let mut archives: Vec<PathBuf> = entries
2089            .flatten()
2090            .map(|e| e.path())
2091            .filter(|p| {
2092                p.is_file()
2093                    && p.file_name()
2094                        .and_then(|s| s.to_str())
2095                        .and_then(|n| n.strip_suffix(".md"))
2096                        .is_some_and(is_year_month_archive)
2097            })
2098            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2099            .collect();
2100        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
2101        archives.sort();
2102        files.extend(archives);
2103    }
2104    // The active file holds the current month — newest, so it comes last.
2105    if store.root.join("log.md").is_file() {
2106        files.push(PathBuf::from("log.md"));
2107    }
2108    files
2109}
2110
2111/// Scan one log file's entry headers, threading the running `prev` timestamp so
2112/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2113/// given store-relative path so an archived entry points at its archive file.
2114fn check_log_file(
2115    store: &Store,
2116    log_rel: &Path,
2117    prev: &mut Option<DateTime<FixedOffset>>,
2118    issues: &mut Vec<Issue>,
2119) {
2120    let abs = store.root.join(log_rel);
2121    let Ok(text) = std::fs::read_to_string(&abs) else {
2122        return;
2123    };
2124
2125    for (i, line) in text.lines().enumerate() {
2126        if !line.starts_with("## [") {
2127            continue;
2128        }
2129        let line_no = (i + 1) as u32;
2130        match parse_log_header(line) {
2131            None => push(
2132                issues,
2133                Severity::Error,
2134                codes::LOG_BAD_TIMESTAMP,
2135                log_rel,
2136                Some(line_no),
2137                None,
2138                format!("log entry header has an unparseable timestamp: {line:?}"),
2139                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2140                vec![],
2141            ),
2142            Some((ts, kind, _object)) => {
2143                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2144                    push(
2145                        issues,
2146                        Severity::Warning,
2147                        codes::LOG_UNKNOWN_KIND,
2148                        log_rel,
2149                        Some(line_no),
2150                        None,
2151                        format!("log entry kind `{kind}` is not recognized"),
2152                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2153                        vec![],
2154                    );
2155                }
2156                if let Some(p) = *prev {
2157                    if ts < p {
2158                        push(
2159                            issues,
2160                            Severity::Warning,
2161                            codes::LOG_OUT_OF_ORDER,
2162                            log_rel,
2163                            Some(line_no),
2164                            None,
2165                            "log entry is older than the entry above it (possible rewrite)".into(),
2166                            Some("append corrective entries; never reorder past ones".into()),
2167                            vec![],
2168                        );
2169                    }
2170                }
2171                *prev = Some(ts);
2172            }
2173        }
2174    }
2175}
2176
2177// ─────────────────────────────────────────────────────────────────────────────
2178//  Self-contained primitives (collapse onto sibling modules once they land)
2179// ─────────────────────────────────────────────────────────────────────────────
2180
/// A minimal wiki-link occurrence: the link target and the 1-based line it was
/// found on. Any `|display` part is not stored; the extractors in this module
/// keep only the text before the first `|`.
#[derive(Debug)]
struct Link {
    /// Link target text (the part before any `|`), trimmed.
    target: String,
    /// 1-based line number the link was found on.
    line: u32,
}
2187
2188/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2189/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2190/// exact-cased directory entry to be present.
2191fn store_marker_present(store: &Store) -> bool {
2192    let want = store.root.join("DB.md");
2193    if !want.is_file() {
2194        return false;
2195    }
2196    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2197    match std::fs::read_dir(&store.root) {
2198        Ok(entries) => entries
2199            .flatten()
2200            .any(|e| e.file_name().to_str() == Some("DB.md")),
2201        Err(_) => true, // can't enumerate; trust the is_file() above
2202    }
2203}
2204
2205/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2206/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2207/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2208/// `Schemas`).
2209///
2210/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2211/// than through `check_content_file`. The marker presence is established by the
2212/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2213/// as a store (the marker is the filename), so we report its shape rather than
2214/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2215fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2216    let rel = Path::new("DB.md");
2217    let abs = store.root.join("DB.md");
2218    let Ok(text) = std::fs::read_to_string(&abs) else {
2219        return; // marker present but unreadable: nothing more to say.
2220    };
2221
2222    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2223        // No frontmatter block at all → it cannot declare `type: db-md` and has
2224        // neither required field. Report the type and both missing fields,
2225        // anchored to line 1 (the would-be opening fence).
2226        push(
2227            issues,
2228            Severity::Error,
2229            codes::DB_MD_BAD_TYPE,
2230            rel,
2231            Some(1),
2232            Some("type".into()),
2233            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2234            Some("add a `---` frontmatter block with `type: db-md`".into()),
2235            vec![],
2236        );
2237        for field in ["scope", "owner"] {
2238            push(
2239                issues,
2240                Severity::Error,
2241                codes::DB_MD_MISSING_FIELD,
2242                rel,
2243                Some(1),
2244                Some(field.into()),
2245                format!("DB.md frontmatter is missing required field `{field}`"),
2246                Some(format!("add `{field}:` to the DB.md frontmatter")),
2247                vec![],
2248            );
2249        }
2250        return;
2251    };
2252
2253    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2254    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2255    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2256        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2257        Ok(Value::Null) => Some(BTreeMap::new()),
2258        _ => None,
2259    };
2260
2261    match &fm {
2262        Some(map) => {
2263            // ── type: db-md ──────────────────────────────────────────────────
2264            let type_ = map.get("type").and_then(scalar_string);
2265            if type_.as_deref() != Some("db-md") {
2266                let (line, msg) = match &type_ {
2267                    Some(t) => (
2268                        fm_key_line(&fm_yaml, "type"),
2269                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2270                    ),
2271                    None => (
2272                        Some(1),
2273                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2274                    ),
2275                };
2276                push(
2277                    issues,
2278                    Severity::Error,
2279                    codes::DB_MD_BAD_TYPE,
2280                    rel,
2281                    line,
2282                    Some("type".into()),
2283                    msg,
2284                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2285                    vec![],
2286                );
2287            }
2288
2289            // ── required fields: scope + owner ───────────────────────────────
2290            for field in ["scope", "owner"] {
2291                let present = map
2292                    .get(field)
2293                    .and_then(scalar_string)
2294                    .map(|s| !s.trim().is_empty())
2295                    .unwrap_or(false);
2296                if !present {
2297                    push(
2298                        issues,
2299                        Severity::Error,
2300                        codes::DB_MD_MISSING_FIELD,
2301                        rel,
2302                        // A present-but-empty field anchors to its line; a fully
2303                        // absent one to the block top.
2304                        fm_key_line_or_top(&fm_yaml, field),
2305                        Some(field.into()),
2306                        format!("DB.md frontmatter is missing required field `{field}`"),
2307                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2308                        vec![],
2309                    );
2310                }
2311            }
2312        }
2313        None => {
2314            // Unparseable frontmatter: the identity contract is unprovable. Emit
2315            // the type error and both field errors, anchored to the block top.
2316            push(
2317                issues,
2318                Severity::Error,
2319                codes::DB_MD_BAD_TYPE,
2320                rel,
2321                Some(1),
2322                Some("type".into()),
2323                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2324                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2325                vec![],
2326            );
2327            for field in ["scope", "owner"] {
2328                push(
2329                    issues,
2330                    Severity::Error,
2331                    codes::DB_MD_MISSING_FIELD,
2332                    rel,
2333                    Some(1),
2334                    Some(field.into()),
2335                    format!("DB.md frontmatter is missing required field `{field}`"),
2336                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2337                    vec![],
2338                );
2339            }
2340        }
2341    }
2342
2343    // ── recognized `##` section headers only ─────────────────────────────────
2344    // The body's H2 headings must be one of the four the toolkit reads; any
2345    // other is a likely typo / misplacement (warning — the parser ignores it,
2346    // so the config is not corrupted, but the operator wrote a section that will
2347    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2348    // schema blocks) live under their H2 and are not flagged here.
2349    //
2350    // `## Folders` is recognized: `parse_db_md` reads it into `Config.folders`
2351    // (parser.rs) and the index renders folder display names + descriptions from
2352    // it (index.rs `render_*_md_from_stats`). Flagging it `DB_MD_UNKNOWN_SECTION`
2353    // with "remove this heading" told the operator to delete a working,
2354    // round-tripped config block — destroying curator-authored rollup names. It
2355    // is a real, shipped section; SPEC.md documents it alongside the other three.
2356    for section in crate::parser::extract_sections(&body) {
2357        if section.level != 2 {
2358            continue;
2359        }
2360        let name = section.heading.trim().to_ascii_lowercase();
2361        if matches!(
2362            name.as_str(),
2363            "agent instructions" | "policies" | "schemas" | "folders"
2364        ) {
2365            continue;
2366        }
2367        // `Section::line` is 1-based within the body; the body begins at file
2368        // line `fm_end_line + 1`.
2369        let file_line = fm_end_line + section.line;
2370        push(
2371            issues,
2372            Severity::Warning,
2373            codes::DB_MD_UNKNOWN_SECTION,
2374            rel,
2375            Some(file_line),
2376            None,
2377            format!(
2378                "DB.md has an unrecognized `## {}` section",
2379                section.heading.trim()
2380            ),
2381            Some(
2382                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas`, \
2383                 `## Folders` — remove or rename this heading"
2384                    .into(),
2385            ),
2386            vec![],
2387        );
2388    }
2389
2390    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2391    // Without this, every schema misparse is silent: the operator/agent gets no
2392    // signal that DB.md is interpreting their schema differently from what they
2393    // wrote, and downstream records are validated against the degraded schema.
2394    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2395}
2396
2397/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2398/// duplicate field name within a type, or an unrecognized modifier all parse
2399/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2400/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2401/// already separated out); this pass reports the suspicious *field* shapes,
2402/// anchored to the `### <type>` heading line so the agent can find the block.
2403fn check_db_md_schemas(
2404    store: &Store,
2405    rel: &Path,
2406    body: &str,
2407    fm_end_line: u32,
2408    issues: &mut Vec<Issue>,
2409) {
2410    if store.config.schemas.is_empty() {
2411        return;
2412    }
2413
2414    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2415    // per-type issue can anchor to the declaration block. `extract_sections`
2416    // returns a flat list with 1-based body lines; the body starts at file line
2417    // `fm_end_line + 1`.
2418    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2419    let mut current_h2: Option<String> = None;
2420    for section in crate::parser::extract_sections(body) {
2421        match section.level {
2422            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2423            3 if current_h2.as_deref() == Some("schemas") => {
2424                // The H3 heading text (as written) is the type name — the same
2425                // key `parse_db_md` inserts into `config.schemas`.
2426                type_line
2427                    .entry(section.heading.trim().to_string())
2428                    .or_insert(fm_end_line + section.line);
2429            }
2430            _ => {}
2431        }
2432    }
2433
2434    for (type_name, schema) in &store.config.schemas {
2435        let line = type_line.get(type_name).copied();
2436        let mut seen: BTreeSet<String> = BTreeSet::new();
2437        for field in &schema.fields {
2438            let name = field.name.trim();
2439
2440            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2441            // nameless field that can never match a frontmatter key, so its
2442            // required/shape/enum constraints silently never apply.
2443            if name.is_empty() {
2444                push(
2445                    issues,
2446                    Severity::Warning,
2447                    codes::DB_MD_SCHEMA_FIELD,
2448                    rel,
2449                    line,
2450                    None,
2451                    format!("`### {type_name}` has a schema field bullet with no field name"),
2452                    Some(
2453                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2454                            .into(),
2455                    ),
2456                    vec![],
2457                );
2458                continue;
2459            }
2460
2461            // Duplicate field name within a type: the second declaration's
2462            // constraints are interpreted independently of the first, so the
2463            // author's intent is ambiguous and likely wrong.
2464            if !seen.insert(name.to_string()) {
2465                push(
2466                    issues,
2467                    Severity::Warning,
2468                    codes::DB_MD_SCHEMA_FIELD,
2469                    rel,
2470                    line,
2471                    Some(name.to_string()),
2472                    format!("`### {type_name}` declares field `{name}` more than once"),
2473                    Some(
2474                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2475                    ),
2476                    vec![],
2477                );
2478            }
2479
2480            // Unrecognized modifiers: the parser stashes anything outside the
2481            // known vocabulary (`required` / a shape / `link to …` / `default …`
2482            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2483            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2484            for modifier in &field.unknown_modifiers {
2485                let modifier = modifier.trim();
2486                if modifier.is_empty() {
2487                    continue;
2488                }
2489                push(
2490                    issues,
2491                    Severity::Info,
2492                    codes::DB_MD_SCHEMA_FIELD,
2493                    rel,
2494                    line,
2495                    Some(name.to_string()),
2496                    format!(
2497                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2498                    ),
2499                    Some(
2500                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2501                            .into(),
2502                    ),
2503                    vec![],
2504                );
2505            }
2506        }
2507    }
2508}
2509
2510/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2511fn not_a_store_issue(store: &Store) -> Issue {
2512    Issue {
2513        severity: Severity::Error,
2514        code: codes::NOT_A_STORE,
2515        file: store.root.clone(),
2516        line: None,
2517        key: None,
2518        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2519        suggestion: Some("create a `DB.md` at the store root".into()),
2520        related: vec![],
2521    }
2522}
2523
2524/// True if a store-relative path is a content file: under `sources/` or
2525/// `records/` and not an `index.md`/`index.jsonl`/`log.md`.
2526fn is_content_file(rel: &Path) -> bool {
2527    // Defense in depth: a real content file is always a forward (Normal-only)
2528    // store-relative path. Reject any `..`/absolute/prefix component so a
2529    // malformed object slot judged only by its FIRST component (`records/../..`)
2530    // can never turn a per-file read into a store escape, even if a future caller
2531    // forgets the path-safety gate `changed_objects_since` now applies.
2532    if !is_safe_store_relative_path(rel) {
2533        return false;
2534    }
2535    let Some(first) = rel.iter().next().and_then(|s| s.to_str()) else {
2536        return false;
2537    };
2538    if !matches!(first, "sources" | "records") {
2539        return false;
2540    }
2541    let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
2542    // Only the derived catalog twins are meta INSIDE a layer. `DB.md` / `log.md`
2543    // are reserved meta only at the store ROOT, which the `first` layer check
2544    // above already excludes — so a content file named `log.md` / `DB.md` inside
2545    // a layer (e.g. `records/docs/log.md`) is real content, consistent with
2546    // `Store::walk`.
2547    if matches!(name, "index.md" | "index.jsonl") {
2548        return false;
2549    }
2550    name.ends_with(".md")
2551}
2552
/// True for the store's ROOT meta files: a store-relative path made of exactly
/// one normal component named `DB.md` or `log.md`.
///
/// A path with a parent directory (e.g. `records/docs/log.md`) is real content,
/// not a root meta file. Per the surrounding validator design, these files
/// reach `check_content_file` only via the working-set incoming-linker scan and
/// their bodies are deliberately not link-checked there.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain name.
        (Some(Component::Normal(only)), None) => {
            matches!(only.to_str(), Some("DB.md" | "log.md"))
        }
        // Empty path, a non-normal first component, or a nested path.
        _ => false,
    }
}
2569
/// True if the path's file name is `index.md` or `index.jsonl`, at any depth.
///
/// Such files are derived catalogs: their entries are GENERATED wiki-links, not
/// authored body links. Per the validator's design their integrity belongs to
/// `check_indexes` under `--all`, so `check_content_file` never body-link-checks
/// them even when the working-set scope pulls one in as an incoming linker.
fn is_index_catalog_file(rel: &Path) -> bool {
    let Some(name) = rel.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
2583
/// Split a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The first line must be `---` (trailing whitespace allowed) and the block
/// ends at the next such line. The returned YAML has every line terminated by
/// `\n`; the body is the remaining lines joined with `\n` (no trailing
/// newline); `closing_fence_line` is the 1-based file line of the closing
/// fence. Returns `None` when the file does not open with a fence or the fence
/// is never closed.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // Tolerate a single leading UTF-8 BOM; otherwise a BOM-prefixed file would
    // be read as having no frontmatter and its frontmatter checks silently
    // skipped.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let mut remaining = text.lines();
    if remaining.next()?.trim_end() != "---" {
        return None;
    }

    let mut yaml = String::new();
    // Line 1 is the opening fence; the counter tracks the line just consumed.
    let mut line_no = 1u32;
    let mut closed = false;
    for line in remaining.by_ref() {
        line_no += 1;
        if line.trim_end() == "---" {
            closed = true;
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
    if !closed {
        return None;
    }

    // Whatever the iterator has left is the body: everything after the fence.
    let body = remaining.collect::<Vec<_>>().join("\n");
    Some((yaml, body, line_no))
}
2620
2621/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2622fn read_summary(abs: &Path) -> Option<String> {
2623    let text = std::fs::read_to_string(abs).ok()?;
2624    let (yaml, _, _) = split_frontmatter(&text)?;
2625    let value: Value = serde_norway::from_str(&yaml).ok()?;
2626    if let Value::Mapping(m) = value {
2627        m.get(Value::String("summary".into()))
2628            .and_then(scalar_string)
2629    } else {
2630        None
2631    }
2632}
2633
2634/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2635/// non-string keys (frontmatter keys are always strings).
2636fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2637    let mut out = BTreeMap::new();
2638    for (k, v) in map {
2639        if let Value::String(s) = k {
2640            out.insert(s.clone(), v.clone());
2641        }
2642    }
2643    out
2644}
2645
2646/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2647/// sequences/mappings/null.
2648fn scalar_string(v: &Value) -> Option<String> {
2649    match v {
2650        Value::String(s) => Some(s.clone()),
2651        Value::Number(n) => Some(n.to_string()),
2652        Value::Bool(b) => Some(b.to_string()),
2653        _ => None,
2654    }
2655}
2656
2657/// True if a frontmatter value carries no content for a *required*-field check:
2658/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2659/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2660/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2661/// a shape/enum field is caught by the later non-scalar shape check, not by the
2662/// required-presence check.
2663fn is_empty_value(v: &Value) -> bool {
2664    match v {
2665        Value::Null => true,
2666        Value::Sequence(items) => items.is_empty(),
2667        Value::Mapping(map) => map.is_empty(),
2668        other => scalar_string(other)
2669            .map(|s| s.trim().is_empty())
2670            .unwrap_or(true),
2671    }
2672}
2673
2674/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2675/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2676fn is_flat_scalar_list(v: &Value) -> bool {
2677    match v {
2678        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2679        _ => false,
2680    }
2681}
2682
2683/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2684/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2685/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2686/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2687/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2688/// both forms uniformly, the way the link textually appears — the doctrine view.
2689///
2690/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2691/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2692fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2693    let mut out = Vec::new();
2694    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2695        for link in links {
2696            out.push((key.clone(), link));
2697        }
2698    }
2699    out
2700}
2701
2702/// The wiki-link targets declared under a single top-level frontmatter key
2703/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2704/// carries no `[[...]]`.
2705fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2706    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2707        if k == key {
2708            return links;
2709        }
2710    }
2711    Vec::new()
2712}
2713
2714/// The raw value text under a single top-level frontmatter key (the remainder of
2715/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2716/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2717fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2718    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2719        if k == key {
2720            return Some(value_text);
2721        }
2722    }
2723    None
2724}
2725
2726/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2727/// each top-level key. A top-level key is a line with no leading indentation in
2728/// `name:` form; its value spans the rest of that line plus any deeper-indented
2729/// continuation lines (block scalars, block sequences) until the next top-level
2730/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2731/// absolute file line.
2732fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2733    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2734    let mut current: Option<(String, String, Vec<Link>)> = None;
2735
2736    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2737        let file_line = fm_start_line + idx as u32;
2738        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2739        let trimmed = raw_line.trim();
2740
2741        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2742        // comment. (Indented or dash lines belong to the current key's value.)
2743        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2744            top_level_key(raw_line)
2745        } else {
2746            None
2747        };
2748
2749        if let Some((key, after)) = new_key {
2750            if let Some(done) = current.take() {
2751                blocks.push(done);
2752            }
2753            let mut links = Vec::new();
2754            collect_line_links(after, file_line, &mut links);
2755            current = Some((key, after.trim().to_string(), links));
2756        } else if let Some((_k, value_text, links)) = current.as_mut() {
2757            // Continuation of the current key's value (indented or dash line).
2758            if !value_text.is_empty() {
2759                value_text.push('\n');
2760            }
2761            value_text.push_str(trimmed);
2762            collect_line_links(raw_line, file_line, links);
2763        }
2764    }
2765    if let Some(done) = current.take() {
2766        blocks.push(done);
2767    }
2768    blocks
2769}
2770
/// Parse a frontmatter key line into `(key, value_after_colon)`.
///
/// The line is split at its first `:`. The part before it, trimmed, must be
/// non-empty and consist only of alphanumeric characters, `_` or `-`; the part
/// after it is returned untouched. `None` if there is no `:` or the key part is
/// not of that form.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let (raw_key, value) = line.split_once(':')?;
    let key = raw_key.trim();
    let well_formed = !key.is_empty()
        && key
            .chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
    well_formed.then(|| (key.to_string(), value))
}
2785
2786/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2787/// each tagged with `file_line`.
2788fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2789    let bytes = s.as_bytes();
2790    let mut i = 0;
2791    while i + 1 < bytes.len() {
2792        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2793            if let Some(close) = s[i + 2..].find("]]") {
2794                let inner = &s[i + 2..i + 2 + close];
2795                // Guard against `[[[` (nested) double-counting: the inner must
2796                // not itself open another `[[`.
2797                let target = inner
2798                    .trim_start_matches('[')
2799                    .split('|')
2800                    .next()
2801                    .unwrap_or(inner)
2802                    .trim()
2803                    .to_string();
2804                if !target.is_empty() {
2805                    links.push(Link {
2806                        target,
2807                        line: file_line,
2808                    });
2809                }
2810                i = i + 2 + close + 2;
2811                continue;
2812            }
2813        }
2814        i += 1;
2815    }
2816}
2817
2818/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2819/// Skips fenced code blocks, so example links in docs don't trip the validator.
2820///
2821/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2822/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2823/// only on a line that is the **same** fence character with a run **at least as
2824/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2825/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2826/// document a backtick fence) — the inner backtick line would flip `in_fence`
2827/// off and the demo `[[…]]` inside the code block would be checked as a live
2828/// link, falsely flagging a legal store.
2829fn extract_wiki_links(body: &str) -> Vec<Link> {
2830    let mut out = Vec::new();
2831    let mut fence: Option<(u8, usize)> = None;
2832    for (idx, line) in body.lines().enumerate() {
2833        let content = line.trim_end_matches('\r');
2834        if let Some(f) = fence {
2835            // Inside a fence: the only thing that matters is whether THIS line
2836            // closes it (matching char, run ≥ the opening run). Everything else
2837            // is opaque code — no link extraction.
2838            if fence_closes(content, f) {
2839                fence = None;
2840            }
2841            continue;
2842        }
2843        if let Some(opened) = fence_opens(content) {
2844            fence = Some(opened);
2845            continue;
2846        }
2847        let line_no = (idx + 1) as u32;
2848        let bytes = line.as_bytes();
2849        let mut i = 0;
2850        while i + 1 < bytes.len() {
2851            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2852                if let Some(close) = line[i + 2..].find("]]") {
2853                    let inner = &line[i + 2..i + 2 + close];
2854                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2855                    // Skip a triple-bracket `[[[…` opening: the inner content
2856                    // starts with `[`, so this is the rejected flow-form list
2857                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2858                    // legitimate target never starts with `[`. The frontmatter
2859                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2860                    // extracting a bogus body link here would double-report it as
2861                    // a spurious `WIKI_LINK_SHORT_FORM`.
2862                    if !target.is_empty() && !target.starts_with('[') {
2863                        out.push(Link {
2864                            target,
2865                            line: line_no,
2866                        });
2867                    }
2868                    i = i + 2 + close + 2;
2869                    continue;
2870                }
2871            }
2872            i += 1;
2873        }
2874    }
2875    out
2876}
2877
/// Decide whether `line` starts a fenced code block, returning
/// `(fence byte, run length)` when it does.
///
/// A fence is a run of at least three backticks or three tildes, preceded by
/// at most three spaces of indentation. For a backtick fence, the text after
/// the run (the info string) must not contain another backtick. Kept as a
/// local mirror of the parser's `opening_fence` so validation and parsing
/// agree on what counts as a fence.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return None;
    }
    let marker = match unindented.as_bytes().first().copied() {
        Some(b @ (b'`' | b'~')) => b,
        _ => return None,
    };
    let run = unindented.bytes().take_while(|&b| b == marker).count();
    if run < 3 {
        return None;
    }
    // Only backtick fences restrict their info string; tilde fences do not.
    let info = &unindented[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
2903
/// Report whether `line` ends the code block opened by `fence`
/// (`(fence byte, opening run length)`).
///
/// The line must be indented by at most three spaces, start with a run of the
/// **same** fence byte that is at least as long as the opening run, and carry
/// nothing but whitespace after that run. Because the byte must match, a fence
/// line of the other character inside the block does not close it. Local
/// mirror of the parser's `is_closing_fence`.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, opening_run) = fence;
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return false;
    }
    let after_run = unindented.trim_start_matches(marker as char);
    let run = unindented.len() - after_run.len();
    run >= opening_run && after_run.trim().is_empty()
}
2921
2922/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2923/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2924///
2925/// **Scoped to the inline value on the key line.** The SPEC's canonical
2926/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2927/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2928/// flagged — even though, parsed whole, it nests the same way the rejected
2929/// inline flow form does. So this check looks only at the value written *inline*
2930/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2931/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2932/// mis-encoding), it is flagged. A key with no inline value (the block form,
2933/// whose items live on continuation lines) is never inspected here.
2934///
2935/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2936/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2937/// encodes the identical nested sequence but evaded the old prefix match.
2938fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2939    let mut out = Vec::new();
2940    for line in fm_yaml.lines() {
2941        // Top-level key lines only (no indentation, not a comment or list dash).
2942        if line.starts_with(' ') || line.starts_with('\t') {
2943            continue;
2944        }
2945        let Some((key, rest)) = line.split_once(':') else {
2946            continue;
2947        };
2948        let key = key.trim();
2949        if key.is_empty()
2950            || key.starts_with('#')
2951            || key.starts_with('-')
2952            || !key
2953                .chars()
2954                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2955        {
2956            continue;
2957        }
2958        let rest = rest.trim();
2959        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2960        // the unquoted block form has an empty inline value and is never flagged.
2961        if !rest.starts_with('[') {
2962            continue;
2963        }
2964        // Parse just the inline value and test its shape: a list whose items are
2965        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2966        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2967        // `Seq[Seq[String]]` and is NOT flagged).
2968        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2969            let nested = items.iter().any(|item| match item {
2970                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2971                _ => false,
2972            });
2973            if nested {
2974                out.push(key.to_string());
2975            }
2976        }
2977    }
2978    out
2979}
2980
/// Report whether a bare target (no `.md`) is a full store-relative path: the
/// text before its first `/` is a known layer (`sources` or `records`) and
/// something non-empty follows that `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, remainder)) => {
            matches!(layer, "sources" | "records") && !remainder.is_empty()
        }
        None => false,
    }
}
2989
/// Report whether `path` is made only of ordinary relative components and has
/// at least one of them.
///
/// Validator inputs come from user-authored markdown/JSON sidecars, so a `..`,
/// a root, or a platform prefix must never be allowed to turn a validation
/// probe into a filesystem escape: any such component makes the whole path
/// unsafe. `.` components are tolerated but do not count as content, so an
/// empty path or a bare `.` is rejected too.
fn is_safe_store_relative_path(path: &Path) -> bool {
    path.components()
        .try_fold(false, |seen_normal, component| match component {
            Component::Normal(_) => Some(true),
            Component::CurDir => Some(seen_normal),
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
        })
        .unwrap_or(false)
}
3004
3005fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
3006    let path = Path::new(bare);
3007    if !is_safe_store_relative_path(path) {
3008        return None;
3009    }
3010    Some(PathBuf::from(format!("{bare}.md")))
3011}
3012
/// How a wiki-link / index-entry target resolves on disk.
///
/// A field-less outcome enum, so it derives `Copy` and equality: callers and
/// tests can compare results directly (`assert_eq!`) and print them with
/// `{:?}` instead of having to `match` on every use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TargetResolution {
    /// The target exists (either as the literal path or with a `.md` suffix).
    Exists,
    /// The target is a safe store-relative path but no file exists for it.
    Missing,
    /// The target escapes the store (absolute, `..`, prefix) — never probe it.
    Unsafe,
}
3022
3023/// Resolve a bare wiki-link / index-entry target the way the graph engine does
3024/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
3025/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
3026/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
3027/// real file), then the `.md`-appended path (the common case for content
3028/// pages). Without trying the literal path first, a legal link to a raw source
3029/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
3030/// resolves it.
3031fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
3032    // The literal path and the `.md`-appended path share the same safety check
3033    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
3034    // target is unsafe in both forms.
3035    if !is_safe_store_relative_path(Path::new(bare)) {
3036        return TargetResolution::Unsafe;
3037    }
3038    match resolved_target_abs(store, bare) {
3039        Some(_) => TargetResolution::Exists,
3040        None => TargetResolution::Missing,
3041    }
3042}
3043
3044/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
3045/// trying the literal path first, then `.md`-appended — mirroring the graph
3046/// engine. `None` when neither exists, or when the bare target escapes the store
3047/// (callers that need to distinguish unsafe from merely-missing use
3048/// [`resolve_wiki_target`]).
3049fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
3050    if !is_safe_store_relative_path(Path::new(bare)) {
3051        return None;
3052    }
3053    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
3054    // verbatim under `sources/`).
3055    let literal = store.root.join(bare);
3056    if literal.is_file() {
3057        return Some(literal);
3058    }
3059    // The `.md`-appended path (a content page referenced without its extension).
3060    let with_md = store.root.join(format!("{bare}.md"));
3061    if with_md.is_file() {
3062        return Some(with_md);
3063    }
3064    None
3065}
3066
/// Report whether the bare target `bare` is `prefix` itself or lies beneath it
/// (both given without `.md`). Trailing `/` on `prefix` is ignored, and the
/// match is on whole path segments: `records/contacts-old/a` is not under
/// `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    bare.strip_prefix(prefix.trim_end_matches('/'))
        .is_some_and(|remainder| remainder.is_empty() || remainder.starts_with('/'))
}
3072
/// Compute the type-folder of a store-relative content path, i.e.
/// `<layer>/<type-folder>` — the folder directly under the layer, which is
/// what deeper date-shards roll up to.
///
/// Returns `None` when the path has fewer than three components (a file
/// sitting directly in a layer, or shorter) or when its first component is not
/// one of the two layers (`sources`, `records`).
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    let layer = parts.next()?;
    let type_folder = parts.next()?;
    // A third component (the file, or a shard folder) must exist.
    parts.next()?;
    matches!(layer, "sources" | "records").then(|| Path::new(layer).join(type_folder))
}
3086
/// Identify the layer directory (`records` / `sources`) that a *loose* content
/// file sits directly in: the path must have exactly two components and the
/// first must be a known layer. A file inside a type-folder, or outside any
/// layer, yields `None`.
///
/// Counterpart to the index crate's `loose_layer_of`, kept local so `validate`
/// needs no index internals.
fn loose_layer_dir(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    let (layer, _file) = (parts.next()?, parts.next()?);
    if parts.next().is_some() {
        return None;
    }
    matches!(layer, "sources" | "records").then(|| PathBuf::from(layer))
}
3098
3099/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`,
3100/// returning store-relative paths to be parsed in full. Skips hidden dirs and
3101/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
3102/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
3103/// (a single presence-only pass), so the loop default never walks-and-*parses*
3104/// the whole content tree.
3105///
3106/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
3107/// is reserved (`Store::is_in_log_dir` checks only the first path component);
3108/// the walk roots are the two layers, so the root archive is already out of
3109/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
3110/// decision log) is real content (see `is_content_file`), so pruning every
3111/// `name == "log"` made `--all` silently skip those files — reporting fewer
3112/// errors than the default working-set scope on the same store.
3113fn walk_content_files(root: &Path) -> Vec<PathBuf> {
3114    let mut out = Vec::new();
3115    for layer in ["sources", "records"] {
3116        let base = root.join(layer);
3117        if !base.is_dir() {
3118            continue;
3119        }
3120        for entry in walkdir::WalkDir::new(&base)
3121            // Follow symlinks, matching the loop-default `md_walker`
3122            // (store.rs `follow_links(true)`): a content file that is a symlink
3123            // into the store, or that lives in a symlinked-in type-folder, is
3124            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3125            // `walk_all_md`, both following symlinks). Without this the `--all`
3126            // sweep silently SKIPPED such files, so the authoritative superset
3127            // reported FEWER issues than the loop scope on the same store —
3128            // inverting the `--all`-is-the-superset contract. walkdir's loop
3129            // detection drops a symlink cycle (yields an Err that `.flatten()`
3130            // discards), so this cannot hang.
3131            .follow_links(true)
3132            .into_iter()
3133            .filter_entry(|e| {
3134                let name = e.file_name().to_str().unwrap_or("");
3135                !name.starts_with('.')
3136            })
3137            .flatten()
3138        {
3139            if !entry.file_type().is_file() {
3140                continue;
3141            }
3142            let name = entry.file_name().to_str().unwrap_or("");
3143            if name.ends_with(".md") && name != "index.md" {
3144                if let Ok(rel) = entry.path().strip_prefix(root) {
3145                    out.push(rel.to_path_buf());
3146                }
3147            }
3148        }
3149    }
3150    out.sort();
3151    out
3152}
3153
3154/// Every `index.md` under the store (root + layers + type-folders), as
3155/// store-relative paths. Used to detect orphan indexes. Like
3156/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
3157/// and its `index.md` is not pruned (only the root-level `log/` archive is
3158/// reserved, and the walk roots are the two layers, so it is already
3159/// out of scope).
3160fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3161    let mut out = Vec::new();
3162    if root.join("index.md").is_file() {
3163        out.push(PathBuf::from("index.md"));
3164    }
3165    for layer in ["sources", "records"] {
3166        let base = root.join(layer);
3167        if !base.is_dir() {
3168            continue;
3169        }
3170        for entry in walkdir::WalkDir::new(&base)
3171            // Follow symlinks, matching the loop-default `md_walker`
3172            // (store.rs `follow_links(true)`): a content file that is a symlink
3173            // into the store, or that lives in a symlinked-in type-folder, is
3174            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3175            // `walk_all_md`, both following symlinks). Without this the `--all`
3176            // sweep silently SKIPPED such files, so the authoritative superset
3177            // reported FEWER issues than the loop scope on the same store —
3178            // inverting the `--all`-is-the-superset contract. walkdir's loop
3179            // detection drops a symlink cycle (yields an Err that `.flatten()`
3180            // discards), so this cannot hang.
3181            .follow_links(true)
3182            .into_iter()
3183            .filter_entry(|e| {
3184                let name = e.file_name().to_str().unwrap_or("");
3185                !name.starts_with('.')
3186            })
3187            .flatten()
3188        {
3189            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3190                if let Ok(rel) = entry.path().strip_prefix(root) {
3191                    out.push(rel.to_path_buf());
3192                }
3193            }
3194        }
3195    }
3196    out.sort();
3197    out
3198}
3199
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number.
///
/// Derives `Debug`/`Clone`/`PartialEq`/`Eq` so parsed entries can be compared
/// and printed directly (e.g. in `assert_eq!`) rather than field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
struct IndexEntry {
    /// Bare link target: the text before any `|display` segment, trimmed.
    target: String,
    /// Summary text following the separator, or `None` when the line has no
    /// separator or nothing after it.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the index file.
    line: u32,
}
3207
3208/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3209/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3210/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3211/// path, the summary text is whatever follows the em dash.
3212fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3213    let mut out = Vec::new();
3214    let mut in_more = false;
3215    for (idx, line) in text.lines().enumerate() {
3216        let trimmed = line.trim_start();
3217        if trimmed.starts_with("## More") {
3218            in_more = true;
3219            continue;
3220        }
3221        if in_more {
3222            continue;
3223        }
3224        if !trimmed.starts_with("- ") {
3225            continue;
3226        }
3227        // Find the first `[[...]]`.
3228        let Some(open) = trimmed.find("[[") else {
3229            continue;
3230        };
3231        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3232            continue;
3233        };
3234        let inner = &trimmed[open + 2..open + 2 + close_rel];
3235        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3236
3237        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3238        let after = &trimmed[open + 2 + close_rel + 2..];
3239        let summary_text = extract_index_entry_summary(after);
3240
3241        out.push(IndexEntry {
3242            target,
3243            summary_text,
3244            line: (idx + 1) as u32,
3245        });
3246    }
3247    out
3248}
3249
3250/// Pull the summary portion out of the text trailing an index entry's
3251/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3252/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3253/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3254/// renderer's tag separator).
3255fn extract_index_entry_summary(after: &str) -> Option<String> {
3256    let mut s = after.trim();
3257    // Drop a leading "(N ...)" count segment, if present.
3258    if s.starts_with('(') {
3259        if let Some(close) = s.find(')') {
3260            s = s[close + 1..].trim_start();
3261        }
3262    }
3263    // Require an em dash or hyphen separator before the summary.
3264    let s = if let Some(rest) = s.strip_prefix('—') {
3265        rest.trim()
3266    } else if let Some(rest) = s.strip_prefix('-') {
3267        rest.trim()
3268    } else {
3269        return None;
3270    };
3271    if s.is_empty() {
3272        return None;
3273    }
3274    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3275    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3276    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3277    // the file has no tags. The previous code also accepted a *single*-spaced
3278    // ` · ` separator, which collided with a legal summary whose own text ends
3279    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3280    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3281    // summary verbatim (no tag block, since there are no tags), but the loose
3282    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3283    // `"Standup notes"` against the file's full summary, and emitted a spurious
3284    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3285    // (rebuild regenerates the identical line). Matching the renderer's exact
3286    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3287    // matches from the right so only the real trailing tag block is considered.
3288    let s = match s.rsplit_once("  ·  ") {
3289        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3290        _ => s,
3291    };
3292    Some(s.to_string())
3293}
3294
/// Report whether `s` is a non-empty tag block: at least one
/// whitespace-separated token, and every token is `#` followed by at least one
/// more byte. This is the shape the index renderer appends after its `·`
/// separator (`crate::index::format_md_entry`), and it is what separates the
/// renderer's `  ·  #tag` suffix from a literal `·` in the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    tokens.peek().is_some() && tokens.all(|tok| tok.len() >= 2 && tok.starts_with('#'))
}
3309
3310/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3311/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3312/// or the header isn't well-formed.
3313fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3314    let rest = line.strip_prefix("## [")?;
3315    let close = rest.find(']')?;
3316    let ts_str = &rest[..close];
3317    let tail = rest[close + 1..].trim();
3318
3319    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3320    // attach a zero offset — the log header carries minute precision, no zone.
3321    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3322    let offset = FixedOffset::east_opt(0)?;
3323    let ts = naive.and_local_timezone(offset).single()?;
3324
3325    // kind | object
3326    let (kind, object) = match tail.split_once('|') {
3327        Some((k, o)) => {
3328            let o = o.trim();
3329            (
3330                k.trim().to_string(),
3331                if o.is_empty() {
3332                    None
3333                } else {
3334                    Some(o.to_string())
3335                },
3336            )
3337        }
3338        None => (tail.to_string(), None),
3339    };
3340    if kind.is_empty() {
3341        return None;
3342    }
3343    Some((ts, kind, object))
3344}
3345
3346/// Every log file that holds entries for the working-set scan: the active
3347/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3348/// strictly-prior-month entries into the archives, so the active file alone is
3349/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3350/// unvalidated object can live in an archive after a month rollover. Reading the
3351/// archives here keeps the working-set readers in sync with the rest of the log
3352/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3353/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3354/// only log headers, never the content store, so the loop budget is preserved.
3355fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3356    let mut files = vec![store.root.join("log.md")];
3357    let archive_dir = store.root.join("log");
3358    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3359        let mut archives: Vec<PathBuf> = entries
3360            .flatten()
3361            .map(|e| e.path())
3362            .filter(|p| {
3363                p.is_file()
3364                    && p.file_name()
3365                        .and_then(|s| s.to_str())
3366                        .and_then(|n| n.strip_suffix(".md"))
3367                        .is_some_and(is_year_month_archive)
3368            })
3369            .collect();
3370        // Deterministic order (oldest month first); the callers fold across all
3371        // files so order doesn't affect the result, but a stable order keeps the
3372        // scan reproducible.
3373        archives.sort();
3374        files.extend(archives);
3375    }
3376    files
3377}
3378
/// Report whether `s` has the shape of a `YYYY-MM` archive stem: exactly four
/// ASCII digits, a `-`, then two ASCII digits. This is the `log/<YYYY-MM>.md`
/// naming that the rotation in [`crate::log`] emits. Only the shape is checked;
/// the month value is not range-validated.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1] => [y0, y1, y2, y3, m0, m1]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
3388
3389/// The timestamp of the most recent `validate` entry across the active `log.md`
3390/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3391/// Reads only headers; never the whole store. Archive-aware so a `validate`
3392/// entry that rotated into an archive after a month rollover still anchors the
3393/// cutoff (without this, the cutoff silently resets to `None`).
3394fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3395    let mut latest: Option<DateTime<FixedOffset>> = None;
3396    for file in log_files_for_working_set(store) {
3397        let Ok(text) = std::fs::read_to_string(&file) else {
3398            continue;
3399        };
3400        for line in text.lines() {
3401            if !line.starts_with("## [") {
3402                continue;
3403            }
3404            if let Some((ts, kind, _)) = parse_log_header(line) {
3405                if kind == "validate" {
3406                    latest = Some(match latest {
3407                        Some(p) if p >= ts => p,
3408                        _ => ts,
3409                    });
3410                }
3411            }
3412        }
3413    }
3414    latest
3415}
3416
3417/// The set of content objects changed since `cutoff`, read from log entries
3418/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3419/// counts (no prior validate window). Returns store-relative `.md` paths.
3420///
3421/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3422/// month rollover [`Log::append`] rotates prior-month entries out of the active
3423/// file, so an object changed-but-never-validated in a prior month lives only in
3424/// an archive. Reading the archives here is what keeps `dbmd validate` from
3425/// silently skipping those files. Reads only log headers, never the content
3426/// store.
3427fn changed_objects_since(
3428    store: &Store,
3429    cutoff: Option<DateTime<FixedOffset>>,
3430) -> BTreeSet<PathBuf> {
3431    let mut out = BTreeSet::new();
3432    for file in log_files_for_working_set(store) {
3433        let Ok(text) = std::fs::read_to_string(&file) else {
3434            continue;
3435        };
3436        for line in text.lines() {
3437            if !line.starts_with("## [") {
3438                continue;
3439            }
3440            let Some((ts, kind, object)) = parse_log_header(line) else {
3441                continue;
3442            };
3443            if let Some(c) = cutoff {
3444                if ts < c {
3445                    continue;
3446                }
3447            }
3448            if !matches!(
3449                kind.as_str(),
3450                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3451            ) {
3452                continue;
3453            }
3454            if let Some(obj) = object {
3455                // The object slot is a store-relative path (or a wiki-link target).
3456                let bare = obj
3457                    .trim()
3458                    .trim_start_matches("[[")
3459                    .trim_end_matches("]]")
3460                    .split('|')
3461                    .next()
3462                    .unwrap_or("")
3463                    .trim()
3464                    .trim_end_matches(".md")
3465                    .to_string();
3466                if bare.is_empty() {
3467                    continue;
3468                }
3469                // Containment: the object slot is a log-header field that can
3470                // carry a `..`/absolute/prefix path (a hand-edited or
3471                // merge-malformed log line). Route it through the same safety gate
3472                // every other disk-touching validator path uses
3473                // (`safe_md_target_rel`, which `link_target_type` already applies)
3474                // so a `records/../../leaky` object cannot make
3475                // `validate_working_set` read + frontmatter-report on a file
3476                // OUTSIDE the store root. An unsafe object is dropped from the
3477                // changed set rather than probed.
3478                if let Some(rel) = safe_md_target_rel(&bare) {
3479                    out.insert(rel);
3480                }
3481            }
3482        }
3483    }
3484    out
3485}
3486
/// The result of the [`derived_from_ignored_type`] policy check: the
/// `derived_from` target that resolves to an ignored-type record, plus that
/// record's type. Carries exactly what both the validate finding and the
/// write-time warning need to render their message.
///
/// Produced only when the check finds a match; a record with no offending
/// `derived_from` target yields `None` from the check instead of a value of
/// this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target as written (bare store-relative path,
    /// no `.md`). This is the first matching target in the order the caller
    /// supplied them.
    pub target: String,
    /// The resolved `type` of that target, which is present in
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3500
3501/// **The single authoritative `### Ignored types` derivation check.** Decides
3502/// whether a conclusion record derives from an ignored-type record: the
3503/// `meta-type` must be `conclusion`, `### Ignored types` must be non-empty, and
3504/// some `derived_from` target must resolve to a record whose `type` is in
3505/// `ignored_types`. Returns the first such target (and its type), or `None`.
3506///
3507/// Both surfaces call this so the policy lives in exactly one place:
3508/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3509/// `derived_from` targets it scanned from the raw frontmatter, and the write
3510/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3511/// The link *extraction* differs per surface (text-scan with line numbers vs.
3512/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3513/// resolution, and `ignored_types` membership — does not.
3514pub fn derived_from_ignored_type<I, S>(
3515    store: &Store,
3516    meta_type: &str,
3517    derived_from_targets: I,
3518) -> Option<DerivedFromIgnored>
3519where
3520    I: IntoIterator<Item = S>,
3521    S: AsRef<str>,
3522{
3523    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3524        return None;
3525    }
3526    for target in derived_from_targets {
3527        let target = target.as_ref();
3528        if let Some(target_type) = link_target_type(store, target) {
3529            if store.config.ignored_types.contains(&target_type) {
3530                return Some(DerivedFromIgnored {
3531                    target: target.to_string(),
3532                    target_type,
3533                });
3534            }
3535        }
3536    }
3537    None
3538}
3539
3540/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3541fn link_target_type(store: &Store, target: &str) -> Option<String> {
3542    let bare = target.trim_end_matches(".md");
3543    let abs = store.root.join(safe_md_target_rel(bare)?);
3544    let text = std::fs::read_to_string(&abs).ok()?;
3545    let (yaml, _, _) = split_frontmatter(&text)?;
3546    let value: Value = serde_norway::from_str(&yaml).ok()?;
3547    if let Value::Mapping(m) = value {
3548        m.get(Value::String("type".into())).and_then(scalar_string)
3549    } else {
3550        None
3551    }
3552}
3553
3554// ── Shape validators ─────────────────────────────────────────────────────────
3555
3556/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3557/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3558fn is_iso8601(s: &str) -> bool {
3559    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3560}
3561
3562/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3563/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3564/// accept the date-only form per the SPEC's worked example.
3565fn is_iso8601_date_or_datetime(s: &str) -> bool {
3566    let s = s.trim();
3567    if DateTime::parse_from_rfc3339(s).is_ok() {
3568        return true;
3569    }
3570    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3571}
3572
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Rules, applied to the input after trimming surrounding whitespace:
///
/// - there must be exactly one `@`: the string is split at the first `@`, and
///   a domain that still contains an `@` (the double-`@` typo
///   `sarah@@acme.com`, or `a@b@c.com`) is rejected;
/// - the local part must be non-empty;
/// - the domain must contain a `.` and must neither start nor end with one;
/// - no whitespace of any kind may appear inside the address. Checking only
///   for U+0020 would let a tab or newline through (`a\tb@acme.com`).
fn is_email(s: &str) -> bool {
    let s = s.trim();
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    !local.is_empty()
        && !domain.contains('@')
        && domain.contains('.')
        && !domain.starts_with('.')
        && !domain.ends_with('.')
        // Covers both the local part and the domain in one pass.
        && !s.contains(char::is_whitespace)
}
3590
/// True for a currency amount: an optional symbol **or** 3-letter ISO code,
/// then a plain decimal number with optional digit-group separators and ≤ 2
/// decimals.
///
/// Accepted prefixes (at most one of them):
///
/// - a leading `$`, `€`, `£` or `¥`, optionally followed by whitespace;
/// - a leading code of exactly three ASCII letters that is separated from the
///   number by whitespace (`USD 100`, `EUR 9.50`), so a bare `USD` with no
///   amount still fails.
///
/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
/// rejected, and the ≤ 2-decimal rule is actually enforced. Commas are only
/// tolerated as separators *between digits of the integer part*; a comma in
/// the fraction (`1.2,3`) or a leading / trailing / doubled comma is rejected
/// rather than silently deleted.
fn is_currency(s: &str) -> bool {
    let t = s.trim();

    let after_symbol = ["$", "€", "£", "¥"]
        .iter()
        .find_map(|sym| t.strip_prefix(*sym));

    let amount = match after_symbol {
        Some(rest) => rest.trim_start(),
        // Only look for an ISO code when no symbol was consumed: the prefix
        // is a symbol *or* a code, never both (`$ USD 100` is not an amount).
        None => match t.split_once(char::is_whitespace) {
            Some((head, rest))
                if head.len() == 3 && head.bytes().all(|b| b.is_ascii_alphabetic()) =>
            {
                rest.trim_start()
            }
            _ => t,
        },
    };

    match strip_group_separators(amount) {
        Some(plain) => is_plain_amount(&plain),
        None => false,
    }
}

/// Remove digit-group commas from `amount`, or return `None` if a comma is
/// misplaced.
///
/// A comma is well placed only inside the integer part (before the first `.`,
/// after an optional sign) and only between two non-empty runs of ASCII
/// digits. Group *width* is deliberately not checked, so both `1,234,567` and
/// `1,00,000` pass.
fn strip_group_separators(amount: &str) -> Option<String> {
    if !amount.contains(',') {
        return Some(amount.to_string());
    }
    let (int_part, frac_part) = match amount.split_once('.') {
        Some((i, f)) => (i, Some(f)),
        None => (amount, None),
    };
    if frac_part.is_some_and(|f| f.contains(',')) {
        return None;
    }
    let unsigned = int_part.strip_prefix(['+', '-']).unwrap_or(int_part);
    let well_grouped = unsigned
        .split(',')
        .all(|group| !group.is_empty() && group.bytes().all(|b| b.is_ascii_digit()));
    if well_grouped {
        Some(amount.chars().filter(|c| *c != ',').collect())
    } else {
        None
    }
}

/// True for a bare decimal amount: optional sign, ≥ 1 digit, an optional
/// fractional part of 1–2 digits. No exponents, no `inf`/`NaN`, no empty string.
fn is_plain_amount(s: &str) -> bool {
    let digits = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match digits.split_once('.') {
        Some((i, f)) => (i, Some(f)),
        None => (digits, None),
    };
    if int_part.is_empty() || !int_part.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }
    match frac_part {
        None => true,
        Some(f) => (1..=2).contains(&f.len()) && f.bytes().all(|b| b.is_ascii_digit()),
    }
}
3635
/// Whether `s` (ignoring surrounding whitespace) is an http(s) URL: it starts
/// with `http://` or `https://` and has at least one character after the
/// scheme. A one-character host (`http://x`) is accepted; a bare scheme
/// (`http://`, `https://`) is rejected. The scheme match is case-sensitive.
fn is_url(s: &str) -> bool {
    let trimmed = s.trim();
    // The two prefixes are mutually exclusive, so probe order is irrelevant.
    let after_scheme = trimmed
        .strip_prefix("https://")
        .or_else(|| trimmed.strip_prefix("http://"));
    match after_scheme {
        Some(rest) => !rest.is_empty(),
        None => false,
    }
}
3650
3651/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3652fn shape_suggestion(shape: Shape) -> String {
3653    match shape {
3654        Shape::String => "use a scalar string".into(),
3655        Shape::Int => "use an integer".into(),
3656        Shape::Bool => "use `true` or `false`".into(),
3657        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3658        Shape::Email => "use a `<local>@<domain>` address".into(),
3659        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3660        Shape::Url => "use an http(s) URL".into(),
3661    }
3662}
3663
3664/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3665/// can't know the folder, so the suggestion is generic but actionable.
3666fn short_form_suggestion(bare: &str) -> Option<String> {
3667    Some(format!(
3668        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3669        slugish(bare)
3670    ))
3671}
3672
/// Turn a plain string into a filesystem-ish leaf: trim, lowercase, turn each
/// whitespace character into `-`, and drop every character that is not
/// alphanumeric, `-`, `/` or `_`.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut leaf = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        if ch.is_whitespace() {
            leaf.push('-');
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            leaf.push(ch);
        }
    }
    leaf
}
3682
3683/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3684/// hashes a byte or reads an asset file's contents — byte presence and hash
3685/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3686/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3687/// content file's `asset`/`assets` declarations.
3688fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3689    use crate::assets;
3690
3691    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3692    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3693
3694    // Lenient manifest read: a malformed line is reported, not fatal.
3695    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3696    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3697        for (i, line) in text.lines().enumerate() {
3698            if line.trim().is_empty() {
3699                continue;
3700            }
3701            match serde_json::from_str::<assets::AssetRecord>(line) {
3702                Ok(rec) => {
3703                    manifest.insert(rec.path.clone(), rec);
3704                }
3705                Err(e) => push(
3706                    issues,
3707                    Severity::Error,
3708                    codes::ASSET_MANIFEST_MALFORMED,
3709                    manifest_rel,
3710                    Some((i as u32) + 1),
3711                    None,
3712                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3713                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3714                    vec![],
3715                ),
3716            }
3717        }
3718    }
3719
3720    // Per-wrapper declarations: every declared asset must be in the manifest and
3721    // must not point at a markdown content file.
3722    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3723    for (rel, p) in parsed {
3724        let Some(map) = &p.fm else {
3725            continue;
3726        };
3727        for decl in assets::declarations_from_yaml_map(map) {
3728            let norm = match assets::normalize_asset_path(&decl.path) {
3729                Ok(n) => n,
3730                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3731            };
3732            declared.insert(norm.clone());
3733            let is_md = Path::new(&norm)
3734                .extension()
3735                .and_then(|e| e.to_str())
3736                .map(|e| e.eq_ignore_ascii_case("md"))
3737                .unwrap_or(false);
3738            if is_md {
3739                push(
3740                    issues,
3741                    Severity::Warning,
3742                    codes::ASSET_PATH_IS_CONTENT,
3743                    rel,
3744                    None,
3745                    Some("asset".to_string()),
3746                    format!("asset path `{norm}` points at a markdown content file"),
3747                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3748                    vec![PathBuf::from(&norm)],
3749                );
3750            }
3751            if !manifest.contains_key(&norm) {
3752                push(
3753                    issues,
3754                    Severity::Error,
3755                    codes::ASSET_UNDECLARED,
3756                    rel,
3757                    None,
3758                    Some("asset".to_string()),
3759                    format!(
3760                        "references asset `{norm}` with no record in {}",
3761                        assets::MANIFEST_FILE
3762                    ),
3763                    Some("run `dbmd assets scan` to catalog it".to_string()),
3764                    vec![PathBuf::from(&norm)],
3765                );
3766            }
3767        }
3768    }
3769
3770    // Per-record: wrapper existence + orphan detection.
3771    for (path, rec) in &manifest {
3772        for w in &rec.wrappers {
3773            if !store.root.join(w).is_file() {
3774                push(
3775                    issues,
3776                    Severity::Error,
3777                    codes::ASSET_WRAPPER_BROKEN,
3778                    Path::new(path),
3779                    None,
3780                    None,
3781                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3782                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3783                    vec![PathBuf::from(w)],
3784                );
3785            }
3786        }
3787        if !declared.contains(path) {
3788            push(
3789                issues,
3790                Severity::Warning,
3791                codes::ASSET_MANIFEST_ORPHAN,
3792                Path::new(path),
3793                None,
3794                None,
3795                format!(
3796                    "`{path}` is in {} but no wrapper references it",
3797                    assets::MANIFEST_FILE
3798                ),
3799                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3800                vec![],
3801            );
3802        }
3803    }
3804}
3805
3806/// Push a fully-formed [`Issue`].
3807#[allow(clippy::too_many_arguments)]
3808fn push(
3809    issues: &mut Vec<Issue>,
3810    severity: Severity,
3811    code: &'static str,
3812    file: &Path,
3813    line: Option<u32>,
3814    key: Option<String>,
3815    message: String,
3816    suggestion: Option<String>,
3817    related: Vec<PathBuf>,
3818) {
3819    issues.push(Issue {
3820        severity,
3821        code,
3822        file: file.to_path_buf(),
3823        line,
3824        key,
3825        message,
3826        suggestion,
3827        related,
3828    });
3829}
3830
/// File line (1-based) of a top-level frontmatter key, or `None` if absent.
///
/// `fm_yaml` is the YAML block *without* the opening fence, so YAML line 0 is
/// file line 2 (file line 1 is the opening `---`). A line matches only when it
/// starts at column 0 with `key` immediately followed by `:`; indented keys and
/// longer keys sharing the prefix (`typed:` for `type`) do not match. The first
/// matching line wins.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let is_key_line = |line: &str| {
        let names_key = line
            .trim_start()
            .strip_prefix(key)
            .is_some_and(|rest| rest.starts_with(':'));
        // Column-0 requirement: rules out nested keys and list items.
        names_key && line.starts_with(key)
    };
    fm_yaml
        .lines()
        .position(is_key_line)
        // +2: skip the opening `---` and convert 0-based to 1-based.
        .map(|idx| (idx as u32) + 2)
}
3846
3847/// The line a *field-absence* issue (a required key that is missing entirely)
3848/// anchors to: the key's line when present, else line `1` — the frontmatter
3849/// block's opening `---`. A missing key has no line of its own; anchoring it to
3850/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3851/// line to point at instead of an unhelpful `null`.
3852fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3853    fm_key_line(fm_yaml, key).or(Some(1))
3854}
3855
3856/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3857/// output deterministic across runs.
3858fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3859    a.file
3860        .cmp(&b.file)
3861        .then(a.line.cmp(&b.line))
3862        .then(a.code.cmp(b.code))
3863        .then(a.key.cmp(&b.key))
3864}
3865
3866// ═════════════════════════════════════════════════════════════════════════════
3867//  Tests
3868// ═════════════════════════════════════════════════════════════════════════════
3869
3870#[cfg(test)]
3871mod tests {
3872    use super::*;
3873    use crate::parser::{Config, FieldSpec};
3874    use std::fs;
3875    use tempfile::TempDir;
3876
3877    #[test]
3878    fn split_frontmatter_tolerates_leading_bom() {
3879        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3880        // fence must not make validate treat the file as frontmatter-less while
3881        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3882        // for `\u{feff}---` and the function returned None.
3883        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3884        let parsed = split_frontmatter(text);
3885        assert!(
3886            parsed.is_some(),
3887            "a leading BOM must not hide frontmatter from validate"
3888        );
3889        let (yaml, body, close_line) = parsed.unwrap();
3890        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3891        assert_eq!(body, "body");
3892        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3893    }
3894
3895    /// A test store builder over a real tempdir. Every helper writes real files
3896    /// so the assertions exercise real behavior, not mocks.
3897    struct Fixture {
3898        dir: TempDir,
3899        config: Config,
3900    }
3901
3902    impl Fixture {
3903        /// A fresh store with a **valid** `DB.md` (the identity contract:
3904        /// `type: db-md` + `scope` + `owner`) and the two layer dirs. A valid
3905        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3906        /// clean; tests that want a broken DB.md write their own via `write`.
3907        fn new() -> Self {
3908            let dir = TempDir::new().unwrap();
3909            fs::write(
3910                dir.path().join("DB.md"),
3911                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3912            )
3913            .unwrap();
3914            for layer in ["sources", "records"] {
3915                fs::create_dir_all(dir.path().join(layer)).unwrap();
3916            }
3917            Fixture {
3918                dir,
3919                config: Config::default(),
3920            }
3921        }
3922
3923        /// A store with no `DB.md` marker.
3924        fn bare() -> Self {
3925            let dir = TempDir::new().unwrap();
3926            Fixture {
3927                dir,
3928                config: Config::default(),
3929            }
3930        }
3931
3932        /// Write a file at a store-relative path, creating parent dirs.
3933        fn write(&self, rel: &str, contents: &str) {
3934            let abs = self.dir.path().join(rel);
3935            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3936            fs::write(abs, contents).unwrap();
3937        }
3938
3939        fn store(&self) -> Store {
3940            Store {
3941                root: self.dir.path().to_path_buf(),
3942                config: self.config.clone(),
3943            }
3944        }
3945
3946        fn store_all(&self) -> Vec<Issue> {
3947            validate_all(&self.store()).unwrap()
3948        }
3949
3950        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3951        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3952        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3953        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3954        /// the sidecar carries the COMPLETE per-field projection and the fixture
3955        /// can't silently drift from what the index writer emits.
3956        fn rebuild_indexes(&self) {
3957            crate::index::Index::rebuild_all(&self.store()).unwrap();
3958        }
3959    }
3960
3961    /// True if any issue has this code.
3962    fn has(issues: &[Issue], code: &str) -> bool {
3963        issues.iter().any(|i| i.code == code)
3964    }
3965
3966    /// Count issues with a code.
3967    fn count(issues: &[Issue], code: &str) -> usize {
3968        issues.iter().filter(|i| i.code == code).count()
3969    }
3970
3971    /// The first issue with a code, or panic.
3972    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3973        issues
3974            .iter()
3975            .find(|i| i.code == code)
3976            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3977    }
3978
3979    /// A minimal valid `contact` body for reuse.
3980    fn valid_contact(summary: &str) -> String {
3981        format!(
3982            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3983        )
3984    }
3985
3986    // ── store marker ──────────────────────────────────────────────────────────
3987
3988    #[test]
3989    fn not_a_store_when_db_md_absent() {
3990        let fx = Fixture::bare();
3991        let issues = fx.store_all();
3992        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3993        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3994        assert!(issues[0].is_error());
3995    }
3996
3997    #[test]
3998    fn working_set_also_reports_not_a_store() {
3999        let fx = Fixture::bare();
4000        let issues = validate_working_set(&fx.store(), None).unwrap();
4001        assert!(has(&issues, codes::NOT_A_STORE));
4002    }
4003
4004    #[test]
4005    fn clean_store_has_no_issues() {
4006        let fx = Fixture::new();
4007        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4008        // Build the canonical indexes (complete per-field jsonl included) the
4009        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
4010        // proven clean across every projected field, not just summary/type.
4011        fx.rebuild_indexes();
4012        let issues = fx.store_all();
4013        assert!(
4014            issues.is_empty(),
4015            "expected a clean store, got: {issues:#?}"
4016        );
4017    }
4018
4019    // ── meta-type closed enum ─────────────────────────────────────────────────
4020
4021    /// Regression (adversarial review): a NON-SCALAR `meta-type` (a YAML list or
4022    /// mapping) must be rejected with `FM_BAD_META_TYPE`, not silently slip past
4023    /// the enum check (and then get reclassified as the default `fact`). Pre-fix
4024    /// the check was gated on `and_then(scalar_string)`, which returned `None`
4025    /// for a sequence/mapping and short-circuited the whole branch.
4026    #[test]
4027    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
4028        let fx = Fixture::new();
4029        let body = |mt: &str| {
4030            format!(
4031                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
4032            )
4033        };
4034
4035        // Valid enum members + absent (default fact) → no FM_BAD_META_TYPE.
4036        for ok in ["fact", "operational", "conclusion"] {
4037            fx.write("records/profiles/ok.md", &body(ok));
4038            let issues = validate_working_set(&fx.store(), None).unwrap();
4039            assert!(
4040                !has(&issues, codes::FM_BAD_META_TYPE),
4041                "`meta-type: {ok}` must be accepted; got {issues:#?}"
4042            );
4043        }
4044        fx.write(
4045            "records/profiles/absent.md",
4046            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
4047        );
4048        assert!(
4049            !has(
4050                &validate_working_set(&fx.store(), None).unwrap(),
4051                codes::FM_BAD_META_TYPE
4052            ),
4053            "an absent meta-type is the default `fact` and must be accepted"
4054        );
4055
4056        // Scalar-but-wrong, AND non-scalar (list / mapping) → FM_BAD_META_TYPE.
4057        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
4058            let fx2 = Fixture::new();
4059            fx2.write("records/profiles/bad.md", &body(bad));
4060            let issues = validate_working_set(&fx2.store(), None).unwrap();
4061            assert!(
4062                has(&issues, codes::FM_BAD_META_TYPE),
4063                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
4064            );
4065        }
4066    }
4067
4068    // ── DB.md structure ───────────────────────────────────────────────────────
4069
4070    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
4071    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
4072    /// would fail here).
4073    #[test]
4074    fn valid_db_md_emits_no_structure_issue() {
4075        let fx = Fixture::new();
4076        let issues = fx.store_all();
4077        assert!(
4078            !has(&issues, codes::DB_MD_BAD_TYPE)
4079                && !has(&issues, codes::DB_MD_MISSING_FIELD)
4080                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
4081            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
4082        );
4083    }
4084
4085    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
4086    /// anchored to the `type:` line (file line 2). Failing to read the type, or
4087    /// accepting a non-`db-md` type, breaks this.
4088    #[test]
4089    fn db_md_wrong_type_is_error() {
4090        let fx = Fixture::new();
4091        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
4092        let issues = fx.store_all();
4093        let i = find(&issues, codes::DB_MD_BAD_TYPE);
4094        assert!(i.is_error());
4095        assert_eq!(i.file, PathBuf::from("DB.md"));
4096        assert_eq!(i.key.as_deref(), Some("type"));
4097        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
4098    }
4099
4100    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
4101    /// absent field, each keyed on its field name, anchored to the block top.
4102    #[test]
4103    fn db_md_missing_scope_and_owner_each_report() {
4104        let fx = Fixture::new();
4105        fx.write("DB.md", "---\ntype: db-md\n---\n");
4106        let issues = fx.store_all();
4107        assert_eq!(
4108            count(&issues, codes::DB_MD_MISSING_FIELD),
4109            2,
4110            "both scope and owner absent → two issues: {issues:#?}"
4111        );
4112        let keys: BTreeSet<Option<String>> = issues
4113            .iter()
4114            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4115            .map(|i| i.key.clone())
4116            .collect();
4117        assert_eq!(
4118            keys,
4119            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
4120            "one issue keyed on each missing field"
4121        );
4122        for i in issues
4123            .iter()
4124            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4125        {
4126            assert!(i.is_error());
4127            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
4128        }
4129    }
4130
4131    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
4132    /// anchored to its own line — guarding against an "is the key textually
4133    /// present?" shortcut that would miss `owner:` with an empty value.
4134    #[test]
4135    fn db_md_blank_required_field_is_missing() {
4136        let fx = Fixture::new();
4137        fx.write(
4138            "DB.md",
4139            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
4140        );
4141        let issues = fx.store_all();
4142        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
4143        assert_eq!(i.key.as_deref(), Some("owner"));
4144        assert_eq!(
4145            i.line,
4146            Some(4),
4147            "a present-but-empty field anchors to its line"
4148        );
4149        assert!(
4150            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
4151            "scope is present and non-empty → only owner reported"
4152        );
4153    }
4154
4155    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
4156    /// to the heading's file line; the three recognized sections stay silent.
4157    #[test]
4158    fn db_md_unknown_section_is_warning() {
4159        let fx = Fixture::new();
4160        fx.write(
4161            "DB.md",
4162            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
4163            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
4164            // 11 `## Glossary`.
4165            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
4166        );
4167        let issues = fx.store_all();
4168        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
4169        assert!(!i.is_error(), "unknown section is a warning, not an error");
4170        assert_eq!(i.severity, Severity::Warning);
4171        assert_eq!(
4172            i.line,
4173            Some(11),
4174            "anchors to the `## Glossary` heading line"
4175        );
4176        assert!(
4177            i.message.contains("Glossary"),
4178            "the message names the offending section: {}",
4179            i.message
4180        );
4181        // The recognized `## Agent instructions` section did NOT fire.
4182        assert_eq!(
4183            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
4184            1,
4185            "only the unrecognized section is flagged: {issues:#?}"
4186        );
4187    }
4188
4189    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
4190    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
4191    #[test]
4192    fn db_md_no_frontmatter_reports_type_and_both_fields() {
4193        let fx = Fixture::new();
4194        fx.write("DB.md", "# just a heading, no frontmatter\n");
4195        let issues = fx.store_all();
4196        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
4197        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
4198    }
4199
4200    // ── frontmatter ─────────────────────────────────────────────────────────
4201
4202    #[test]
4203    fn missing_type_is_error() {
4204        let fx = Fixture::new();
4205        fx.write(
4206            "records/contacts/a.md",
4207            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
4208        );
4209        let issues = fx.store_all();
4210        assert!(has(&issues, codes::FM_MISSING_TYPE));
4211        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
4212    }
4213
4214    #[test]
4215    fn missing_universal_timestamps_are_errors_on_content_files() {
4216        let fx = Fixture::new();
4217        fx.write(
4218            "records/contacts/a.md",
4219            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
4220        );
4221        let issues = fx.store_all();
4222
4223        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
4224        assert_eq!(missing_created.key.as_deref(), Some("created"));
4225        assert!(missing_created.is_error());
4226
4227        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
4228        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
4229        assert!(missing_updated.is_error());
4230    }
4231
4232    #[test]
4233    fn meta_files_do_not_require_universal_timestamps() {
4234        let fx = Fixture::new();
4235        let issues = fx.store_all();
4236
4237        assert!(
4238            !has(&issues, codes::FM_MISSING_CREATED),
4239            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4240        );
4241        assert!(
4242            !has(&issues, codes::FM_MISSING_UPDATED),
4243            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4244        );
4245    }
4246
4247    #[test]
4248    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
4249        let fx = Fixture::new();
4250        fx.write(
4251            "records/profiles/a.md",
4252            "# Just a heading\n\nNo frontmatter here.\n",
4253        );
4254        let issues = fx.store_all();
4255        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4256        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4257    }
4258
4259    #[test]
4260    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4261        let fx = Fixture::new();
4262        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
4263        let issues = fx.store_all();
4264        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4265        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4266    }
4267
4268    #[test]
4269    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4270        let fx = Fixture::new();
4271        // A tab inside a mapping value is invalid YAML.
4272        fx.write(
4273            "records/contacts/a.md",
4274            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4275        );
4276        let issues = fx.store_all();
4277        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4278        assert!(issue.is_error());
4279        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4280        // When YAML doesn't parse we don't *also* claim the summary is missing;
4281        // the agent fixes the YAML first.
4282        assert!(
4283            !has(&issues, codes::SUMMARY_MISSING),
4284            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4285        );
4286    }
4287
4288    #[test]
4289    fn bad_created_timestamp_is_error() {
4290        let fx = Fixture::new();
4291        fx.write(
4292            "records/contacts/a.md",
4293            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4294        );
4295        let issues = fx.store_all();
4296        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4297        assert_eq!(issue.key.as_deref(), Some("created"));
4298        assert!(issue.is_error());
4299    }
4300
4301    #[test]
4302    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4303        let fx = Fixture::new();
4304        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4305        // `last_touch` is a type-specific date field → date-only is fine.
4306        fx.write(
4307            "records/contacts/a.md",
4308            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4309        );
4310        let issues = fx.store_all();
4311        let created_issues: Vec<_> = issues
4312            .iter()
4313            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4314            .collect();
4315        assert_eq!(
4316            created_issues.len(),
4317            1,
4318            "date-only `created` must fail: {issues:#?}"
4319        );
4320        assert!(
4321            !issues.iter().any(
4322                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4323            ),
4324            "date-only `last_touch` is valid: {issues:#?}"
4325        );
4326    }
4327
4328    // ── summary ─────────────────────────────────────────────────────────────
4329
4330    #[test]
4331    fn summary_missing_empty_multiline_toolong() {
4332        let fx = Fixture::new();
4333        fx.write(
4334            "records/profiles/missing.md",
4335            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4336        );
4337        fx.write(
4338            "records/profiles/empty.md",
4339            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4340        );
4341        let long = "x".repeat(201);
4342        fx.write(
4343            "records/profiles/long.md",
4344            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4345        );
4346        let issues = fx.store_all();
4347        assert!(has(&issues, codes::SUMMARY_MISSING));
4348        assert_eq!(
4349            find(&issues, codes::SUMMARY_MISSING).file,
4350            PathBuf::from("records/profiles/missing.md")
4351        );
4352        assert!(has(&issues, codes::SUMMARY_EMPTY));
4353        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4354        assert_eq!(
4355            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4356            Severity::Warning
4357        );
4358    }
4359
4360    #[test]
4361    fn summary_multiline_via_yaml_block_scalar() {
4362        let fx = Fixture::new();
4363        // A literal block scalar produces a value with a newline.
4364        fx.write(
4365            "records/profiles/a.md",
4366            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4367        );
4368        let issues = fx.store_all();
4369        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4370    }
4371
4372    #[test]
4373    fn summary_exactly_200_chars_is_ok() {
4374        let fx = Fixture::new();
4375        let s = "y".repeat(200);
4376        fx.write(
4377            "records/profiles/a.md",
4378            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4379        );
4380        let issues = fx.store_all();
4381        assert!(
4382            !has(&issues, codes::SUMMARY_TOO_LONG),
4383            "200 is the bound, inclusive: {issues:#?}"
4384        );
4385    }
4386
4387    #[test]
4388    fn meta_files_need_no_summary() {
4389        let fx = Fixture::new();
4390        // The root/layer/type indexes + log carry no summary and must not be
4391        // flagged. (A lone DB.md store with one contact and full indexes.)
4392        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4393        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4394        fx.write(
4395            "records/index.md",
4396            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4397        );
4398        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4399        fx.write(
4400            "records/contacts/index.jsonl",
4401            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4402        );
4403        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4404        let issues = fx.store_all();
4405        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4406    }
4407
4408    // ── tags ────────────────────────────────────────────────────────────────
4409
4410    #[test]
4411    fn nested_tags_warns_flat_tags_ok() {
4412        let fx = Fixture::new();
4413        fx.write(
4414            "records/contacts/nested.md",
4415            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4416        );
4417        fx.write(
4418            "records/contacts/flat.md",
4419            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4420        );
4421        let issues = fx.store_all();
4422        let tag_issues: Vec<_> = issues
4423            .iter()
4424            .filter(|i| i.code == codes::TAGS_MALFORMED)
4425            .collect();
4426        assert_eq!(
4427            tag_issues.len(),
4428            1,
4429            "only the nested-tags file should warn: {issues:#?}"
4430        );
4431        assert_eq!(
4432            tag_issues[0].file,
4433            PathBuf::from("records/contacts/nested.md")
4434        );
4435        assert_eq!(tag_issues[0].severity, Severity::Warning);
4436    }
4437
4438    // ── wiki-links ────────────────────────────────────────────────────────────
4439
4440    #[test]
4441    fn short_form_wiki_link_is_error() {
4442        let fx = Fixture::new();
4443        let mut body = valid_contact("links to a short form");
4444        body.push_str("\nSee [[sarah-chen]] for details.\n");
4445        fx.write("records/contacts/a.md", &body);
4446        let issues = fx.store_all();
4447        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4448        assert!(issue.is_error());
4449        assert!(issue.message.contains("sarah-chen"));
4450        // A short-form link must NOT also be reported broken — fix the form first.
4451        assert!(
4452            !issues
4453                .iter()
4454                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4455            "short-form should suppress broken: {issues:#?}"
4456        );
4457    }
4458
4459    #[test]
4460    fn broken_full_path_wiki_link_is_error() {
4461        let fx = Fixture::new();
4462        let mut body = valid_contact("links to a missing file");
4463        body.push_str("\nSee [[records/contacts/ghost]].\n");
4464        fx.write("records/contacts/a.md", &body);
4465        let issues = fx.store_all();
4466        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4467        assert!(issue.is_error());
4468        assert!(issue.message.contains("records/contacts/ghost"));
4469        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4470    }
4471
4472    #[test]
4473    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4474        let fx = Fixture::new();
4475        let mut body = valid_contact("links with traversal");
4476        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4477        fx.write("records/contacts/a.md", &body);
4478        let issues = fx.store_all();
4479        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4480        assert!(issue.message.contains("not a safe store-relative path"));
4481        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4482    }
4483
4484    #[test]
4485    fn valid_full_path_wiki_link_passes() {
4486        let fx = Fixture::new();
4487        fx.write("records/contacts/target.md", &valid_contact("target"));
4488        let mut body = valid_contact("links to target");
4489        body.push_str("\nSee [[records/contacts/target]].\n");
4490        fx.write("records/contacts/a.md", &body);
4491        let issues = fx.store_all();
4492        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4493        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4494    }
4495
4496    #[test]
4497    fn md_extension_wiki_link_warns_and_resolves() {
4498        let fx = Fixture::new();
4499        fx.write("records/contacts/target.md", &valid_contact("target"));
4500        let mut body = valid_contact("links with extension");
4501        body.push_str("\nSee [[records/contacts/target.md]].\n");
4502        fx.write("records/contacts/a.md", &body);
4503        let issues = fx.store_all();
4504        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4505        assert_eq!(issue.severity, Severity::Warning);
4506        assert_eq!(
4507            issue.suggestion.as_deref(),
4508            Some("drop the extension: [[records/contacts/target]]")
4509        );
4510        // The target exists once `.md` is stripped → not broken.
4511        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4512    }
4513
4514    #[test]
4515    fn wiki_links_in_code_fences_are_ignored() {
4516        let fx = Fixture::new();
4517        let mut body = valid_contact("has a fenced example");
4518        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4519        fx.write("records/contacts/a.md", &body);
4520        let issues = fx.store_all();
4521        assert!(
4522            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4523            "fenced wiki-links must be ignored: {issues:#?}"
4524        );
4525    }
4526
4527    #[test]
4528    fn flow_form_link_list_in_frontmatter_is_error() {
4529        let fx = Fixture::new();
4530        fx.write(
4531            "records/meetings/m.md",
4532            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4533        );
4534        let issues = fx.store_all();
4535        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4536        assert!(issue.is_error());
4537        assert_eq!(issue.key.as_deref(), Some("attendees"));
4538    }
4539
4540    #[test]
4541    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4542        let fx = Fixture::new();
4543        fx.write("records/contacts/a.md", &valid_contact("a"));
4544        fx.write("records/contacts/b.md", &valid_contact("b"));
4545        fx.write(
4546            "records/meetings/m.md",
4547            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4548        );
4549        let issues = fx.store_all();
4550        assert!(
4551            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4552            "{issues:#?}"
4553        );
4554        // Block-form link targets are still integrity-checked (both exist here).
4555        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4556    }
4557
4558    #[test]
4559    fn frontmatter_short_form_link_field_is_error() {
4560        let fx = Fixture::new();
4561        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4562        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4563        fx.write(
4564            "records/synthesis/a.md",
4565            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4566        );
4567        let issues = fx.store_all();
4568        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4569        assert!(issue.is_error());
4570        assert_eq!(issue.key.as_deref(), Some("related"));
4571    }
4572
4573    #[test]
4574    fn unquoted_frontmatter_link_is_recognized() {
4575        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4576        // string. The validator must still see it as a wiki-link (text-based
4577        // extraction). A short-form custom field must report SHORT_FORM, and a
4578        // full-path one with a missing target must report BROKEN.
4579        let fx = Fixture::new();
4580        fx.write(
4581            "records/synthesis/short.md",
4582            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4583        );
4584        fx.write(
4585            "records/synthesis/broken.md",
4586            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4587        );
4588        let issues = fx.store_all();
4589        assert!(
4590            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4591                && i.file == Path::new("records/synthesis/short.md")
4592                && i.key.as_deref() == Some("related")),
4593            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4594        );
4595        assert!(
4596            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4597                && i.file == Path::new("records/synthesis/broken.md")),
4598            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4599        );
4600    }
4601
4602    #[test]
4603    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4604        // A short-form value in a *declared* link field (a `### contact` schema
4605        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4606        // (the target isn't under the prefix), and must NOT also be reported as a
4607        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4608        let mut fx = Fixture::new();
4609        fx.config.schemas.insert(
4610            "contact".into(),
4611            Schema {
4612                fields: vec![FieldSpec {
4613                    name: "company".into(),
4614                    link_prefix: Some(PathBuf::from("records/companies")),
4615                    ..Default::default()
4616                }],
4617                ..Default::default()
4618            },
4619        );
4620        fx.write(
4621            "records/contacts/a.md",
4622            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4623        );
4624        let issues = fx.store_all();
4625        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4626        assert_eq!(issue.key.as_deref(), Some("company"));
4627        // The same link must NOT also be double-reported via the generic path.
4628        assert!(
4629            !issues
4630                .iter()
4631                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4632                    && i.key.as_deref() == Some("company")),
4633            "schema link fields are checked once, by the schema path: {issues:#?}"
4634        );
4635    }
4636
4637    #[test]
4638    fn schema_link_field_with_md_extension_still_warns() {
4639        let mut fx = Fixture::new();
4640        fx.config.schemas.insert(
4641            "contact".into(),
4642            Schema {
4643                fields: vec![FieldSpec {
4644                    name: "company".into(),
4645                    link_prefix: Some(PathBuf::from("records/companies")),
4646                    ..Default::default()
4647                }],
4648                ..Default::default()
4649            },
4650        );
4651        fx.write(
4652            "records/companies/acme.md",
4653            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4654        );
4655        fx.write(
4656            "records/contacts/a.md",
4657            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4658        );
4659        let issues = fx.store_all();
4660        let issue = issues
4661            .iter()
4662            .find(|i| {
4663                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4664            })
4665            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4666        assert_eq!(issue.severity, Severity::Warning);
4667        assert!(
4668            !issues
4669                .iter()
4670                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4671            "extensionless existence check should still find acme.md: {issues:#?}"
4672        );
4673    }
4674
4675    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4676
4677    #[test]
4678    fn explicit_schema_required_shape_enum() {
4679        let fx = {
4680            let mut fx = Fixture::new();
4681            // contact schema: name required, email required+email shape,
4682            // status enum: active|inactive
4683            let schema = Schema {
4684                fields: vec![
4685                    FieldSpec {
4686                        name: "name".into(),
4687                        required: true,
4688                        ..Default::default()
4689                    },
4690                    FieldSpec {
4691                        name: "email".into(),
4692                        required: true,
4693                        shape: Some(Shape::Email),
4694                        ..Default::default()
4695                    },
4696                    FieldSpec {
4697                        name: "status".into(),
4698                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4699                        ..Default::default()
4700                    },
4701                ],
4702                ..Default::default()
4703            };
4704            fx.config.schemas.insert("contact".into(), schema);
4705            fx
4706        };
4707        fx.write(
4708            "records/contacts/a.md",
4709            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4710        );
4711        let issues = fx.store_all();
4712        // name absent → MISSING_REQUIRED
4713        assert!(
4714            issues
4715                .iter()
4716                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4717                    && i.key.as_deref() == Some("name")),
4718            "{issues:#?}"
4719        );
4720        // email malformed → SHAPE_MISMATCH
4721        assert!(
4722            issues.iter().any(
4723                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4724            ),
4725            "{issues:#?}"
4726        );
4727        // status archived not in enum → ENUM_VIOLATION
4728        assert!(
4729            issues
4730                .iter()
4731                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4732                    && i.key.as_deref() == Some("status")),
4733            "{issues:#?}"
4734        );
4735    }
4736
4737    #[test]
4738    fn schema_without_link_field_allows_plain_value() {
4739        // A `contact` schema with no `company` link field means a plain `company`
4740        // string is fine — schema enforcement is exactly what the store declares,
4741        // nothing implicit.
4742        let mut fx = Fixture::new();
4743        fx.config.schemas.insert(
4744            "contact".into(),
4745            Schema {
4746                fields: vec![FieldSpec {
4747                    name: "name".into(),
4748                    required: true,
4749                    ..Default::default()
4750                }],
4751                ..Default::default()
4752            },
4753        );
4754        fx.write(
4755            "records/contacts/a.md",
4756            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4757        );
4758        let issues = fx.store_all();
4759        assert!(
4760            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4761            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4762        );
4763    }
4764
4765    #[test]
4766    fn schema_link_field_plain_value_is_prefix_mismatch() {
4767        // The surviving link-enforcement path: a declared `link to <prefix>/`
4768        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4769        let mut fx = Fixture::new();
4770        fx.config.schemas.insert(
4771            "contact".into(),
4772            Schema {
4773                fields: vec![FieldSpec {
4774                    name: "company".into(),
4775                    link_prefix: Some(PathBuf::from("records/companies")),
4776                    ..Default::default()
4777                }],
4778                ..Default::default()
4779            },
4780        );
4781        fx.write(
4782            "records/contacts/a.md",
4783            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4784        );
4785        let issues = fx.store_all();
4786        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4787        assert_eq!(issue.key.as_deref(), Some("company"));
4788        assert!(issue
4789            .suggestion
4790            .as_deref()
4791            .unwrap()
4792            .contains("records/companies/"));
4793    }
4794
4795    #[test]
4796    fn schema_shape_int_and_url_and_currency() {
4797        let mut fx = Fixture::new();
4798        fx.config.schemas.insert(
4799            "widget".into(),
4800            Schema {
4801                fields: vec![
4802                    FieldSpec {
4803                        name: "qty".into(),
4804                        shape: Some(Shape::Int),
4805                        ..Default::default()
4806                    },
4807                    FieldSpec {
4808                        name: "site".into(),
4809                        shape: Some(Shape::Url),
4810                        ..Default::default()
4811                    },
4812                    FieldSpec {
4813                        name: "price".into(),
4814                        shape: Some(Shape::Currency),
4815                        ..Default::default()
4816                    },
4817                ],
4818                ..Default::default()
4819            },
4820        );
4821        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4822        // ISO code + amount). It must pass — it used to spuriously fail.
4823        fx.write(
4824            "records/widgets/ok.md",
4825            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4826        );
4827        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4828        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4829        // the under-rejection half of the finding.
4830        fx.write(
4831            "records/widgets/bad.md",
4832            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4833        );
4834        let issues = fx.store_all();
4835        let bad_shape: Vec<_> = issues
4836            .iter()
4837            .filter(|i| {
4838                i.code == codes::SCHEMA_SHAPE_MISMATCH
4839                    && i.file == Path::new("records/widgets/bad.md")
4840            })
4841            .map(|i| i.key.clone().unwrap_or_default())
4842            .collect();
4843        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4844        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4845        assert!(
4846            bad_shape.contains(&"price".to_string()),
4847            "inf must be rejected as currency: {issues:#?}"
4848        );
4849        assert!(
4850            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4851                && i.file == Path::new("records/widgets/ok.md")),
4852            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4853        );
4854    }
4855
4856    #[test]
4857    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4858        let mut fx = Fixture::new();
4859        fx.config.schemas.insert(
4860            "contact".into(),
4861            Schema {
4862                fields: vec![
4863                    FieldSpec {
4864                        name: "email".into(),
4865                        required: true,
4866                        shape: Some(Shape::Email),
4867                        ..Default::default()
4868                    },
4869                    FieldSpec {
4870                        name: "status".into(),
4871                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4872                        ..Default::default()
4873                    },
4874                ],
4875                ..Default::default()
4876            },
4877        );
4878        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4879        // used to slip through entirely (`scalar_string` → None → the shape and
4880        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4881        fx.write(
4882            "records/contacts/bad.md",
4883            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4884        );
4885        let issues = fx.store_all();
4886        let mismatched: Vec<_> = issues
4887            .iter()
4888            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4889            .map(|i| i.key.clone().unwrap_or_default())
4890            .collect();
4891        assert!(
4892            mismatched.contains(&"email".to_string()),
4893            "list-valued required email must flag: {issues:#?}"
4894        );
4895        assert!(
4896            mismatched.contains(&"status".to_string()),
4897            "list-valued enum must flag: {issues:#?}"
4898        );
4899    }
4900
4901    #[test]
4902    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4903        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4904        for ok in [
4905            "100",
4906            "1234.56",
4907            "$1,234.50",
4908            "USD 100", // the finding's headline probe — used to be false
4909            "usd 100", // case-insensitive code
4910            "EUR 9.50",
4911            "£12",
4912            "¥1000",
4913            "-5.00", // signed amounts are real (refunds)
4914            "+5",
4915            "1,000,000",
4916        ] {
4917            assert!(is_currency(ok), "expected currency: {ok:?}");
4918        }
4919        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4920        // bare-code / exponent cases the docstring forbids.
4921        for bad in [
4922            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4923            "12.999", // 3 decimals
4924            "1.2345", // 4 decimals
4925            "USD",    // bare code, no amount
4926            "$",      // bare symbol
4927            "free", "", " ", "1e3",      // exponent form
4928            "1.",       // trailing dot, no fractional digits
4929            ".5",       // leading dot, no integer digits
4930            "1 000",    // space as separator is not a thousands separator
4931            "USDD 100", // 4-letter "code" must not strip
4932        ] {
4933            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4934        }
4935    }
4936
4937    // ── policies ───────────────────────────────────────────────────────────
4938
4939    #[test]
4940    fn ignored_type_present_is_info() {
4941        let mut fx = Fixture::new();
4942        fx.config.ignored_types.push("temp".into());
4943        fx.write(
4944            "records/temps/x.md",
4945            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4946        );
4947        let issues = fx.store_all();
4948        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4949        assert_eq!(issue.severity, Severity::Info);
4950        assert!(!issue.is_error());
4951        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4952    }
4953
4954    #[test]
4955    fn conclusion_record_derived_from_ignored_type_warns() {
4956        let mut fx = Fixture::new();
4957        fx.config.ignored_types.push("temp".into());
4958        fx.write(
4959            "records/temps/x.md",
4960            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4961        );
4962        // The policy now gates on `meta-type: conclusion` (not the retired
4963        // `type: wiki-page`): a conclusion record that derives from an
4964        // ignored-type record warns.
4965        fx.write(
4966            "records/synthesis/t.md",
4967            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4968        );
4969        let issues = fx.store_all();
4970        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4971        assert_eq!(issue.severity, Severity::Warning);
4972        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4973        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4974    }
4975
4976    /// The shared `derived_from_ignored_type` entry point — the single
4977    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
4978    /// warning) now route through, so they cannot diverge. This pins its
4979    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
4980    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
4981    /// match carrying the resolved target type, and a non-ignored target
4982    /// rejected.
4983    #[test]
4984    fn derived_from_ignored_type_is_the_shared_policy_decision() {
4985        let mut fx = Fixture::new();
4986        fx.config.ignored_types.push("secret".into());
4987        // An ignored-type record …
4988        fx.write(
4989            "records/secrets/s.md",
4990            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4991        );
4992        // … and a non-ignored record.
4993        fx.write(
4994            "records/contacts/c.md",
4995            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4996        );
4997        let store = fx.store();
4998
4999        // Positive: a conclusion record deriving from the ignored-type record
5000        // matches, and the hit carries both the target (as written) and its
5001        // resolved type.
5002        let hit =
5003            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
5004                .expect("conclusion → ignored-type record must match");
5005        assert_eq!(hit.target, "records/secrets/s");
5006        assert_eq!(hit.target_type, "secret");
5007
5008        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
5009        // the same ignored-type target.
5010        assert_eq!(
5011            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
5012            None,
5013            "only conclusion derivation is policed"
5014        );
5015
5016        // Target gate: a conclusion deriving from a non-ignored record is fine.
5017        assert_eq!(
5018            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
5019            None,
5020            "deriving from a non-ignored type is allowed"
5021        );
5022
5023        // First match wins across multiple targets (here the second is the hit).
5024        let hit = derived_from_ignored_type(
5025            &store,
5026            "conclusion",
5027            ["records/contacts/c", "records/secrets/s"],
5028        )
5029        .expect("a later ignored-type target must still be found");
5030        assert_eq!(hit.target, "records/secrets/s");
5031
5032        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
5033        fx.config.ignored_types.clear();
5034        let store = fx.store();
5035        assert_eq!(
5036            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
5037            None,
5038            "an empty ignored-types policy short-circuits"
5039        );
5040    }
5041
5042    // ── duplicates ───────────────────────────────────────────────────────────
5043
5044    #[test]
5045    fn dup_id_is_hard_error_with_related() {
5046        let fx = Fixture::new();
5047        fx.write(
5048            "records/contacts/a.md",
5049            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5050        );
5051        fx.write(
5052            "records/contacts/b.md",
5053            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5054        );
5055        let issues = fx.store_all();
5056        // Reporting rule #1: ONE issue per collision group, keyed on the
5057        // lexicographically smallest path (`a.md`), partner in `related`.
5058        assert_eq!(
5059            count(&issues, codes::DUP_ID),
5060            1,
5061            "one issue per group: {issues:#?}"
5062        );
5063        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
5064        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
5065        assert!(a.is_error());
5066        assert_eq!(a.key.as_deref(), Some("id"));
5067        assert_eq!(
5068            a.line,
5069            Some(3),
5070            "anchors to the `id` line on the reported file"
5071        );
5072        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
5073    }
5074
5075    #[test]
5076    fn dup_id_not_fired_in_working_set() {
5077        // DUP_* is an --all-only cross-file check; the working set must not run it.
5078        let fx = Fixture::new();
5079        fx.write(
5080            "records/contacts/a.md",
5081            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5082        );
5083        fx.write(
5084            "records/contacts/b.md",
5085            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5086        );
5087        // Log says both changed since epoch, so they're in the working set.
5088        fx.write(
5089            "log.md",
5090            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
5091        );
5092        let issues = validate_working_set(&fx.store(), None).unwrap();
5093        assert!(
5094            !has(&issues, codes::DUP_ID),
5095            "DUP_ID is --all only: {issues:#?}"
5096        );
5097    }
5098
5099    #[test]
5100    fn dup_unique_key_single_field_is_warning() {
5101        let mut fx = Fixture::new();
5102        // contact declares `- unique: email`.
5103        fx.config.schemas.insert(
5104            "contact".into(),
5105            Schema {
5106                unique_keys: vec![vec!["email".into()]],
5107                ..Default::default()
5108            },
5109        );
5110        for (f, name) in [("a", "A"), ("b", "B")] {
5111            fx.write(
5112                &format!("records/contacts/{f}.md"),
5113                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
5114            );
5115        }
5116        let issues = fx.store_all();
5117        // One issue per group (rule #1), keyed on the smallest path, anchored to
5118        // the single `email` field.
5119        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
5120        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5121        assert_eq!(dup.severity, Severity::Warning);
5122        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
5123        assert_eq!(dup.key.as_deref(), Some("email"));
5124        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
5125    }
5126
5127    #[test]
5128    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
5129        let mut fx = Fixture::new();
5130        // expense declares `- unique: date, amount, vendor` (a compound key).
5131        fx.config.schemas.insert(
5132            "expense".into(),
5133            Schema {
5134                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
5135                ..Default::default()
5136            },
5137        );
5138        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
5139        let exp = |f: &str, amount: &str| {
5140            format!(
5141            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
5142        )
5143        };
5144        fx.write("records/expenses/e1.md", &exp("e1", "100"));
5145        fx.write("records/expenses/e2.md", &exp("e2", "100"));
5146        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
5147        let issues = fx.store_all();
5148        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
5149        // (e1) with e2 in `related`; e3 differs on amount and never appears.
5150        assert_eq!(
5151            count(&issues, codes::DUP_UNIQUE_KEY),
5152            1,
5153            "only e1+e2 collide, one issue: {issues:#?}"
5154        );
5155        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5156        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5157        assert_eq!(
5158            dup.line,
5159            Some(1),
5160            "compound-key collision anchors to line 1"
5161        );
5162        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5163        assert!(
5164            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5165                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5166            "e3 differs on amount and must not collide: {issues:#?}"
5167        );
5168    }
5169
5170    #[test]
5171    fn dup_unique_key_list_field_is_order_independent() {
5172        let mut fx = Fixture::new();
5173        // meeting declares `- unique: date, attendees`; the list field is a set.
5174        fx.config.schemas.insert(
5175            "meeting".into(),
5176            Schema {
5177                unique_keys: vec![vec!["date".into(), "attendees".into()]],
5178                ..Default::default()
5179            },
5180        );
5181        fx.write("records/contacts/a.md", &valid_contact("a"));
5182        fx.write("records/contacts/b.md", &valid_contact("b"));
5183        let m = |f: &str, order: &str| {
5184            let attendees = if order == "ab" {
5185                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
5186            } else {
5187                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
5188            };
5189            format!(
5190                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5191            )
5192        };
5193        fx.write("records/meetings/m1.md", &m("m1", "ab"));
5194        fx.write("records/meetings/m2.md", &m("m2", "ba"));
5195        let issues = fx.store_all();
5196        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
5197        // → a single issue on the smaller path.
5198        assert_eq!(
5199            count(&issues, codes::DUP_UNIQUE_KEY),
5200            1,
5201            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5202        );
5203        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5204        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5205        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5206    }
5207
5208    // ── indexes ───────────────────────────────────────────────────────────────
5209
5210    #[test]
5211    fn missing_indexes_at_all_three_levels() {
5212        let fx = Fixture::new();
5213        fx.write("records/contacts/a.md", &valid_contact("a"));
5214        let issues = fx.store_all();
5215        // root, layer (records), and type-folder (records/contacts) all missing.
5216        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
5217        // would-be index.md), per the field convention `EXPECTED` pins.
5218        let missing_files: BTreeSet<PathBuf> = issues
5219            .iter()
5220            .filter(|i| i.code == codes::INDEX_MISSING)
5221            .map(|i| i.file.clone())
5222            .collect();
5223        assert!(
5224            missing_files.contains(&PathBuf::from("index.md")),
5225            "{issues:#?}"
5226        );
5227        assert!(
5228            missing_files.contains(&PathBuf::from("records/index.md")),
5229            "{issues:#?}"
5230        );
5231        assert!(
5232            missing_files.contains(&PathBuf::from("records/contacts")),
5233            "{issues:#?}"
5234        );
5235        // When the index.md is entirely absent we do NOT additionally fire
5236        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5237        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5238    }
5239
5240    #[test]
5241    fn index_stale_entry_and_missing_entry() {
5242        let fx = Fixture::new();
5243        fx.write(
5244            "records/contacts/present.md",
5245            &valid_contact("present contact"),
5246        );
5247        // Indexes for the parents (root/layer) present so we isolate type-folder.
5248        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5249        fx.write(
5250            "records/index.md",
5251            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5252        );
5253        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5254        fx.write(
5255            "records/contacts/index.md",
5256            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5257        );
5258        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5259        let issues = fx.store_all();
5260        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5261        assert!(stale.message.contains("ghost"));
5262        assert!(stale.is_error());
5263        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5264        assert!(
5265            missing.message.contains("present.md"),
5266            "{}",
5267            missing.message
5268        );
5269    }
5270
5271    #[test]
5272    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5273        let fx = Fixture::new();
5274        fx.write("records/contacts/a.md", &valid_contact("a"));
5275        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5276        fx.write(
5277            "records/index.md",
5278            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5279        );
5280        fx.write(
5281            "records/contacts/index.md",
5282            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5283        );
5284        fx.write(
5285            "records/contacts/index.jsonl",
5286            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5287        );
5288        let issues = fx.store_all();
5289        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5290        assert!(stale.message.contains("not a safe store-relative path"));
5291    }
5292
5293    #[test]
5294    fn index_summary_mismatch() {
5295        let fx = Fixture::new();
5296        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5297        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5298        fx.write(
5299            "records/index.md",
5300            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5301        );
5302        fx.write(
5303            "records/contacts/index.md",
5304            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5305        );
5306        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5307        let issues = fx.store_all();
5308        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5309        assert!(issue.is_error());
5310        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5311    }
5312
5313    #[test]
5314    fn index_summary_match_passes() {
5315        let fx = Fixture::new();
5316        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5317        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5318        fx.write(
5319            "records/index.md",
5320            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5321        );
5322        fx.write(
5323            "records/contacts/index.md",
5324            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5325        );
5326        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5327        let issues = fx.store_all();
5328        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5329    }
5330
5331    #[test]
5332    fn index_entry_with_tag_suffix_matches_summary() {
5333        let fx = Fixture::new();
5334        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5335        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5336        fx.write(
5337            "records/index.md",
5338            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5339        );
5340        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5341        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5342        // which must be stripped before comparing against the file's summary.
5343        fx.write(
5344            "records/contacts/index.md",
5345            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5346        );
5347        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5348        let issues = fx.store_all();
5349        assert!(
5350            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5351            "tag suffix should be stripped: {issues:#?}"
5352        );
5353    }
5354
5355    #[test]
5356    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5357        // Regression (the finding): a tagless file whose `summary` legitimately
5358        // ends in a single-spaced ` · #word` tail round-trips through `index
5359        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5360        // file has no tags). The validator must NOT mistake that single-spaced
5361        // tail for the renderer's tag suffix, or it reports a spurious — and
5362        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5363        let fx = Fixture::new();
5364        fx.write(
5365            "records/contacts/a.md",
5366            &valid_contact("Standup notes · #standup"),
5367        );
5368        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5369        fx.write(
5370            "records/index.md",
5371            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5372        );
5373        fx.write(
5374            "records/contacts/index.md",
5375            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5376        );
5377        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5378        let issues = fx.store_all();
5379        assert!(
5380            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5381            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5382        );
5383    }
5384
5385    #[test]
5386    fn index_jsonl_desync_missing_file_in_jsonl() {
5387        let fx = Fixture::new();
5388        fx.write("records/contacts/a.md", &valid_contact("a"));
5389        fx.write("records/contacts/b.md", &valid_contact("b"));
5390        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5391        fx.write(
5392            "records/index.md",
5393            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5394        );
5395        fx.write(
5396            "records/contacts/index.md",
5397            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5398        );
5399        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5400        fx.write(
5401            "records/contacts/index.jsonl",
5402            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5403        );
5404        let issues = fx.store_all();
5405        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5406        assert!(desync.message.contains("b.md"), "{}", desync.message);
5407    }
5408
5409    #[test]
5410    fn index_jsonl_desync_record_points_at_missing_file() {
5411        let fx = Fixture::new();
5412        fx.write("records/contacts/a.md", &valid_contact("a"));
5413        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5414        fx.write(
5415            "records/index.md",
5416            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5417        );
5418        fx.write(
5419            "records/contacts/index.md",
5420            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5421        );
5422        fx.write(
5423            "records/contacts/index.jsonl",
5424            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5425        );
5426        let issues = fx.store_all();
5427        assert!(
5428            issues
5429                .iter()
5430                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5431            "{issues:#?}"
5432        );
5433    }
5434
5435    #[test]
5436    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5437        let fx = Fixture::new();
5438        fx.write("records/contacts/a.md", &valid_contact("a"));
5439        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5440        fx.write(
5441            "records/index.md",
5442            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5443        );
5444        fx.write(
5445            "records/contacts/index.md",
5446            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5447        );
5448        fx.write(
5449            "records/contacts/index.jsonl",
5450            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5451        );
5452        let issues = fx.store_all();
5453        assert!(
5454            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5455                && i.message.contains("not a safe store-relative path")),
5456            "{issues:#?}"
5457        );
5458    }
5459
5460    #[test]
5461    fn index_jsonl_stale_summary() {
5462        let fx = Fixture::new();
5463        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5464        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5465        fx.write(
5466            "records/index.md",
5467            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5468        );
5469        fx.write(
5470            "records/contacts/index.md",
5471            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5472        );
5473        // jsonl summary disagrees with the file frontmatter.
5474        fx.write(
5475            "records/contacts/index.jsonl",
5476            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5477        );
5478        let issues = fx.store_all();
5479        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5480        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5481        assert!(stale.key.as_deref().unwrap().contains("summary"));
5482    }
5483
5484    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5485    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5486    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5487    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5488    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5489    /// `email` validated clean and answered `--where email=…` with a phantom
5490    /// value present in no file. This is the direct regression guard.
5491    #[test]
5492    fn index_jsonl_stale_queryable_field_email() {
5493        let fx = Fixture::new();
5494        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5495        fx.write("records/contacts/a.md", contact);
5496        // Start from the canonical, fully-correct sidecar set …
5497        fx.rebuild_indexes();
5498        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5499        let good = fs::read_to_string(&jsonl_path).unwrap();
5500        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5501        assert!(
5502            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5503            "freshly-rebuilt sidecar must not be stale"
5504        );
5505        // … then desync ONLY the email so it's the single differing field.
5506        assert!(
5507            good.contains("real@correct.com"),
5508            "sidecar projects email: {good}"
5509        );
5510        fx.write(
5511            "records/contacts/index.jsonl",
5512            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5513        );
5514
5515        let issues = fx.store_all();
5516        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5517        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5518        // The mismatch is reported precisely on `email`, and summary/type — which
5519        // still match — are NOT named.
5520        let key = stale.key.as_deref().unwrap();
5521        assert!(
5522            key.contains("email"),
5523            "expected `email` in stale key, got {key:?}"
5524        );
5525        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5526        assert!(!key.contains("type"), "type still matches: {key:?}");
5527    }
5528
5529    /// Broaden the guard across the typed/list/timestamp projections at once:
5530    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5531    /// caught, with all three named in one issue.
5532    #[test]
5533    fn index_jsonl_stale_typed_and_list_fields() {
5534        let fx = Fixture::new();
5535        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5536        fx.write("records/expenses/e.md", expense);
5537        fx.rebuild_indexes();
5538        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5539        let good = fs::read_to_string(&jsonl_path).unwrap();
5540        assert!(
5541            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5542            "freshly-rebuilt sidecar must not be stale"
5543        );
5544        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5545        let stale_line = good
5546            .replace("\"q2\"", "\"WRONG-TAG\"")
5547            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5548            .replace("1299", "9999");
5549        fx.write("records/expenses/index.jsonl", &stale_line);
5550
5551        let issues = fx.store_all();
5552        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5553        let key = stale.key.as_deref().unwrap();
5554        for expected in ["amount", "tags", "updated"] {
5555            assert!(
5556                key.contains(expected),
5557                "expected `{expected}` in stale key, got {key:?}"
5558            );
5559        }
5560    }
5561
5562    #[test]
5563    fn index_orphan_in_noncanonical_folder() {
5564        let fx = Fixture::new();
5565        fx.write("records/contacts/a.md", &valid_contact("a"));
5566        // Build the canonical indexes so they aren't reported as orphans.
5567        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5568        fx.write(
5569            "records/index.md",
5570            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5571        );
5572        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5573        fx.write(
5574            "records/contacts/index.jsonl",
5575            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5576        );
5577        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5578        fx.write(
5579            "records/contacts/subfolder/index.md",
5580            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5581        );
5582        let issues = fx.store_all();
5583        let orphan = find(&issues, codes::INDEX_ORPHAN);
5584        assert_eq!(orphan.severity, Severity::Warning);
5585        assert_eq!(
5586            orphan.file,
5587            PathBuf::from("records/contacts/subfolder/index.md")
5588        );
5589    }
5590
5591    #[test]
5592    fn index_wrong_scope() {
5593        let fx = Fixture::new();
5594        fx.write("records/contacts/a.md", &valid_contact("a"));
5595        // Root index declares the wrong scope.
5596        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5597        fx.write(
5598            "records/index.md",
5599            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5600        );
5601        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5602        fx.write(
5603            "records/contacts/index.jsonl",
5604            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5605        );
5606        let issues = fx.store_all();
5607        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5608        assert_eq!(issue.severity, Severity::Warning);
5609        assert_eq!(issue.file, PathBuf::from("index.md"));
5610    }
5611
5612    #[test]
5613    fn capped_type_folder_index_does_not_flag_missing_entries() {
5614        // Over the 500-entry cap, omitted entries are expected, not an error.
5615        let fx = Fixture::new();
5616        for i in 0..501 {
5617            fx.write(
5618                &format!("records/contacts/c{i:04}.md"),
5619                &valid_contact(&format!("contact {i}")),
5620            );
5621        }
5622        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5623        fx.write(
5624            "records/index.md",
5625            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5626        );
5627        // Type-folder index lists only ONE entry + a More footer.
5628        fx.write(
5629            "records/contacts/index.md",
5630            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5631        );
5632        // jsonl must still be complete — write all 501 lines.
5633        let mut jsonl = String::new();
5634        for i in 0..501 {
5635            jsonl.push_str(&format!(
5636                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5637            ));
5638        }
5639        fx.write("records/contacts/index.jsonl", &jsonl);
5640        let issues = fx.store_all();
5641        assert!(
5642            !has(&issues, codes::INDEX_MISSING_ENTRY),
5643            "over the cap, missing browse entries are expected: {issues:#?}"
5644        );
5645        // But the jsonl is complete → no desync.
5646        assert!(
5647            !has(&issues, codes::INDEX_JSONL_DESYNC),
5648            "{:#?}",
5649            issues
5650                .iter()
5651                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5652                .collect::<Vec<_>>()
5653        );
5654    }
5655
5656    // ── log ────────────────────────────────────────────────────────────────
5657
5658    #[test]
5659    fn log_bad_timestamp_unknown_kind_out_of_order() {
5660        let fx = Fixture::new();
5661        fx.write(
5662            "log.md",
5663            concat!(
5664                "---\ntype: log\n---\n\n# Log\n\n",
5665                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5666                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5667                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5668                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5669            ),
5670        );
5671        let issues = fx.store_all();
5672        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5673        assert_eq!(
5674            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5675            Severity::Warning
5676        );
5677        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5678        assert_eq!(unknown.severity, Severity::Warning);
5679        assert!(unknown.message.contains("frobnicate"));
5680        assert!(unknown
5681            .suggestion
5682            .as_deref()
5683            .is_some_and(|s| s.contains("create")));
5684        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5685        assert!(bad.is_error());
5686    }
5687
5688    #[test]
5689    fn log_validate_entry_without_object_is_well_formed() {
5690        let fx = Fixture::new();
5691        fx.write(
5692            "log.md",
5693            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5694        );
5695        let issues = fx.store_all();
5696        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5697        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5698    }
5699
5700    #[test]
5701    fn log_in_order_is_clean() {
5702        let fx = Fixture::new();
5703        fx.write(
5704            "log.md",
5705            concat!(
5706                "---\ntype: log\n---\n\n",
5707                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5708                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5709            ),
5710        );
5711        let issues = fx.store_all();
5712        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5713    }
5714
5715    #[test]
5716    fn log_not_checked_in_working_set() {
5717        // log.md ordering is an --all-only check.
5718        let fx = Fixture::new();
5719        fx.write(
5720            "log.md",
5721            concat!(
5722                "---\ntype: log\n---\n\n",
5723                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5724                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5725            ),
5726        );
5727        let issues = validate_working_set(&fx.store(), None).unwrap();
5728        assert!(
5729            !has(&issues, codes::LOG_OUT_OF_ORDER),
5730            "log ordering is --all only: {issues:#?}"
5731        );
5732    }
5733
5734    // ── working-set scoping ───────────────────────────────────────────────────
5735
5736    #[test]
5737    fn working_set_validates_only_changed_files() {
5738        let fx = Fixture::new();
5739        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5740        // in the log → working set must skip it.
5741        fx.write(
5742            "records/contacts/dirty.md",
5743            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5744        );
5745        fx.write(
5746            "records/contacts/unlogged.md",
5747            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5748        );
5749        fx.write(
5750            "log.md",
5751            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5752        );
5753        let issues = validate_working_set(&fx.store(), None).unwrap();
5754        assert!(
5755            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5756                && i.file == Path::new("records/contacts/dirty.md")),
5757            "{issues:#?}"
5758        );
5759        assert!(
5760            !issues
5761                .iter()
5762                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5763            "unlogged file must not be in the working set: {issues:#?}"
5764        );
5765    }
5766
5767    #[test]
5768    fn working_set_includes_incoming_linkers_to_changed_path() {
5769        let fx = Fixture::new();
5770        // `changed` was renamed/removed (logged). `linker` points at it with a
5771        // now-broken link and was NOT itself logged — but must be pulled in.
5772        fx.write(
5773            "records/profiles/linker.md",
5774            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5775        );
5776        // `changed.md` does NOT exist on disk (removed).
5777        fx.write(
5778            "log.md",
5779            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5780        );
5781        let issues = validate_working_set(&fx.store(), None).unwrap();
5782        assert!(
5783            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5784                && i.file == Path::new("records/profiles/linker.md")),
5785            "incoming linker to a removed path must be validated: {issues:#?}"
5786        );
5787    }
5788
5789    #[test]
5790    fn working_set_respects_explicit_since_cutoff() {
5791        let fx = Fixture::new();
5792        fx.write(
5793            "records/contacts/old.md",
5794            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5795        );
5796        fx.write(
5797            "records/contacts/new.md",
5798            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5799        );
5800        fx.write(
5801            "log.md",
5802            concat!(
5803                "---\ntype: log\n---\n\n",
5804                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5805                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5806            ),
5807        );
5808        // Cutoff after `old` but before `new`.
5809        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5810        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5811        assert!(
5812            issues
5813                .iter()
5814                .any(|i| i.file == Path::new("records/contacts/new.md")),
5815            "{issues:#?}"
5816        );
5817        assert!(
5818            !issues
5819                .iter()
5820                .any(|i| i.file == Path::new("records/contacts/old.md")),
5821            "old change is before the cutoff: {issues:#?}"
5822        );
5823    }
5824
5825    #[test]
5826    fn working_set_default_since_is_last_validate_entry() {
5827        let fx = Fixture::new();
5828        // `before` changed before the last validate; `after` changed after.
5829        fx.write(
5830            "records/contacts/before.md",
5831            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5832        );
5833        fx.write(
5834            "records/contacts/after.md",
5835            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5836        );
5837        fx.write(
5838            "log.md",
5839            concat!(
5840                "---\ntype: log\n---\n\n",
5841                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5842                "## [2026-05-21 10:00] validate\nPASS\n\n",
5843                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5844            ),
5845        );
5846        let issues = validate_working_set(&fx.store(), None).unwrap();
5847        assert!(
5848            issues
5849                .iter()
5850                .any(|i| i.file == Path::new("records/contacts/after.md")),
5851            "{issues:#?}"
5852        );
5853        assert!(
5854            !issues
5855                .iter()
5856                .any(|i| i.file == Path::new("records/contacts/before.md")),
5857            "change before the last validate entry is outside the default window: {issues:#?}"
5858        );
5859    }
5860
5861    // ── ordering / determinism ────────────────────────────────────────────────
5862
5863    #[test]
5864    fn issues_are_sorted_by_file_then_line() {
5865        let fx = Fixture::new();
5866        fx.write("records/profiles/z.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5867        fx.write("records/profiles/a.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5868        let issues = fx.store_all();
5869        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5870        let mut sorted = files.clone();
5871        sorted.sort();
5872        assert_eq!(
5873            files, sorted,
5874            "issues must be emitted in a stable file order"
5875        );
5876    }
5877
5878    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5879
5880    #[test]
5881    fn frozen_page_is_not_a_validate_error() {
5882        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5883        // A clean file listed in `### Frozen pages` must validate clean.
5884        let mut fx = Fixture::new();
5885        fx.config
5886            .frozen_pages
5887            .push(PathBuf::from("records/decisions/d.md"));
5888        fx.write(
5889            "records/decisions/d.md",
5890            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5891        );
5892        let issues = fx.store_all();
5893        assert!(
5894            !has(&issues, codes::POLICY_FROZEN_PAGE),
5895            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5896        );
5897    }
5898
5899    #[test]
5900    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5901        // The full-path doctrine makes ambiguity impossible; the defensive code
5902        // must never fire on a normal store.
5903        let fx = Fixture::new();
5904        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5905        let mut body = valid_contact("links to sarah");
5906        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5907        fx.write("records/contacts/p.md", &body);
5908        let issues = fx.store_all();
5909        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5910    }
5911
5912    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5913
5914    #[test]
5915    fn unknown_type_passes_through() {
5916        // A custom type is ambient context: it has a `type`, so no
5917        // FM_MISSING_TYPE, and with no matching schema there are no schema
5918        // errors. Only the universal contract (summary, timestamps) applies.
5919        let fx = Fixture::new();
5920        fx.write(
5921            "records/proposals/x.md",
5922            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5923        );
5924        let issues = fx.store_all();
5925        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5926        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5927        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5928        // The unknown fields don't trip anything.
5929        assert!(
5930            !issues
5931                .iter()
5932                .any(|i| i.key.as_deref() == Some("custom_field")
5933                    || i.key.as_deref() == Some("budget")),
5934            "unknown fields are ambient context: {issues:#?}"
5935        );
5936    }
5937
5938    // ── find_links_to prefix-collision safety (working set) ───────────────────
5939
5940    #[test]
5941    fn incoming_linker_scan_does_not_prefix_match() {
5942        // A changed `records/contacts/sarah` must NOT pull in a file that only
5943        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5944        let fx = Fixture::new();
5945        fx.write(
5946            "records/profiles/only-sarah-chen.md",
5947            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5948        );
5949        // The log says `records/contacts/sarah` (the shorter path) changed.
5950        fx.write(
5951            "log.md",
5952            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5953        );
5954        let issues = validate_working_set(&fx.store(), None).unwrap();
5955        assert!(
5956            !issues
5957                .iter()
5958                .any(|i| i.file == Path::new("records/profiles/only-sarah-chen.md")),
5959            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5960        );
5961    }
5962
5963    #[test]
5964    fn working_set_does_not_flag_stale_catalog_index_as_wiki_link_broken() {
5965        // The working-set incoming-linker scan rides embedded-ripgrep
5966        // `Store::find_links_to`, which scans EVERY `.md` — so a type-folder
5967        // `index.md` listing a now-deleted target IS pulled into the working set.
5968        // But its entries are GENERATED catalog entries, not authored body links:
5969        // a dangling one is an `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"),
5970        // the job of `check_indexes` under `--all` — NOT a `WIKI_LINK_BROKEN`
5971        // ("create the target"), whose remedy would steer an agent to recreate
5972        // the very data it just deleted. The loop default must therefore NOT
5973        // body-link-check the derived catalog (index integrity is an O(store)
5974        // sweep concern, not an O(changed) loop concern). Adversarial review #11:
5975        // the prior behavior gave WIKI_LINK_BROKEN here while `--all` gave
5976        // INDEX_STALE_ENTRY for the identical condition — two codes, opposite
5977        // remedies, across the loop default vs the sweep.
5978        let fx = Fixture::new();
5979        // A catalog that still lists the deleted contact (a real, common stale
5980        // state after an out-of-band `delete`).
5981        fx.write(
5982            "records/contacts/index.md",
5983            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5984        );
5985        // The log says `records/contacts/sarah-chen` was deleted.
5986        fx.write(
5987            "log.md",
5988            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5989        );
5990        let issues = validate_working_set(&fx.store(), None).unwrap();
5991        assert!(
5992            !issues
5993                .iter()
5994                .any(|i| i.file == Path::new("records/contacts/index.md")
5995                    && i.code == codes::WIKI_LINK_BROKEN),
5996            "a stale catalog `index.md` entry must NOT be WIKI_LINK_BROKEN in the \
5997             working set (it is an INDEX_STALE_ENTRY under `--all`): {issues:#?}"
5998        );
5999    }
6000
6001    #[test]
6002    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
6003        // CONTRACT (the O(changed × store) fix): the working-set scan finds
6004        // incoming linkers for EVERY changed object, and does so via the single
6005        // batch pass `Store::find_links_to_any` — not one full store read per
6006        // changed object. This test pins the behavior that makes the single-pass
6007        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
6008        // into the working set and flagged. A regression that scanned for only the
6009        // first/last changed object, or that dropped the batch union, would leave
6010        // one of the two broken links unreported and fail here.
6011        let fx = Fixture::new();
6012        // Linker A → deleted target #1 (in the body).
6013        fx.write(
6014            "records/profiles/refers-sarah.md",
6015            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6016        );
6017        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
6018        // sidecar `links` projection would miss, which is why this must be a
6019        // content scan, not a sidecar read).
6020        fx.write(
6021            "records/meetings/2026/05/kickoff.md",
6022            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
6023        );
6024        // The log says BOTH targets were deleted in this window.
6025        fx.write(
6026            "log.md",
6027            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
6028        );
6029
6030        let issues = validate_working_set(&fx.store(), None).unwrap();
6031        assert!(
6032            issues
6033                .iter()
6034                .any(|i| i.file == Path::new("records/profiles/refers-sarah.md")
6035                    && i.code == codes::WIKI_LINK_BROKEN),
6036            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
6037        );
6038        assert!(
6039            issues.iter().any(
6040                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
6041                    && i.code == codes::WIKI_LINK_BROKEN
6042            ),
6043            "linker to the SECOND deleted target (typed-field edge) must also be \
6044             pulled in and flagged — proves the scan covers the whole changed set, \
6045             not just one object: {issues:#?}"
6046        );
6047    }
6048
6049    #[test]
6050    fn frontmatter_block_sequence_links_each_get_their_own_line() {
6051        // Each block-sequence wiki-link reports on its own source line.
6052        let fx = Fixture::new();
6053        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
6054        fx.write(
6055            "records/meetings/m.md",
6056            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
6057        );
6058        let issues = fx.store_all();
6059        let broken_lines: BTreeSet<Option<u32>> = issues
6060            .iter()
6061            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
6062            .map(|i| i.line)
6063            .collect();
6064        assert_eq!(
6065            broken_lines.len(),
6066            2,
6067            "two distinct broken-link lines: {issues:#?}"
6068        );
6069    }
6070
6071    // ── Regression: null / non-scalar created/updated ────────────────────────
6072
6073    #[test]
6074    fn null_created_is_missing_not_silently_passed() {
6075        // Regression: a present-but-`null` `created:` previously slipped past
6076        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
6077        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
6078        let fx = Fixture::new();
6079        fx.write(
6080            "records/contacts/a.md",
6081            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6082        );
6083        let issues = fx.store_all();
6084        assert!(
6085            has(&issues, codes::FM_MISSING_CREATED),
6086            "null `created:` must read as missing: {issues:#?}"
6087        );
6088    }
6089
6090    #[test]
6091    fn sequence_created_is_bad_timestamp() {
6092        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
6093        let fx = Fixture::new();
6094        fx.write(
6095            "records/contacts/a.md",
6096            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6097        );
6098        let issues = fx.store_all();
6099        assert!(
6100            issues
6101                .iter()
6102                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
6103            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
6104        );
6105    }
6106
6107    // ── Regression: schema required null / empty-collection ──────────────────
6108
6109    #[test]
6110    fn required_field_null_or_empty_collection_is_missing() {
6111        // Regression: a plain required field (no shape/enum) holding YAML null
6112        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
6113        // previously validated with 0 issues — `scalar_string` returned None and
6114        // `.unwrap_or(false)` treated the value as non-empty.
6115        for value in ["", " []", " {}"] {
6116            let mut fx = Fixture::new();
6117            fx.config.schemas.insert(
6118                "contact".into(),
6119                Schema {
6120                    fields: vec![FieldSpec {
6121                        name: "name".into(),
6122                        required: true,
6123                        ..Default::default()
6124                    }],
6125                    ..Default::default()
6126                },
6127            );
6128            fx.write(
6129                "records/contacts/a.md",
6130                &format!(
6131                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
6132                ),
6133            );
6134            let issues = fx.store_all();
6135            assert!(
6136                issues
6137                    .iter()
6138                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
6139                        && i.key.as_deref() == Some("name")),
6140                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
6141            );
6142        }
6143    }
6144
6145    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
6146
6147    #[test]
6148    fn wiki_link_to_raw_source_file_resolves() {
6149        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
6150        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
6151        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
6152        let fx = Fixture::new();
6153        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6154        fx.write(
6155            "records/contacts/a.md",
6156            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
6157        );
6158        let issues = fx.store_all();
6159        assert!(
6160            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
6161            "a link to an existing raw source file must not be broken: {issues:#?}"
6162        );
6163    }
6164
6165    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
6166
6167    #[test]
6168    fn non_utf8_content_file_is_reported() {
6169        // Regression: a content file with invalid UTF-8 bytes made
6170        // check_content_file return None silently, so the store passed with exit
6171        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
6172        let fx = Fixture::new();
6173        let abs = fx.dir.path().join("records/notes/corrupt.md");
6174        fs::create_dir_all(abs.parent().unwrap()).unwrap();
6175        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
6176        let issues = validate_working_set(&fx.store(), None).unwrap();
6177        assert!(
6178            has(&issues, codes::FM_UNREADABLE),
6179            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
6180        );
6181    }
6182
6183    // ── Regression: code-fence char/run tracking ─────────────────────────────
6184
6185    #[test]
6186    fn tilde_fence_containing_backtick_fence_does_not_invert() {
6187        // Regression: a `~~~` block legally contains ``` lines (documenting a
6188        // backtick fence); a naive toggle inverted `in_fence` and checked the
6189        // demo `[[fake]]` inside the code block as a live link. The link inside
6190        // BOTH fences must be skipped.
6191        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
6192        let links = extract_wiki_links(body);
6193        assert!(
6194            links.is_empty(),
6195            "wiki-link inside a nested code fence must be skipped: {links:?}"
6196        );
6197    }
6198
6199    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
6200
6201    #[test]
6202    fn all_sweep_visits_in_layer_log_folder() {
6203        // Regression: `validate --all` pruned every dir named `log`, so a real
6204        // content folder like `records/log/` was invisible to the full sweep —
6205        // reporting FEWER errors than the default scope. A frontmatter-less file
6206        // there must still surface FM_MISSING_TYPE under --all.
6207        let fx = Fixture::new();
6208        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
6209        let issues = fx.store_all();
6210        assert!(
6211            has(&issues, codes::FM_MISSING_TYPE),
6212            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
6213        );
6214    }
6215
6216    // ── Regression: flow-form list with whitespace ───────────────────────────
6217
6218    #[test]
6219    fn flow_form_link_list_with_spaces_is_flagged() {
6220        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
6221        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
6222        // text test. The value-based detector must catch the whitespace variant.
6223        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
6224        assert!(
6225            keys.iter().any(|k| k == "attendees"),
6226            "spaced flow-form list must be detected: {keys:?}"
6227        );
6228    }
6229
6230    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6231
6232    #[test]
6233    fn middot_hashtag_summary_tail_round_trips() {
6234        // Regression: a tagless summary that legitimately ends in a single-spaced
6235        // ` · #word` tail round-trips through the renderer verbatim, but the loose
6236        // ` · ` strip mistook it for the tag block and reported a spurious,
6237        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
6238        // exact double-spaced `  ·  ` delimiter.
6239        assert_eq!(
6240            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
6241            Some("Standup notes · #standup"),
6242            "a single-spaced middot tail is part of the summary, not a tag block"
6243        );
6244        // The renderer's real double-spaced tag suffix IS still stripped.
6245        assert_eq!(
6246            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
6247            Some("Renewal champion"),
6248            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
6249        );
6250    }
6251
6252    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6253
6254    #[test]
6255    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
6256        assert!(is_url("http://x"), "an 8-char http URL is valid");
6257        assert!(is_url("https://x"), "a 9-char https URL is valid");
6258        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
6259        assert!(!is_url("https://"), "a bare https scheme is rejected");
6260    }
6261
6262    #[test]
6263    fn email_shape_rejects_double_at() {
6264        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6265        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6266        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6267    }
6268
6269    // ── Regression: working-set vs --all agree on log.md links ───────────────
6270
6271    #[test]
6272    fn working_set_does_not_flag_log_md_body_links() {
6273        // Regression: the working-set incoming-linker scan runs root `log.md`
6274        // through the body wiki-link check, flagging a historical `[[deleted]]`
6275        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6276        // the append-only log can't have "fixed". The root meta files must be
6277        // excluded from the body link check, matching --all.
6278        let fx = Fixture::new();
6279        fx.write("records/contacts/a.md", &valid_contact("A"));
6280        fx.write(
6281            "log.md",
6282            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6283        );
6284        let issues = validate_working_set(&fx.store(), None).unwrap();
6285        assert!(
6286            !issues
6287                .iter()
6288                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6289                    && i.file == std::path::Path::new("log.md")),
6290            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6291        );
6292    }
6293
6294    // ── Regression: DB.md schema field lint ──────────────────────────────────
6295
6296    #[test]
6297    fn schema_duplicate_field_name_is_flagged() {
6298        let mut fx = Fixture::new();
6299        fx.config.schemas.insert(
6300            "contact".into(),
6301            Schema {
6302                fields: vec![
6303                    FieldSpec {
6304                        name: "name".into(),
6305                        required: true,
6306                        ..Default::default()
6307                    },
6308                    FieldSpec {
6309                        name: "name".into(),
6310                        ..Default::default()
6311                    },
6312                ],
6313                ..Default::default()
6314            },
6315        );
6316        let issues = fx.store_all();
6317        assert!(
6318            issues
6319                .iter()
6320                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6321            "a duplicate schema field name must be flagged: {issues:#?}"
6322        );
6323    }
6324
6325    #[test]
6326    fn schema_unknown_modifier_is_info() {
6327        let mut fx = Fixture::new();
6328        fx.config.schemas.insert(
6329            "contact".into(),
6330            Schema {
6331                fields: vec![FieldSpec {
6332                    name: "name".into(),
6333                    unknown_modifiers: vec!["requierd".into()],
6334                    ..Default::default()
6335                }],
6336                ..Default::default()
6337            },
6338        );
6339        let issues = fx.store_all();
6340        assert!(
6341            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6342                && i.severity == Severity::Info
6343                && i.key.as_deref() == Some("name")),
6344            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6345        );
6346    }
6347
6348    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6349    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6350    /// and the module doc-comment promises this code implements "exactly those
6351    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6352    /// new validation code is added to the engine but never documented.
6353    #[test]
6354    fn every_code_constant_is_documented_in_spec() {
6355        // Parse the canonical constant *values* straight out of this module's
6356        // source, so a future `pub const X: &str = "X";` is covered with no test
6357        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6358        let this_src = include_str!("validate.rs");
6359        let mut codes_in_module: Vec<String> = Vec::new();
6360        let mut in_codes_mod = false;
6361        for line in this_src.lines() {
6362            let t = line.trim();
6363            if t.starts_with("pub mod codes") {
6364                in_codes_mod = true;
6365                continue;
6366            }
6367            // The `mod codes` block ends at its closing brace at column 0.
6368            if in_codes_mod && line == "}" {
6369                break;
6370            }
6371            if in_codes_mod {
6372                if let Some(rest) = t.strip_prefix("pub const ") {
6373                    // rest = `NAME: &str = "VALUE";`
6374                    let value = rest
6375                        .split_once('=')
6376                        .map(|(_, v)| v.trim())
6377                        .and_then(|v| v.strip_prefix('"'))
6378                        .and_then(|v| v.strip_suffix("\";"))
6379                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6380                    codes_in_module.push(value.to_string());
6381                }
6382            }
6383        }
6384        assert!(
6385            codes_in_module.len() >= 36,
6386            "parsed only {} code constants from `mod codes`; the parser likely \
6387             broke against a source-format change",
6388            codes_in_module.len()
6389        );
6390
6391        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6392        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6393        let spec = fs::read_to_string(&spec_path)
6394            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6395
6396        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6397        let missing: Vec<&String> = codes_in_module
6398            .iter()
6399            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6400            .collect();
6401        assert!(
6402            missing.is_empty(),
6403            "validation codes emitted by the engine but absent from SPEC.md \
6404             § Validation (the declared complete vocabulary): {missing:?}"
6405        );
6406    }
6407
6408    // ── loose files (directly at a layer root, no type-folder) ───────────────
6409
6410    const LOOSE_ALICE: &str = "---\ntype: contact\nid: alice\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Alice\n---\nbody\n";
6411    const LOOSE_BOB: &str = "---\ntype: contact\nid: bob\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Bob loose\n---\nbody\n";
6412
6413    #[test]
6414    fn loose_file_catalogued_in_layer_jsonl_validates_clean() {
6415        let fx = Fixture::new();
6416        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6417        fx.write("records/bob.md", LOOSE_BOB); // loose, directly under records/
6418        fx.rebuild_indexes();
6419        let issues = fx.store_all();
6420        assert!(
6421            issues.is_empty(),
6422            "a rebuilt store with a catalogued loose file must validate clean, got: {issues:?}"
6423        );
6424    }
6425
6426    #[test]
6427    fn loose_file_with_missing_layer_jsonl_is_index_jsonl_missing() {
6428        let fx = Fixture::new();
6429        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6430        fx.write("records/bob.md", LOOSE_BOB);
6431        fx.rebuild_indexes();
6432        // Simulate the layer sidecar going missing (a hand-deletion / bad sync).
6433        fs::remove_file(fx.dir.path().join("records/index.jsonl")).unwrap();
6434        let issues = fx.store_all();
6435        assert!(
6436            has(&issues, codes::INDEX_JSONL_MISSING),
6437            "a loose file with no layer index.jsonl must raise INDEX_JSONL_MISSING, got: {issues:?}"
6438        );
6439    }
6440
6441    #[test]
6442    fn loose_only_store_validates_clean_without_a_rollup_index_md() {
6443        // A store whose ONLY content is a loose file (no type-folder anywhere):
6444        // rebuild writes the layer `index.jsonl` but no root/layer `index.md`
6445        // rollup — there is nothing to roll up. `validate --all` must accept that;
6446        // the rollup is required only when type-folders exist. (Regression: this
6447        // emitted two false INDEX_MISSING errors in 0.4.4.)
6448        let fx = Fixture::new();
6449        fx.write("records/solo.md", LOOSE_ALICE);
6450        fx.rebuild_indexes();
6451        assert!(
6452            !fx.dir.path().join("index.md").is_file(),
6453            "no root rollup index.md should exist for a loose-only store"
6454        );
6455        let issues = fx.store_all();
6456        assert!(
6457            issues.is_empty(),
6458            "a loose-only store must validate clean (catalog is the layer index.jsonl), got: {issues:?}"
6459        );
6460    }
6461}