Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and file walk here, reading the two public,
//! caller-populated fields of a [`Store`] — [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! The two deliberate exceptions are the store scans named above:
//! [`Store::find_links_to_any`] (incoming linkers for the working set) and
//! [`Store::walk`] (the vacuous-pass fallback sweep).
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is. Validation fails (non-zero exit) as
/// soon as any issue is a [`Severity::Error`]; warnings and info never fail it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point that the agent resolves at its own discretion.
    Warning,
    /// Reported for visibility only; has no effect on exit status.
    Info,
}
61
62/// A single structured validation finding. Agent-primary and machine-parseable
63/// via `--json`; `suggestion` is a deterministic remediation hint the agent
64/// applies without guessing.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Issue {
67    /// The severity; only [`Severity::Error`] fails validation.
68    pub severity: Severity,
69    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
70    pub code: &'static str,
71    /// The file the issue is about.
72    pub file: PathBuf,
73    /// The 1-based line, when applicable.
74    pub line: Option<u32>,
75    /// The frontmatter key, when the issue is about a specific field.
76    pub key: Option<String>,
77    /// A human-readable message.
78    pub message: String,
79    /// A deterministic remediation hint, when one exists.
80    pub suggestion: Option<String>,
81    /// Other files involved (e.g. the duplicate partner in a collision).
82    pub related: Vec<PathBuf>,
83}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes: one constant for each row of the
/// SPEC.md § Validation table. Call sites use these constants rather than bare
/// string literals so the code and the SPEC table cannot silently drift apart.
pub mod codes {
    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter is missing `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` contains an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A `DB.md ## Schemas` field declaration is malformed (empty or duplicate
    /// field name) or carries an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";
    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// A content file cannot be read (not valid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// The frontmatter block is not valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` is not ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// `meta-type` is present but is not one of fact / operational /
    /// conclusion.
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";
    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    /// A wiki-link target is not a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link's target file does not exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches more than one file (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension, which should be dropped.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses the inline `[[[a]], [[b]]]` form; block form is
    /// required.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of one type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";
    /// A field that a `DB.md` schema requires is absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value does not match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field holds a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value is not a member of the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file whose type is listed under `### Ignored types` exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `meta-type: conclusion` record derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    /// A `log.md` entry header has an unparseable timestamp.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry has an unrecognized kind.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries are not in non-decreasing time order (possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    /// A non-empty canonical folder has no `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file is not listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` does not match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text does not match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// The `index.jsonl` twin of a type-folder is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file is absent from the `index.jsonl`, or a jsonl record points at a
    /// missing file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// The fields of an `index.jsonl` record do not match the file's
    /// frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    /// `tags` is not a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
    /// A line in `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// A content file references an `asset`/`assets` path that has no record
    /// in `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// An `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// No wrapper references the path of an `assets.jsonl` record.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// An `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
197
/// Upper bound on `summary` length, in chars, as set by the SPEC. A summary
/// longer than this is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
200
/// The `log.md` entry kinds the validator recognizes (SPEC § `log.md`). Any
/// other kind is reported as `LOG_UNKNOWN_KIND`, which is a warning rather
/// than an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
214
215// ─────────────────────────────────────────────────────────────────────────────
216//  Public entrypoints
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// **Loop default.** Validate the working set: content files changed since
220/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
221/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
222/// none of the cross-file global passes (entity-dedup, every-index sync,
223/// `log.md` ordering) that `--all` adds. If the default call finds no logged
224/// changed objects, it falls back to a per-file content sweep so an externally
225/// edited or freshly copied store cannot pass vacuously.
226///
227/// **Cost.** The changed set is read from `log.md` — O(changed): every
228/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
229/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
230/// over that set plus its incoming linkers — also O(changed). The one part that
231/// is *not* O(changed) is discovering those incoming linkers: a link to a
232/// changed path can live in the body or a typed frontmatter field of any file,
233/// so it is found by a **single** embedded-ripgrep pass over the store
234/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
235/// scan, flat in the changed-set size. (It was previously a full store read
236/// *per* changed object — `O(changed × store)`; that is the blow-up this path
237/// no longer pays.) The unavoidable single content scan is the same shape as
238/// free-text `dbmd search`; the sidecar `links` projection can't replace it
239/// because it omits body/typed-field edges.
240pub fn validate_working_set(
241    store: &Store,
242    since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244    if !store_marker_present(store) {
245        return Ok(vec![not_a_store_issue(store)]);
246    }
247
248    let cutoff = match since {
249        Some(ts) => Some(ts),
250        None => last_validate_at(store),
251    };
252
253    // 1. Changed objects, straight from the log (O(changed) — never a walk).
254    let changed = changed_objects_since(store, cutoff);
255    if changed.is_empty() && since.is_none() {
256        return validate_content_sweep(store);
257    }
258
259    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
260    //    path (the linker may now be stale even though it didn't change). The
261    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
262    //    pass over the store for the WHOLE changed set (one `.md` walk, one
263    //    presence-only/early-exit scan per file), not one walk per object. This
264    //    is the fix for the `O(changed × store)` blow-up that calling
265    //    `find_links_to` in a loop produced (a full store read per changed
266    //    object); the cost is now a single store scan regardless of how many
267    //    objects changed. A returned self-link is harmlessly deduped by the set
268    //    (the object is already inserted below).
269    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270    let mut working: BTreeSet<PathBuf> = changed;
271    for linker in store.find_links_to_any(&changed_targets)? {
272        working.insert(linker);
273    }
274
275    let mut issues = Vec::new();
276    for rel in &working {
277        let abs = store.root.join(rel);
278        // A changed path can be a *deletion* — skip files that no longer exist;
279        // the incoming-linker scan above already flagged links into them.
280        if !abs.is_file() {
281            continue;
282        }
283        // `None` basename index: the working-set pass does not build the
284        // store-wide basename map (that is a `--all`-only structure), so a bare
285        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
286        // `--all` sweep does the ambiguity upgrade.
287        check_content_file(store, rel, &abs, None, &mut issues);
288    }
289    issues.sort_by(issue_order);
290    Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294    let mut issues = Vec::new();
295    for rel in store.walk()? {
296        let abs = store.root.join(&rel);
297        check_content_file(store, &rel, &abs, None, &mut issues);
298    }
299    issues.sort_by(issue_order);
300    Ok(issues)
301}
302
303/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
304/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
305/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
306/// loop.
307pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308    if !store_marker_present(store) {
309        return Ok(vec![not_a_store_issue(store)]);
310    }
311
312    let mut issues = Vec::new();
313
314    // Store-identity file: `DB.md` shape (type / required fields / section
315    // headers). A single root file, checked once in the sweep — not a content
316    // file (it carries no `summary`), so it is not part of `walk_content_files`.
317    check_db_md(store, &mut issues);
318
319    let files = walk_content_files(&store.root);
320
321    // The basename index makes the short-form wiki-link check able to upgrade a
322    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
323    // Built once from the already-gathered sweep list (no extra walk); only the
324    // `--all` path has it (the working-set path stays O(changed)).
325    let basenames = build_basename_index(&files);
326
327    // Per-file checks over the whole store.
328    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329    for rel in &files {
330        let abs = store.root.join(rel);
331        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332            parsed.push((rel.clone(), p));
333        }
334    }
335
336    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
337    check_duplicates(store, &parsed, &mut issues);
338
339    // Cross-file: hierarchical index.md + index.jsonl sync.
340    check_indexes(store, &files, &mut issues);
341
342    // Cross-file: log.md well-formedness + ordering.
343    check_log(store, &mut issues);
344
345    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
346    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
347    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
348    // a fresh clone with no restored bytes still passes here.
349    check_assets(store, &parsed, &mut issues);
350
351    issues.sort_by(issue_order);
352    Ok(issues)
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356//  Per-file content checks (shared by both scopes)
357// ─────────────────────────────────────────────────────────────────────────────
358
359/// What `validate_all`'s cross-file pass needs from a per-file parse: the
360/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
361/// text-based wiki-link extraction). The body and fence-line are consumed
362/// inline during the per-file pass and not carried here.
363struct Parsed {
364    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
365    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
366    fm: Option<BTreeMap<String, Value>>,
367    /// The raw frontmatter YAML text (between the fences) — the source for
368    /// text-based wiki-link extraction in dedup.
369    fm_yaml: String,
370}
371
372/// Run every per-file check on one content file, pushing issues. Returns the
373/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
374/// `None` only when the file is unreadable or has no frontmatter block at all
375/// (which for a content file is itself reported).
376fn check_content_file(
377    store: &Store,
378    rel: &Path,
379    abs: &Path,
380    basenames: Option<&BasenameIndex>,
381    issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383    let text = match std::fs::read_to_string(abs) {
384        Ok(t) => t,
385        Err(e) => {
386            // The file exists in the walk but can't be read as UTF-8 text
387            // (invalid bytes) or hit an I/O error. Returning `None` silently
388            // here let a store whose only content file was binary garbage pass
389            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
390            // sweep exists to prevent. Report it so the agent gets an actionable
391            // diagnostic naming the unreadable file (and `index rebuild`, which
392            // hard-fails on the same file, isn't the only signal).
393            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394                "file is not valid UTF-8 text".to_string()
395            } else {
396                format!("file could not be read: {e}")
397            };
398            push(
399                issues,
400                Severity::Error,
401                codes::FM_UNREADABLE,
402                rel,
403                None,
404                None,
405                format!("content file is unreadable: {detail}"),
406                Some(
407                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408                        .into(),
409                ),
410                vec![],
411            );
412            return None;
413        }
414    };
415
416    let is_content = is_content_file(rel);
417
418    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419        Some(split) => split,
420        None => {
421            // No frontmatter at all. For a content file that means there's no
422            // `type:` and no `summary:` — report both the way a parsed-but-empty
423            // file would, so the agent gets the same actionable codes.
424            if is_content {
425                push(
426                    issues,
427                    Severity::Error,
428                    codes::FM_MISSING_TYPE,
429                    rel,
430                    None,
431                    Some("type".into()),
432                    "content file has no frontmatter `type:`".into(),
433                    Some("add a YAML frontmatter block with `type:`".into()),
434                    vec![],
435                );
436                push(
437                    issues,
438                    Severity::Error,
439                    codes::SUMMARY_MISSING,
440                    rel,
441                    None,
442                    Some("summary".into()),
443                    "content file has no `summary`".into(),
444                    Some("run `dbmd fm init`".into()),
445                    vec![],
446                );
447            }
448            return None;
449        }
450    };
451
452    // Parse the YAML block.
453    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455        // An empty frontmatter block parses as Null; treat as an empty mapping.
456        Ok(Value::Null) => Some(BTreeMap::new()),
457        Ok(_) => {
458            // A scalar / sequence at the top level isn't a frontmatter mapping.
459            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
460            // block is opaque, so there is no single offending field line.
461            push(
462                issues,
463                Severity::Error,
464                codes::FM_MALFORMED_YAML,
465                rel,
466                Some(1),
467                None,
468                "frontmatter is not a YAML mapping".into(),
469                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470                vec![],
471            );
472            None
473        }
474        Err(e) => {
475            // Anchor to line 1 (the opening `---`): an unparseable block has no
476            // single offending field line; the agent re-reads the whole block.
477            push(
478                issues,
479                Severity::Error,
480                codes::FM_MALFORMED_YAML,
481                rel,
482                Some(1),
483                None,
484                format!("frontmatter block isn't valid YAML: {e}"),
485                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486                vec![],
487            );
488            None
489        }
490    };
491
492    if let Some(map) = &fm {
493        // The detailed frontmatter checks only run when the YAML parsed.
494        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495    }
496
497    // Wiki-link doctrine checks run on the body of content files. They are NOT
498    // run on:
499    //   - the root append-only meta files `log.md`/`DB.md` — they reach this
500    //     function only via the working-set incoming-linker scan (`walk_all_md`
501    //     includes them), and `validate --all` never link-checks their bodies. A
502    //     historical `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]`
503    //     in DB.md's `## Agent instructions`, must not be `WIKI_LINK_BROKEN`; the
504    //     log is append-only, so "fix the link" can't even be applied.
505    //   - the derived catalogs `index.md`/`index.jsonl` — their "links" are
506    //     GENERATED catalog entries, not authored body wiki-links. A folder's
507    //     `index.md` is pulled into the working set as an incoming linker (an
508    //     entry `[[records/contacts/a]]` IS a wiki-link to a member, so touching
509    //     or deleting any member drags its folder `index.md` in). Its integrity
510    //     is the job of `check_indexes` under `--all`, which reports a dangling
511    //     entry as `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"). Body-link-
512    //     checking it here instead emitted `WIKI_LINK_BROKEN` ("create the
513    //     target") for the SAME condition — a different code with the OPPOSITE
514    //     remedy across the loop default vs the sweep, steering an agent to
515    //     recreate deleted data. `walk_content_files` skips `index.md` under
516    //     `--all` for exactly this reason; the working-set scope must match.
517    // Without these guards the two scopes disagree on the same store.
518    if !is_root_meta_file(rel) && !is_index_catalog_file(rel) {
519        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
520    }
521
522    Some(Parsed { fm, fm_yaml })
523}
524
525/// All frontmatter-level checks for a content file with valid YAML.
526fn check_frontmatter(
527    store: &Store,
528    rel: &Path,
529    fm: &BTreeMap<String, Value>,
530    fm_yaml: &str,
531    basenames: Option<&BasenameIndex>,
532    issues: &mut Vec<Issue>,
533    is_content: bool,
534) {
535    let type_ = fm.get("type").and_then(scalar_string);
536
537    // ── type ────────────────────────────────────────────────────────────────
538    if is_content && type_.is_none() {
539        push(
540            issues,
541            Severity::Error,
542            codes::FM_MISSING_TYPE,
543            rel,
544            fm_key_line_or_top(fm_yaml, "type"),
545            Some("type".into()),
546            "content file has no `type:`".into(),
547            Some("add a `type:` field (e.g. `type: contact`)".into()),
548            vec![],
549        );
550    }
551
552    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
553    // Present-but-out-of-enum is an error; absent is fine (effective default
554    // `fact`). Sources don't normally carry one, but validating the value when
555    // present is layer-agnostic and harmless.
556    if is_content {
557        // Branch on the raw value, NOT `and_then(scalar_string)`. Pre-filtering
558        // through `scalar_string` made a list/mapping value (which returns `None`)
559        // short-circuit the whole check, so a structurally-wrong `meta-type`
560        // slipped through clean AND was silently reclassified as the default
561        // `fact` by the rest of the toolkit. Absent or explicit-`null` is fine
562        // (effective default `fact`); a present non-null value must be a scalar in
563        // the closed enum. This mirrors the sibling timestamp check below, which
564        // was already hardened against the same non-scalar escape.
565        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
566            match scalar_string(v) {
567                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
568                Some(mt) => push(
569                    issues,
570                    Severity::Error,
571                    codes::FM_BAD_META_TYPE,
572                    rel,
573                    fm_key_line_or_top(fm_yaml, "meta-type"),
574                    Some("meta-type".into()),
575                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
576                    Some(
577                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
578                            .into(),
579                    ),
580                    vec![],
581                ),
582                None => push(
583                    issues,
584                    Severity::Error,
585                    codes::FM_BAD_META_TYPE,
586                    rel,
587                    fm_key_line_or_top(fm_yaml, "meta-type"),
588                    Some("meta-type".into()),
589                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
590                     string, found a list or mapping"
591                        .to_string(),
592                    Some(
593                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
594                            .into(),
595                    ),
596                    vec![],
597                ),
598            }
599        }
600    }
601
602    // ── summary (universal on content files) ──────────────────────────────────
603    if is_content {
604        check_summary(rel, fm, fm_yaml, issues);
605    }
606
607    // ── timestamps: created / updated ─────────────────────────────────────────
608    // The `created`/`updated` contract is content-file-only; meta files
609    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
610    if is_content {
611        for (key, missing_code) in [
612            ("created", codes::FM_MISSING_CREATED),
613            ("updated", codes::FM_MISSING_UPDATED),
614        ] {
615            // A key that is absent, or present-but-`null`, has *no* timestamp →
616            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
617            // "no timestamp", so a null `created:` must read as missing, not
618            // silently pass.
619            let value = fm.get(key);
620            let missing = value.is_none() || value.is_some_and(Value::is_null);
621            if missing {
622                push(
623                    issues,
624                    Severity::Error,
625                    missing_code,
626                    rel,
627                    fm_key_line_or_top(fm_yaml, key),
628                    Some(key.into()),
629                    format!("content file has no `{key}:` timestamp"),
630                    Some(format!(
631                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
632                    )),
633                    vec![],
634                );
635            } else if let Some(v) = value {
636                // Present and non-null. A scalar is checked for ISO-8601; a
637                // sequence/mapping is not a timestamp string at all and so
638                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
639                // through the way it did when `scalar_string` returned `None`
640                // and the branch silently no-oped).
641                match scalar_string(v) {
642                    Some(s) if is_iso8601(&s) => {}
643                    Some(s) => push(
644                        issues,
645                        Severity::Error,
646                        codes::FM_BAD_TIMESTAMP,
647                        rel,
648                        fm_key_line(fm_yaml, key),
649                        Some(key.into()),
650                        format!("`{key}` is not ISO-8601: {s:?}"),
651                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
652                        vec![],
653                    ),
654                    None => push(
655                        issues,
656                        Severity::Error,
657                        codes::FM_BAD_TIMESTAMP,
658                        rel,
659                        fm_key_line(fm_yaml, key),
660                        Some(key.into()),
661                        format!(
662                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
663                        ),
664                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
665                        vec![],
666                    ),
667                }
668            }
669        }
670    }
671    // ── tags shape ────────────────────────────────────────────────────────────
672    if let Some(tags) = fm.get("tags") {
673        if !is_flat_scalar_list(tags) {
674            push(
675                issues,
676                Severity::Warning,
677                codes::TAGS_MALFORMED,
678                rel,
679                fm_key_line(fm_yaml, "tags"),
680                Some("tags".into()),
681                "`tags` must be a flat YAML list of short scalar labels".into(),
682                Some("use block form: one `- <tag>` per line".into()),
683                vec![],
684            );
685        }
686    }
687
688    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
689    for key in detect_flow_form_link_lists(fm_yaml) {
690        push(
691            issues,
692            Severity::Error,
693            codes::WIKI_LINK_FLOW_FORM_LIST,
694            rel,
695            fm_key_line(fm_yaml, &key),
696            Some(key.clone()),
697            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
698            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
699            vec![],
700        );
701    }
702
703    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
704    // Skip keys that have an explicit `link to` schema spec — those are checked
705    // (with prefix enforcement) in `check_schema`, and double-reporting the same
706    // link via two paths would be noise.
707    let schema_link_keys: BTreeSet<String> =
708        effective_schema(store, type_.as_deref().unwrap_or(""))
709            .map(|s| {
710                s.fields
711                    .iter()
712                    .filter(|f| f.link_prefix.is_some())
713                    .map(|f| f.name.clone())
714                    .collect()
715            })
716            .unwrap_or_default();
717    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
718        if schema_link_keys.contains(&key) {
719            continue;
720        }
721        check_wiki_link(
722            store,
723            rel,
724            &link,
725            Some(link.line),
726            Some(&key),
727            basenames,
728            issues,
729        );
730    }
731
732    // ── policies: ignored types ──────────────────────────────────────────────
733    if let Some(t) = &type_ {
734        if store.config.ignored_types.iter().any(|it| it == t) {
735            push(
736                issues,
737                Severity::Info,
738                codes::POLICY_IGNORED_TYPE_PRESENT,
739                rel,
740                fm_key_line(fm_yaml, "type"),
741                Some("type".into()),
742                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
743                Some(
744                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
745                        .into(),
746                ),
747                // The policy source: `DB.md` declares the ignored type.
748                vec![PathBuf::from("DB.md")],
749            );
750        }
751        // A conclusion record (`meta-type: conclusion`) deriving from an
752        // ignored-type record → warning. The decision lives in the shared
753        // `derived_from_ignored_type` entry point; this side only supplies the
754        // `derived_from` targets (with their line, which the issue carries) and
755        // renders the finding.
756        let meta_type = fm
757            .get("meta-type")
758            .and_then(scalar_string)
759            .unwrap_or_else(|| "fact".to_string());
760        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
761            if let Some(hit) =
762                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
763            {
764                push(
765                    issues,
766                    Severity::Warning,
767                    codes::POLICY_IGNORED_TYPE_DERIVED,
768                    rel,
769                    Some(link.line),
770                    Some("derived_from".into()),
771                    format!(
772                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
773                        hit.target, hit.target_type
774                    ),
775                    Some(
776                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
777                            .into(),
778                    ),
779                    // The ignored-type source record, plus `DB.md` (the policy
780                    // source that lists the ignored type).
781                    vec![
782                        PathBuf::from(format!("{}.md", hit.target)),
783                        PathBuf::from("DB.md"),
784                    ],
785                );
786            }
787        }
788    }
789
790    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
791    if let Some(t) = &type_ {
792        if let Some(schema) = effective_schema(store, t) {
793            check_schema(store, rel, fm, fm_yaml, &schema, issues);
794        }
795    }
796}
797
798/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
799fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
800    let line = fm_key_line(fm_yaml, "summary");
801    match fm.get("summary") {
802        None => push(
803            issues,
804            Severity::Error,
805            codes::SUMMARY_MISSING,
806            rel,
807            // A missing `summary` key has no line of its own → anchor to the
808            // frontmatter block top (line 1), the EXPECTED field-absence rule.
809            fm_key_line_or_top(fm_yaml, "summary"),
810            Some("summary".into()),
811            "content file has no `summary`".into(),
812            Some("run `dbmd fm init`".into()),
813            vec![],
814        ),
815        Some(v) => {
816            let s = scalar_string(v).unwrap_or_default();
817            if s.trim().is_empty() {
818                push(
819                    issues,
820                    Severity::Error,
821                    codes::SUMMARY_EMPTY,
822                    rel,
823                    line,
824                    Some("summary".into()),
825                    "`summary` is present but empty".into(),
826                    Some("write a one-line summary, or run `dbmd fm init`".into()),
827                    vec![],
828                );
829            } else if s.contains('\n') {
830                push(
831                    issues,
832                    Severity::Error,
833                    codes::SUMMARY_MULTILINE,
834                    rel,
835                    line,
836                    Some("summary".into()),
837                    "`summary` must be one line (contains a newline)".into(),
838                    Some("collapse the summary to a single line".into()),
839                    vec![],
840                );
841            } else if s.chars().count() > MAX_SUMMARY_LEN {
842                push(
843                    issues,
844                    Severity::Warning,
845                    codes::SUMMARY_TOO_LONG,
846                    rel,
847                    line,
848                    Some("summary".into()),
849                    format!(
850                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
851                        s.chars().count()
852                    ),
853                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
854                    vec![],
855                );
856            }
857        }
858    }
859}
860
861/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
862fn check_body_wiki_links(
863    store: &Store,
864    rel: &Path,
865    body: &str,
866    fm_end_line: u32,
867    basenames: Option<&BasenameIndex>,
868    issues: &mut Vec<Issue>,
869) {
870    for link in extract_wiki_links(body) {
871        // Body lines are offset past the frontmatter block. `link.line` is
872        // 1-based within `body`; the body starts at `fm_end_line + 1`.
873        let abs_line = fm_end_line + link.line;
874        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
875    }
876}
877
/// Lookup table keyed by a file's bare basename (its stem, without `.md`),
/// holding every store-relative path that shares that stem.
///
/// It is built once per `validate --all` sweep and lets the short-form
/// wiki-link check tell a merely short-form target (`WIKI_LINK_SHORT_FORM`)
/// apart from an *ambiguous* one whose bare basename matches two or more files
/// (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set path passes
/// `None` instead: that loop is O(changed), never walks the store, and so
/// reports the plain short-form error without this scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the already-swept file list (gathered by `validate_all`, so no extra
/// walk happens here) by file stem.
///
/// Paths are appended in input order. A path with no stem, or whose stem is
/// not valid UTF-8, is left out of the index.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files.iter().fold(BasenameIndex::new(), |mut index, path| {
        if let Some(stem) = path.file_stem().and_then(|os| os.to_str()) {
            index.entry(stem.to_owned()).or_default().push(path.clone());
        }
        index
    })
}
898
899/// The shared per-wiki-link doctrine + integrity check used by both body links
900/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
901/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
902/// when its bare basename matches ≥2 files.
903fn check_wiki_link(
904    store: &Store,
905    rel: &Path,
906    link: &Link,
907    line: Option<u32>,
908    key: Option<&str>,
909    basenames: Option<&BasenameIndex>,
910    issues: &mut Vec<Issue>,
911) {
912    let bare = link.target.trim_end_matches(".md");
913
914    // Short-form: not a full store-relative path (no `/`, or first segment isn't
915    // a known layer).
916    if !is_full_store_path(bare) {
917        // Ambiguous (defensive) takes precedence over plain short-form when the
918        // target is a bare basename (no `/`) that matches ≥2 files in the store.
919        // Only computable in the sweep (where `basenames` is populated); the
920        // working-set path falls through to the plain short-form error.
921        if !bare.contains('/') {
922            if let Some(idx) = basenames {
923                if let Some(matches) = idx.get(bare) {
924                    if matches.len() >= 2 {
925                        let mut related = matches.clone();
926                        related.sort();
927                        push(
928                            issues,
929                            Severity::Error,
930                            codes::WIKI_LINK_AMBIGUOUS,
931                            rel,
932                            line,
933                            key.map(str::to_string),
934                            format!(
935                                "short-form wiki-link `[[{}]]` matches multiple files",
936                                link.target
937                            ),
938                            Some("use the full store-relative path to disambiguate".into()),
939                            related,
940                        );
941                        return;
942                    }
943                }
944            }
945        }
946        push(
947            issues,
948            Severity::Error,
949            codes::WIKI_LINK_SHORT_FORM,
950            rel,
951            line,
952            key.map(str::to_string),
953            format!(
954                "wiki-link `[[{}]]` is not a full store-relative path",
955                link.target
956            ),
957            short_form_suggestion(bare),
958            vec![],
959        );
960        // Don't also report broken; the agent must fix the form first.
961        return;
962    }
963
964    // `.md` extension → warning, then still check existence.
965    if link.target.ends_with(".md") {
966        push(
967            issues,
968            Severity::Warning,
969            codes::WIKI_LINK_HAS_EXTENSION,
970            rel,
971            line,
972            key.map(str::to_string),
973            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
974            Some(format!("drop the extension: [[{bare}]]")),
975            vec![],
976        );
977    }
978
979    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
980    // same way the graph engine does — the literal path first (so a link to a
981    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
982    // the `.md`-appended path.
983    match resolve_wiki_target(store, bare) {
984        TargetResolution::Exists => {}
985        TargetResolution::Missing => push(
986            issues,
987            Severity::Error,
988            codes::WIKI_LINK_BROKEN,
989            rel,
990            line,
991            key.map(str::to_string),
992            format!("wiki-link target `{bare}` doesn't exist"),
993            Some(format!(
994                "create `{bare}.md`, or point the link at an existing file"
995            )),
996            vec![],
997        ),
998        TargetResolution::Unsafe => push(
999            issues,
1000            Severity::Error,
1001            codes::WIKI_LINK_BROKEN,
1002            rel,
1003            line,
1004            key.map(str::to_string),
1005            format!("wiki-link target `{bare}` is not a safe store-relative path"),
1006            Some("use a full store-relative path under sources/ or records/".into()),
1007            vec![],
1008        ),
1009    }
1010}
1011
1012// ─────────────────────────────────────────────────────────────────────────────
1013//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
1014// ─────────────────────────────────────────────────────────────────────────────
1015
1016/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
1017/// block, or `None`. This is the **only** source of schema enforcement — the
1018/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
1019/// store that wants its `contact` / `expense` / etc. fields enforced declares
1020/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
1021/// copy-in starting point.
1022fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1023    store.config.schemas.get(type_).cloned()
1024}
1025
1026/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
1027fn check_schema(
1028    store: &Store,
1029    rel: &Path,
1030    fm: &BTreeMap<String, Value>,
1031    fm_yaml: &str,
1032    schema: &Schema,
1033    issues: &mut Vec<Issue>,
1034) {
1035    for spec in &schema.fields {
1036        let present = fm.get(&spec.name);
1037        let line = fm_key_line(fm_yaml, &spec.name);
1038
1039        // Required. "Empty" means: the key is absent, or its value carries no
1040        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1041        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1042        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1043        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1044        // a required field with a null or empty-collection value pass silently;
1045        // route them through `is_empty_value` instead.
1046        let is_empty = match present {
1047            None => true,
1048            Some(v) => is_empty_value(v),
1049        };
1050        if spec.required && is_empty {
1051            push(
1052                issues,
1053                Severity::Error,
1054                codes::SCHEMA_MISSING_REQUIRED,
1055                rel,
1056                // Absent key → anchor to the frontmatter top (line 1); a
1057                // present-but-empty value keeps its own line.
1058                fm_key_line_or_top(fm_yaml, &spec.name),
1059                Some(spec.name.clone()),
1060                format!("required field `{}` is absent or empty", spec.name),
1061                Some(format!("set `{}` to a non-empty value", spec.name)),
1062                vec![],
1063            );
1064            continue;
1065        }
1066        let Some(value) = present else { continue };
1067
1068        // An OPTIONAL field that is `null` or empty is simply unset — there is
1069        // no value to shape/enum/link-check. (The required+empty case already
1070        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1071        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1072        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1073        let value_empty = value.is_null()
1074            || scalar_string(value)
1075                .map(|s| s.trim().is_empty())
1076                .unwrap_or(false);
1077        if !spec.required && value_empty {
1078            continue;
1079        }
1080
1081        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1082        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1083        if let Some(prefix) = &spec.link_prefix {
1084            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1085            continue; // a link field is never also shape/enum-checked
1086        }
1087
1088        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1089        // or mapping satisfies neither, and would otherwise slip through both
1090        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1091        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1092        // than let a structurally-wrong value validate clean. (Link fields,
1093        // which legitimately take block-form sequences, already `continue`d.)
1094        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1095            push(
1096                issues,
1097                Severity::Error,
1098                codes::SCHEMA_SHAPE_MISMATCH,
1099                rel,
1100                line,
1101                Some(spec.name.clone()),
1102                format!(
1103                    "`{}` must be a scalar value, found a list or mapping",
1104                    spec.name
1105                ),
1106                Some(format!("set `{}` to a single scalar value", spec.name)),
1107                vec![],
1108            );
1109            continue;
1110        }
1111
1112        // enum
1113        if let Some(allowed) = &spec.enum_values {
1114            if let Some(s) = scalar_string(value) {
1115                if !allowed.iter().any(|a| a == &s) {
1116                    push(
1117                        issues,
1118                        Severity::Error,
1119                        codes::SCHEMA_ENUM_VIOLATION,
1120                        rel,
1121                        line,
1122                        Some(spec.name.clone()),
1123                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1124                        Some(format!("use one of: {}", allowed.join(", "))),
1125                        vec![],
1126                    );
1127                }
1128            }
1129            continue;
1130        }
1131
1132        // shape
1133        if let Some(shape) = spec.shape {
1134            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1135        }
1136    }
1137}
1138
1139/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1140/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1141/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1142/// recognized exactly like the quoted form.
1143fn check_schema_link(
1144    store: &Store,
1145    rel: &Path,
1146    field: &str,
1147    fm_yaml: &str,
1148    prefix: &Path,
1149    line: Option<u32>,
1150    issues: &mut Vec<Issue>,
1151) {
1152    let prefix_str = prefix.to_string_lossy();
1153    let prefix_str = prefix_str.trim_end_matches('/');
1154    let suggestion = |target_leaf: &str| {
1155        Some(format!(
1156            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1157        ))
1158    };
1159
1160    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1161    if links.is_empty() {
1162        // No wiki-link in the field's value → it's a plain string.
1163        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1164        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1165        let leaf = slugish(raw);
1166        push(
1167            issues,
1168            Severity::Error,
1169            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1170            rel,
1171            line,
1172            Some(field.to_string()),
1173            format!(
1174                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1175            ),
1176            suggestion(&leaf),
1177            vec![],
1178        );
1179        return;
1180    }
1181
1182    for link in links {
1183        if link.target.ends_with(".md") {
1184            let bare = link.target.trim_end_matches(".md");
1185            push(
1186                issues,
1187                Severity::Warning,
1188                codes::WIKI_LINK_HAS_EXTENSION,
1189                rel,
1190                Some(link.line),
1191                Some(field.to_string()),
1192                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1193                Some(format!("drop the extension: [[{bare}]]")),
1194                vec![],
1195            );
1196        }
1197        let bare = link.target.trim_end_matches(".md");
1198        if !path_under_prefix(bare, prefix_str) {
1199            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1200            push(
1201                issues,
1202                Severity::Error,
1203                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1204                rel,
1205                line,
1206                Some(field.to_string()),
1207                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1208                suggestion(leaf),
1209                vec![],
1210            );
1211        } else {
1212            // Correct prefix — still surface a broken target so the agent sees
1213            // one consistent vocabulary. Resolve like the graph engine (literal
1214            // path first, then `.md`) so a `link to sources/` field pointing at a
1215            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1216            match resolve_wiki_target(store, bare) {
1217                TargetResolution::Exists => {}
1218                TargetResolution::Missing => push(
1219                    issues,
1220                    Severity::Error,
1221                    codes::WIKI_LINK_BROKEN,
1222                    rel,
1223                    line,
1224                    Some(field.to_string()),
1225                    format!("wiki-link target `{bare}` doesn't exist"),
1226                    Some(format!(
1227                        "create `{bare}.md`, or point the link at an existing file"
1228                    )),
1229                    vec![],
1230                ),
1231                TargetResolution::Unsafe => push(
1232                    issues,
1233                    Severity::Error,
1234                    codes::WIKI_LINK_BROKEN,
1235                    rel,
1236                    line,
1237                    Some(field.to_string()),
1238                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1239                    Some("use a full store-relative path under sources/ or records/".into()),
1240                    vec![],
1241                ),
1242            }
1243        }
1244    }
1245}
1246
1247/// Shape enforcement for a non-link, non-enum schema field.
1248fn check_schema_shape(
1249    rel: &Path,
1250    field: &str,
1251    value: &Value,
1252    shape: Shape,
1253    line: Option<u32>,
1254    issues: &mut Vec<Issue>,
1255) {
1256    let s = scalar_string(value).unwrap_or_default();
1257    let ok = match shape {
1258        Shape::String => true, // any scalar string
1259        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1260        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1261        Shape::Date => is_iso8601_date_or_datetime(&s),
1262        Shape::Email => is_email(&s),
1263        Shape::Currency => is_currency(&s),
1264        Shape::Url => is_url(&s),
1265    };
1266    if !ok {
1267        push(
1268            issues,
1269            Severity::Error,
1270            codes::SCHEMA_SHAPE_MISMATCH,
1271            rel,
1272            line,
1273            Some(field.to_string()),
1274            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1275            Some(shape_suggestion(shape)),
1276            vec![],
1277        );
1278    }
1279}
1280
1281// ─────────────────────────────────────────────────────────────────────────────
1282//  Cross-file: entity-dedup collisions (validate_all only)
1283// ─────────────────────────────────────────────────────────────────────────────
1284
1285/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1286///
1287/// `DUP_ID` is universal (two files with the same explicit `id`).
1288/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1289/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1290/// uniqueness constraint, and two records of that type whose declared values
1291/// collide warn. No type carries a built-in dedup key — the store opts in.
1292///
1293/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1294/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1295/// lexicographically smallest store-relative path in the group (a total order →
1296/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1297/// that field's line on the reported file and carries it as `key`; a multi-field
1298/// key anchors to line 1 with a null key.
1299fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1300    // Path → frontmatter YAML, for resolving the anchor field's line on the
1301    // reported (smallest-path) member.
1302    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1303        .iter()
1304        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1305        .collect();
1306
1307    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1308    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1309    for (rel, p) in parsed {
1310        if let Some(map) = &p.fm {
1311            if let Some(id) = map.get("id").and_then(scalar_string) {
1312                if !id.trim().is_empty() {
1313                    by_id.entry(id).or_default().push(rel.clone());
1314                }
1315            }
1316        }
1317    }
1318    for (id, files) in &by_id {
1319        if files.len() > 1 {
1320            let (reported, related) = canonical_and_related(files);
1321            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1322            push(
1323                issues,
1324                Severity::Error,
1325                codes::DUP_ID,
1326                &reported,
1327                line,
1328                Some("id".into()),
1329                format!("id {id:?} is declared by more than one file"),
1330                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1331                related,
1332            );
1333        }
1334    }
1335
1336    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1337    // Every constraint comes from the store's `## Schemas`; a type with no
1338    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1339    // key-ordered, so emitted issues are deterministic across runs.
1340    for (type_name, schema) in &store.config.schemas {
1341        for key_fields in &schema.unique_keys {
1342            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1343        }
1344    }
1345}
1346
1347/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1348/// declared `key_fields` render to the same token tuple. Files missing any key
1349/// field are skipped — an incomplete key is never a collision.
1350///
1351/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1352/// store-relative path; `related` is the rest. A single-field key anchors to
1353/// that field's line on the reported file and carries it as `key`; a multi-field
1354/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1355fn soft_dup(
1356    parsed: &[(PathBuf, Parsed)],
1357    issues: &mut Vec<Issue>,
1358    type_: &str,
1359    key_fields: &[String],
1360    fm_yaml_of: &HashMap<&PathBuf, &str>,
1361) {
1362    if key_fields.is_empty() {
1363        return;
1364    }
1365    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1366    for (rel, p) in parsed {
1367        let is_type =
1368            p.fm.as_ref()
1369                .and_then(|m| m.get("type"))
1370                .and_then(scalar_string)
1371                .map(|t| t == type_)
1372                .unwrap_or(false);
1373        if !is_type {
1374            continue;
1375        }
1376        if let Some(key) = dedup_key(p, key_fields) {
1377            groups.entry(key).or_default().push(rel.clone());
1378        }
1379    }
1380    // HashMap iteration is nondeterministic; sort by reported member so the
1381    // emitted issue order is stable across runs.
1382    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1383        .values()
1384        .filter(|files| files.len() > 1)
1385        .map(|files| canonical_and_related(files))
1386        .collect();
1387    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1388
1389    let fields_disp = key_fields.join(", ");
1390    for (reported, related) in collisions {
1391        // Single-field keys anchor to the field's line + carry the key; multi-
1392        // field keys anchor to line 1 with a null key.
1393        let (line, key) = if key_fields.len() == 1 {
1394            (
1395                fm_yaml_of
1396                    .get(&reported)
1397                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1398                Some(key_fields[0].clone()),
1399            )
1400        } else {
1401            (Some(1), None)
1402        };
1403        let n = related.len();
1404        push(
1405            issues,
1406            Severity::Warning,
1407            codes::DUP_UNIQUE_KEY,
1408            &reported,
1409            line,
1410            key,
1411            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1412            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1413            related,
1414        );
1415    }
1416}
1417
1418/// Render a type's `unique:` key for one file: each field's dedup token in
1419/// order, or `None` if any field is absent/empty (an incomplete key never
1420/// collides).
1421fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1422    let mut out = Vec::with_capacity(key_fields.len());
1423    for f in key_fields {
1424        out.push(dedup_token(p, f)?);
1425    }
1426    Some(out)
1427}
1428
1429/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1430/// values (single or block-sequence list) reduce to their lower-cased target
1431/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1432/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1433fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1434    // Wiki-links first — read from the raw frontmatter text so the unquoted
1435    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1436    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1437    if !links.is_empty() {
1438        let set: BTreeSet<String> = links
1439            .into_iter()
1440            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1441            .filter(|t| !t.is_empty())
1442            .collect();
1443        return if set.is_empty() {
1444            None
1445        } else {
1446            Some(set.into_iter().collect::<Vec<_>>().join(","))
1447        };
1448    }
1449    match p.fm.as_ref()?.get(field) {
1450        Some(Value::Sequence(items)) => {
1451            let set: BTreeSet<String> = items
1452                .iter()
1453                .filter_map(scalar_string)
1454                .map(|s| s.trim().to_lowercase())
1455                .filter(|t| !t.is_empty())
1456                .collect();
1457            if set.is_empty() {
1458                None
1459            } else {
1460                Some(set.into_iter().collect::<Vec<_>>().join(","))
1461            }
1462        }
1463        Some(v) => {
1464            let s = scalar_string(v)?.trim().to_lowercase();
1465            if s.is_empty() {
1466                None
1467            } else {
1468                Some(s)
1469            }
1470        }
1471        None => None,
1472    }
1473}
1474
/// Partition a collision group into `(reported, related)`.
///
/// The group is sorted ascending by store-relative path; the first (smallest)
/// path becomes the reported member and everything after it, still in sorted
/// order, becomes `related`. Paths are totally ordered, so the split is the
/// same on every run — which is what reporting rule #1 depends on.
///
/// # Panics
///
/// Panics if `files` is empty; callers only pass groups that have members.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.iter().cloned().collect();
    ordered.sort();
    let mut members = ordered.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1486
1487// ─────────────────────────────────────────────────────────────────────────────
1488//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1489// ─────────────────────────────────────────────────────────────────────────────
1490
1491/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1492fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1493    // Group content files by their immediate parent folder (the type-folder,
1494    // *across date shards* — a sharded file's "type folder" is the folder right
1495    // under the layer). We key on the type-folder so shards roll up correctly.
1496    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1497    for rel in files {
1498        if let Some(tf) = type_folder_of(rel) {
1499            type_folders.entry(tf).or_default().push(rel.clone());
1500        }
1501    }
1502
1503    // Layers that actually contain a type-folder. The index WRITER creates a
1504    // layer/root `index.md` ONLY when a type-folder exists to roll up:
1505    // `Index::build_root`/`build_layer` populate `child_counts` from type-folders
1506    // alone, and `rebuild_all`/`write_level` remove the `index.md` when that map
1507    // is empty. A layer with ONLY loose files therefore has NO `index.md` — its
1508    // loose records live in the layer's own `index.jsonl` (checked in the loose
1509    // block below). Gating the `index.md` requirement on type-folder presence
1510    // (not on "any content file") keeps `validate --all` in parity with
1511    // `dbmd index rebuild`: requiring an `index.md` for a loose-only layer would
1512    // demand an artifact the canonical rebuild never creates, permanently
1513    // wedging the sweep on a correct store.
1514    let mut layers_with_type_folders: BTreeSet<&'static str> = BTreeSet::new();
1515    for tf in type_folders.keys() {
1516        match tf.iter().next().and_then(|s| s.to_str()) {
1517            Some("sources") => {
1518                layers_with_type_folders.insert("sources");
1519            }
1520            Some("records") => {
1521                layers_with_type_folders.insert("records");
1522            }
1523            _ => {}
1524        }
1525    }
1526
1527    // ── Root index.md ──── (only when a type-folder exists to roll up) ──────────
1528    if !type_folders.is_empty() {
1529        let root_index = store.root.join("index.md");
1530        if !root_index.is_file() {
1531            push(
1532                issues,
1533                Severity::Error,
1534                codes::INDEX_MISSING,
1535                Path::new("index.md"),
1536                None,
1537                None,
1538                "store has files but no root `index.md`".into(),
1539                Some("run `dbmd index rebuild`".into()),
1540                vec![],
1541            );
1542        } else {
1543            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1544        }
1545    }
1546
1547    // ── Layer index.md ──── (only layers that contain a type-folder) ───────────
1548    for layer in &layers_with_type_folders {
1549        let layer_index_rel = PathBuf::from(layer).join("index.md");
1550        let abs = store.root.join(&layer_index_rel);
1551        if !abs.is_file() {
1552            push(
1553                issues,
1554                Severity::Error,
1555                codes::INDEX_MISSING,
1556                &layer_index_rel,
1557                None,
1558                None,
1559                format!("layer `{layer}/` has files but no `index.md`"),
1560                Some("run `dbmd index rebuild`".into()),
1561                vec![],
1562            );
1563        } else {
1564            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1565        }
1566    }
1567
1568    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1569    for (tf, members) in &type_folders {
1570        let index_md_rel = tf.join("index.md");
1571        let index_md_abs = store.root.join(&index_md_rel);
1572        let index_md_present = index_md_abs.is_file();
1573        if !index_md_present {
1574            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1575            // on the FOLDER (not the would-be `index.md` path). When the index is
1576            // entirely missing we do NOT additionally evaluate per-entry
1577            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1578            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1579            push(
1580                issues,
1581                Severity::Error,
1582                codes::INDEX_MISSING,
1583                tf,
1584                None,
1585                None,
1586                format!("non-empty folder `{}` has no index.md", tf.display()),
1587                Some(format!(
1588                    "run `dbmd index rebuild --folder {}`",
1589                    tf.display()
1590                )),
1591                vec![],
1592            );
1593            continue;
1594        }
1595
1596        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1597        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1598
1599        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1600        // when the `index.md` is present (above): a folder whose entire index is
1601        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1602        let jsonl_rel = tf.join("index.jsonl");
1603        let jsonl_abs = store.root.join(&jsonl_rel);
1604        if !jsonl_abs.is_file() {
1605            push(
1606                issues,
1607                Severity::Error,
1608                codes::INDEX_JSONL_MISSING,
1609                &jsonl_rel,
1610                None,
1611                None,
1612                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1613                Some("run `dbmd index rebuild`".into()),
1614                vec![],
1615            );
1616        } else {
1617            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1618        }
1619    }
1620
1621    // ── Loose files: content directly at a layer root (no type-folder). ──────
1622    // They are catalogued in the layer's own `index.jsonl` (the layer `index.md`
1623    // stays a type-folder rollup), so structured reads — `query`, dedup, `graph`
1624    // — see them the same way they see canonical files. Require that sidecar and
1625    // sync-check it, so a loose file is never silently absent from the catalog.
1626    // Only genuinely-loose files land here: `type_folder_of` already grouped
1627    // every file two-or-more levels under a layer into its type-folder above.
1628    let mut loose_by_layer: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1629    for rel in files {
1630        if !is_content_file(rel) || type_folder_of(rel).is_some() {
1631            continue;
1632        }
1633        if let Some(layer_dir) = loose_layer_dir(rel) {
1634            loose_by_layer
1635                .entry(layer_dir)
1636                .or_default()
1637                .push(rel.clone());
1638        }
1639    }
1640    for (layer_dir, members) in &loose_by_layer {
1641        let jsonl_rel = layer_dir.join("index.jsonl");
1642        if !store.root.join(&jsonl_rel).is_file() {
1643            push(
1644                issues,
1645                Severity::Error,
1646                codes::INDEX_JSONL_MISSING,
1647                &jsonl_rel,
1648                None,
1649                None,
1650                format!(
1651                    "loose files at `{}/` are not catalogued — the layer has no `index.jsonl`",
1652                    layer_dir.display()
1653                ),
1654                Some("run `dbmd index rebuild`".into()),
1655                members.clone(),
1656            );
1657        } else {
1658            // `check_type_folder_index_jsonl` ignores its `tf` arg (`let _ = tf`)
1659            // and only checks jsonl-vs-files-vs-frontmatter — exactly the layer
1660            // sidecar's contract, so it is reused verbatim.
1661            check_type_folder_index_jsonl(store, layer_dir, &jsonl_rel, members, issues);
1662        }
1663    }
1664
1665    // ── Orphan index.md: an index file in a folder with no content. ──────────
1666    for rel in walk_index_files(&store.root) {
1667        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1668        let parent_str = parent.to_string_lossy().to_string();
1669        let is_canonical = parent_str.is_empty() // root
1670            || matches!(parent_str.as_str(), "sources" | "records")
1671            || type_folders.contains_key(&parent);
1672        if !is_canonical {
1673            push(
1674                issues,
1675                Severity::Warning,
1676                codes::INDEX_ORPHAN,
1677                &rel,
1678                None,
1679                None,
1680                format!(
1681                    "`{}` sits in an empty or non-canonical folder",
1682                    rel.display()
1683                ),
1684                Some("remove it, or run `dbmd index rebuild`".into()),
1685                vec![],
1686            );
1687        }
1688    }
1689}
1690
1691/// Check a type-folder `index.md`'s entries against the folder's actual files:
1692/// stale entries (target gone), missing entries (file not listed), and
1693/// summary mismatches.
1694fn check_type_folder_index_md(
1695    store: &Store,
1696    tf: &Path,
1697    index_rel: &Path,
1698    members: &[PathBuf],
1699    issues: &mut Vec<Issue>,
1700) {
1701    let abs = store.root.join(index_rel);
1702    let Ok(text) = std::fs::read_to_string(&abs) else {
1703        return;
1704    };
1705    let entries = parse_index_entries(&text);
1706
1707    let listed: BTreeSet<PathBuf> = entries
1708        .iter()
1709        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1710        .collect();
1711
1712    // Stale entries + summary mismatch.
1713    for entry in &entries {
1714        let bare = entry.target.trim_end_matches(".md");
1715        // Resolve like the graph engine (literal path first, then `.md`) so an
1716        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1717        let target_abs = match resolved_target_abs(store, bare) {
1718            Some(abs) => abs,
1719            None => {
1720                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1721                    push(
1722                        issues,
1723                        Severity::Error,
1724                        codes::INDEX_STALE_ENTRY,
1725                        index_rel,
1726                        Some(entry.line),
1727                        None,
1728                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1729                        Some("run `dbmd index rebuild`".into()),
1730                        vec![],
1731                    );
1732                } else {
1733                    push(
1734                        issues,
1735                        Severity::Error,
1736                        codes::INDEX_STALE_ENTRY,
1737                        index_rel,
1738                        Some(entry.line),
1739                        None,
1740                        format!("index entry `[[{bare}]]` points at a missing file"),
1741                        Some("run `dbmd index rebuild`".into()),
1742                        // The stale target the entry names (the file that no
1743                        // longer exists) — so the agent can locate the dangling
1744                        // reference.
1745                        vec![PathBuf::from(format!("{bare}.md"))],
1746                    );
1747                }
1748                continue;
1749            }
1750        };
1751        // Summary mismatch: the entry text must equal the file's `summary`. A
1752        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1753        // summary is also a mismatch — the SPEC requires every type-folder index
1754        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1755        // missing quote can't validate clean just because there's nothing to
1756        // compare.
1757        if let Some(expected) = read_summary(&target_abs) {
1758            match &entry.summary_text {
1759                // Compare with the SAME whitespace normalization the renderer
1760                // applies when it writes the `index.md` browse line
1761                // (`format_md_entry` -> `collapse_whitespace`). `text_part` is the
1762                // already-collapsed text parsed back out of `index.md`; `expected`
1763                // is the RAW file summary. Comparing a collapsed value against a
1764                // raw one falsely flagged any valid one-line summary that carries
1765                // internal whitespace (a double space, a tab) — a permanent,
1766                // rebuild-immune INDEX_SUMMARY_MISMATCH that wedged the store, since
1767                // `index rebuild` regenerates the byte-identical collapsed line.
1768                // Normalizing both sides makes the check compare like with like.
1769                Some(text_part)
1770                    if crate::summary::collapse_whitespace(text_part)
1771                        != crate::summary::collapse_whitespace(&expected) =>
1772                {
1773                    push(
1774                        issues,
1775                        Severity::Error,
1776                        codes::INDEX_SUMMARY_MISMATCH,
1777                        index_rel,
1778                        Some(entry.line),
1779                        None,
1780                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1781                        Some("run `dbmd index rebuild`".into()),
1782                        vec![PathBuf::from(format!("{bare}.md"))],
1783                    );
1784                }
1785                None if !expected.trim().is_empty() => {
1786                    push(
1787                        issues,
1788                        Severity::Error,
1789                        codes::INDEX_SUMMARY_MISMATCH,
1790                        index_rel,
1791                        Some(entry.line),
1792                        None,
1793                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1794                        Some("run `dbmd index rebuild`".into()),
1795                        vec![PathBuf::from(format!("{bare}.md"))],
1796                    );
1797                }
1798                _ => {}
1799            }
1800        }
1801    }
1802
1803    // Missing entries: a member file not listed. Skip the index/log meta files.
1804    // The browse view caps at 500; only flag a missing entry when the folder is
1805    // under the cap (a capped folder legitimately omits older files).
1806    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1807    if content_members.len() <= 500 {
1808        for m in content_members {
1809            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1810            if !listed.contains(&bare) {
1811                push(
1812                    issues,
1813                    Severity::Error,
1814                    codes::INDEX_MISSING_ENTRY,
1815                    index_rel,
1816                    None,
1817                    None,
1818                    format!(
1819                        "file `{}` is not listed in its folder's `index.md`",
1820                        m.display()
1821                    ),
1822                    Some("run `dbmd index rebuild`".into()),
1823                    vec![(*m).clone()],
1824                );
1825            }
1826        }
1827    }
1828    let _ = tf;
1829}
1830
1831/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1832/// folder (uncapped), every record must point at a real file, and each record's
1833/// fields must match the file's frontmatter.
1834fn check_type_folder_index_jsonl(
1835    store: &Store,
1836    tf: &Path,
1837    jsonl_rel: &Path,
1838    members: &[PathBuf],
1839    issues: &mut Vec<Issue>,
1840) {
1841    let abs = store.root.join(jsonl_rel);
1842    let Ok(text) = std::fs::read_to_string(&abs) else {
1843        return;
1844    };
1845
1846    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1847    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1848    for (i, line) in text.lines().enumerate() {
1849        let line = line.trim();
1850        if line.is_empty() {
1851            continue;
1852        }
1853        let rec: serde_json::Value = match serde_json::from_str(line) {
1854            Ok(v) => v,
1855            Err(e) => {
1856                push(
1857                    issues,
1858                    Severity::Error,
1859                    codes::INDEX_JSONL_DESYNC,
1860                    jsonl_rel,
1861                    Some((i + 1) as u32),
1862                    None,
1863                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1864                    Some("run `dbmd index rebuild`".into()),
1865                    vec![],
1866                );
1867                continue;
1868            }
1869        };
1870        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1871            if !is_safe_store_relative_path(Path::new(path)) {
1872                push(
1873                    issues,
1874                    Severity::Error,
1875                    codes::INDEX_JSONL_DESYNC,
1876                    jsonl_rel,
1877                    Some((i + 1) as u32),
1878                    None,
1879                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1880                    Some("run `dbmd index rebuild`".into()),
1881                    vec![],
1882                );
1883                continue;
1884            }
1885            records.insert(PathBuf::from(path), rec);
1886        }
1887    }
1888
1889    let member_set: BTreeSet<PathBuf> = members
1890        .iter()
1891        .filter(|m| is_content_file(m))
1892        .cloned()
1893        .collect();
1894
1895    // jsonl record → missing file = desync.
1896    for path in records.keys() {
1897        let target_abs = store.root.join(path);
1898        if !target_abs.is_file() {
1899            push(
1900                issues,
1901                Severity::Error,
1902                codes::INDEX_JSONL_DESYNC,
1903                jsonl_rel,
1904                None,
1905                None,
1906                format!(
1907                    "`index.jsonl` record points at missing file `{}`",
1908                    path.display()
1909                ),
1910                Some("run `dbmd index rebuild`".into()),
1911                vec![],
1912            );
1913        }
1914    }
1915
1916    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1917    for m in &member_set {
1918        if !records.contains_key(m) {
1919            push(
1920                issues,
1921                Severity::Error,
1922                codes::INDEX_JSONL_DESYNC,
1923                jsonl_rel,
1924                None,
1925                None,
1926                format!(
1927                    "file `{}` is missing from the complete `index.jsonl`",
1928                    m.display()
1929                ),
1930                Some("run `dbmd index rebuild`".into()),
1931                vec![m.clone()],
1932            );
1933        }
1934    }
1935
1936    // Record fields stale vs. frontmatter. SPEC § Validation defines
1937    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1938    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1939    // search paths read every field straight from these sidecars (`tags`,
1940    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1941    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1942    // a stale value answer queries with data that exists in no `.md` file.
1943    //
1944    // Rather than re-list (and drift from) every projected key, rebuild the
1945    // record the canonical projection would write for this file
1946    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1947    // and diff the two as flat JSON maps. Every key the projection emits is
1948    // covered automatically; `path` is the join key and is skipped.
1949    for (path, rec) in &records {
1950        let target_abs = store.root.join(path);
1951        if !target_abs.is_file() {
1952            continue;
1953        }
1954        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1955        else {
1956            continue; // unreadable / unparseable frontmatter is reported elsewhere
1957        };
1958        let Ok(expected_json) = serde_json::to_value(&expected) else {
1959            continue;
1960        };
1961        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1962            continue;
1963        };
1964
1965        // Compare the union of keys present on either side; a key the file
1966        // projects but the sidecar omits is just as stale as a wrong value.
1967        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1968        for key in have.keys().chain(want.keys()) {
1969            if key == "path" {
1970                continue;
1971            }
1972            if have.get(key) != want.get(key) {
1973                mismatched_keys.insert(key);
1974            }
1975        }
1976
1977        if !mismatched_keys.is_empty() {
1978            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1979            push(
1980                issues,
1981                Severity::Error,
1982                codes::INDEX_JSONL_STALE,
1983                jsonl_rel,
1984                None,
1985                Some(keys.join(",")),
1986                format!(
1987                    "`index.jsonl` record for `{}` is stale ({})",
1988                    path.display(),
1989                    keys.join(", ")
1990                ),
1991                Some("run `dbmd index rebuild`".into()),
1992                vec![path.clone()],
1993            );
1994        }
1995    }
1996    let _ = tf;
1997}
1998
1999/// Check an index's `scope:` frontmatter against its filesystem location.
2000fn check_index_scope(
2001    store: &Store,
2002    index_rel: &Path,
2003    expected_scope: &str,
2004    expected_folder: Option<&str>,
2005    issues: &mut Vec<Issue>,
2006) {
2007    let abs = store.root.join(index_rel);
2008    let Ok(text) = std::fs::read_to_string(&abs) else {
2009        return;
2010    };
2011    let Some((yaml, _, _)) = split_frontmatter(&text) else {
2012        return;
2013    };
2014    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
2015        return;
2016    };
2017    let fm = yaml_map_to_btree(&map);
2018
2019    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
2020        // Accept "type-folder" and the SPEC example's looser "folder" alias.
2021        let scope_ok =
2022            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
2023        if !scope_ok {
2024            push(
2025                issues,
2026                Severity::Warning,
2027                codes::INDEX_WRONG_SCOPE,
2028                index_rel,
2029                fm_key_line(&yaml, "scope"),
2030                Some("scope".into()),
2031                format!(
2032                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
2033                ),
2034                Some(format!("set `scope: {expected_scope}`")),
2035                vec![],
2036            );
2037        }
2038    }
2039    // folder: must match for layer/type-folder indexes.
2040    if let Some(expected) = expected_folder {
2041        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
2042            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
2043                push(
2044                    issues,
2045                    Severity::Warning,
2046                    codes::INDEX_WRONG_SCOPE,
2047                    index_rel,
2048                    fm_key_line(&yaml, "folder"),
2049                    Some("folder".into()),
2050                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
2051                    Some(format!("set `folder: {expected}`")),
2052                    vec![],
2053                );
2054            }
2055        }
2056    }
2057}
2058
2059// ─────────────────────────────────────────────────────────────────────────────
2060//  Cross-file: log.md well-formedness + ordering (validate_all only)
2061// ─────────────────────────────────────────────────────────────────────────────
2062
2063/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
2064/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
2065///
2066/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
2067/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
2068/// LOG_* checks read only the active file, an entry `validate --all` flagged
2069/// while it lived in `log.md` would stop being flagged the moment a newer-month
2070/// append rotated it into an archive — even though the log readers still surface
2071/// that exact entry to the curator. Scanning the archives too keeps validate and
2072/// the readers in agreement after a rotation.
2073///
2074/// Order: archives oldest-month first, then the active `log.md` last — the true
2075/// chronological timeline — so the out-of-order check threads `prev` across the
2076/// rotation boundary the same way it does within a single file.
2077fn check_log(store: &Store, issues: &mut Vec<Issue>) {
2078    let mut prev: Option<DateTime<FixedOffset>> = None;
2079    for rel in log_files_chronological(store) {
2080        check_log_file(store, &rel, &mut prev, issues);
2081    }
2082}
2083
2084/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
2085/// archive oldest-month first, then the active `log.md` last. Missing files are
2086/// simply absent from the list.
2087fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2088    let mut files: Vec<PathBuf> = Vec::new();
2089    let archive_dir = store.root.join("log");
2090    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2091        let mut archives: Vec<PathBuf> = entries
2092            .flatten()
2093            .map(|e| e.path())
2094            .filter(|p| {
2095                p.is_file()
2096                    && p.file_name()
2097                        .and_then(|s| s.to_str())
2098                        .and_then(|n| n.strip_suffix(".md"))
2099                        .is_some_and(is_year_month_archive)
2100            })
2101            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2102            .collect();
2103        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
2104        archives.sort();
2105        files.extend(archives);
2106    }
2107    // The active file holds the current month — newest, so it comes last.
2108    if store.root.join("log.md").is_file() {
2109        files.push(PathBuf::from("log.md"));
2110    }
2111    files
2112}
2113
2114/// Scan one log file's entry headers, threading the running `prev` timestamp so
2115/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2116/// given store-relative path so an archived entry points at its archive file.
2117fn check_log_file(
2118    store: &Store,
2119    log_rel: &Path,
2120    prev: &mut Option<DateTime<FixedOffset>>,
2121    issues: &mut Vec<Issue>,
2122) {
2123    let abs = store.root.join(log_rel);
2124    let Ok(text) = std::fs::read_to_string(&abs) else {
2125        return;
2126    };
2127
2128    for (i, line) in text.lines().enumerate() {
2129        if !line.starts_with("## [") {
2130            continue;
2131        }
2132        let line_no = (i + 1) as u32;
2133        match parse_log_header(line) {
2134            None => push(
2135                issues,
2136                Severity::Error,
2137                codes::LOG_BAD_TIMESTAMP,
2138                log_rel,
2139                Some(line_no),
2140                None,
2141                format!("log entry header has an unparseable timestamp: {line:?}"),
2142                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2143                vec![],
2144            ),
2145            Some((ts, kind, _object)) => {
2146                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2147                    push(
2148                        issues,
2149                        Severity::Warning,
2150                        codes::LOG_UNKNOWN_KIND,
2151                        log_rel,
2152                        Some(line_no),
2153                        None,
2154                        format!("log entry kind `{kind}` is not recognized"),
2155                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2156                        vec![],
2157                    );
2158                }
2159                if let Some(p) = *prev {
2160                    if ts < p {
2161                        push(
2162                            issues,
2163                            Severity::Warning,
2164                            codes::LOG_OUT_OF_ORDER,
2165                            log_rel,
2166                            Some(line_no),
2167                            None,
2168                            "log entry is older than the entry above it (possible rewrite)".into(),
2169                            Some("append corrective entries; never reorder past ones".into()),
2170                            vec![],
2171                        );
2172                    }
2173                }
2174                *prev = Some(ts);
2175            }
2176        }
2177    }
2178}
2179
2180// ─────────────────────────────────────────────────────────────────────────────
2181//  Self-contained primitives (collapse onto sibling modules once they land)
2182// ─────────────────────────────────────────────────────────────────────────────
2183
/// A minimal wiki-link occurrence: the link target and the 1-based line it was
/// found on. Any `|display` text is dropped by the scanners that build this —
/// only the target is kept. Produced by both the body scanner and the
/// frontmatter scanners.
#[derive(Debug)]
struct Link {
    /// Target as written between `[[` and `]]` (before any `|`), trimmed.
    target: String,
    /// 1-based line number supplied by the scanner that found the link.
    line: u32,
}
2190
2191/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2192/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2193/// exact-cased directory entry to be present.
2194fn store_marker_present(store: &Store) -> bool {
2195    let want = store.root.join("DB.md");
2196    if !want.is_file() {
2197        return false;
2198    }
2199    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2200    match std::fs::read_dir(&store.root) {
2201        Ok(entries) => entries
2202            .flatten()
2203            .any(|e| e.file_name().to_str() == Some("DB.md")),
2204        Err(_) => true, // can't enumerate; trust the is_file() above
2205    }
2206}
2207
2208/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2209/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2210/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2211/// `Schemas`).
2212///
2213/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2214/// than through `check_content_file`. The marker presence is established by the
2215/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2216/// as a store (the marker is the filename), so we report its shape rather than
2217/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2218fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2219    let rel = Path::new("DB.md");
2220    let abs = store.root.join("DB.md");
2221    let Ok(text) = std::fs::read_to_string(&abs) else {
2222        return; // marker present but unreadable: nothing more to say.
2223    };
2224
2225    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2226        // No frontmatter block at all → it cannot declare `type: db-md` and has
2227        // neither required field. Report the type and both missing fields,
2228        // anchored to line 1 (the would-be opening fence).
2229        push(
2230            issues,
2231            Severity::Error,
2232            codes::DB_MD_BAD_TYPE,
2233            rel,
2234            Some(1),
2235            Some("type".into()),
2236            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2237            Some("add a `---` frontmatter block with `type: db-md`".into()),
2238            vec![],
2239        );
2240        for field in ["scope", "owner"] {
2241            push(
2242                issues,
2243                Severity::Error,
2244                codes::DB_MD_MISSING_FIELD,
2245                rel,
2246                Some(1),
2247                Some(field.into()),
2248                format!("DB.md frontmatter is missing required field `{field}`"),
2249                Some(format!("add `{field}:` to the DB.md frontmatter")),
2250                vec![],
2251            );
2252        }
2253        return;
2254    };
2255
2256    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2257    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2258    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2259        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2260        Ok(Value::Null) => Some(BTreeMap::new()),
2261        _ => None,
2262    };
2263
2264    match &fm {
2265        Some(map) => {
2266            // ── type: db-md ──────────────────────────────────────────────────
2267            let type_ = map.get("type").and_then(scalar_string);
2268            if type_.as_deref() != Some("db-md") {
2269                let (line, msg) = match &type_ {
2270                    Some(t) => (
2271                        fm_key_line(&fm_yaml, "type"),
2272                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2273                    ),
2274                    None => (
2275                        Some(1),
2276                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2277                    ),
2278                };
2279                push(
2280                    issues,
2281                    Severity::Error,
2282                    codes::DB_MD_BAD_TYPE,
2283                    rel,
2284                    line,
2285                    Some("type".into()),
2286                    msg,
2287                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2288                    vec![],
2289                );
2290            }
2291
2292            // ── required fields: scope + owner ───────────────────────────────
2293            for field in ["scope", "owner"] {
2294                let present = map
2295                    .get(field)
2296                    .and_then(scalar_string)
2297                    .map(|s| !s.trim().is_empty())
2298                    .unwrap_or(false);
2299                if !present {
2300                    push(
2301                        issues,
2302                        Severity::Error,
2303                        codes::DB_MD_MISSING_FIELD,
2304                        rel,
2305                        // A present-but-empty field anchors to its line; a fully
2306                        // absent one to the block top.
2307                        fm_key_line_or_top(&fm_yaml, field),
2308                        Some(field.into()),
2309                        format!("DB.md frontmatter is missing required field `{field}`"),
2310                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2311                        vec![],
2312                    );
2313                }
2314            }
2315        }
2316        None => {
2317            // Unparseable frontmatter: the identity contract is unprovable. Emit
2318            // the type error and both field errors, anchored to the block top.
2319            push(
2320                issues,
2321                Severity::Error,
2322                codes::DB_MD_BAD_TYPE,
2323                rel,
2324                Some(1),
2325                Some("type".into()),
2326                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2327                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2328                vec![],
2329            );
2330            for field in ["scope", "owner"] {
2331                push(
2332                    issues,
2333                    Severity::Error,
2334                    codes::DB_MD_MISSING_FIELD,
2335                    rel,
2336                    Some(1),
2337                    Some(field.into()),
2338                    format!("DB.md frontmatter is missing required field `{field}`"),
2339                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2340                    vec![],
2341                );
2342            }
2343        }
2344    }
2345
2346    // ── recognized `##` section headers only ─────────────────────────────────
2347    // The body's H2 headings must be one of the four the toolkit reads; any
2348    // other is a likely typo / misplacement (warning — the parser ignores it,
2349    // so the config is not corrupted, but the operator wrote a section that will
2350    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2351    // schema blocks) live under their H2 and are not flagged here.
2352    //
2353    // `## Folders` is recognized: `parse_db_md` reads it into `Config.folders`
2354    // (parser.rs) and the index renders folder display names + descriptions from
2355    // it (index.rs `render_*_md_from_stats`). Flagging it `DB_MD_UNKNOWN_SECTION`
2356    // with "remove this heading" told the operator to delete a working,
2357    // round-tripped config block — destroying curator-authored rollup names. It
2358    // is a real, shipped section; SPEC.md documents it alongside the other three.
2359    for section in crate::parser::extract_sections(&body) {
2360        if section.level != 2 {
2361            continue;
2362        }
2363        let name = section.heading.trim().to_ascii_lowercase();
2364        if matches!(
2365            name.as_str(),
2366            "agent instructions" | "policies" | "schemas" | "folders"
2367        ) {
2368            continue;
2369        }
2370        // `Section::line` is 1-based within the body; the body begins at file
2371        // line `fm_end_line + 1`.
2372        let file_line = fm_end_line + section.line;
2373        push(
2374            issues,
2375            Severity::Warning,
2376            codes::DB_MD_UNKNOWN_SECTION,
2377            rel,
2378            Some(file_line),
2379            None,
2380            format!(
2381                "DB.md has an unrecognized `## {}` section",
2382                section.heading.trim()
2383            ),
2384            Some(
2385                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas`, \
2386                 `## Folders` — remove or rename this heading"
2387                    .into(),
2388            ),
2389            vec![],
2390        );
2391    }
2392
2393    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2394    // Without this, every schema misparse is silent: the operator/agent gets no
2395    // signal that DB.md is interpreting their schema differently from what they
2396    // wrote, and downstream records are validated against the degraded schema.
2397    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2398}
2399
2400/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2401/// duplicate field name within a type, or an unrecognized modifier all parse
2402/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2403/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2404/// already separated out); this pass reports the suspicious *field* shapes,
2405/// anchored to the `### <type>` heading line so the agent can find the block.
2406fn check_db_md_schemas(
2407    store: &Store,
2408    rel: &Path,
2409    body: &str,
2410    fm_end_line: u32,
2411    issues: &mut Vec<Issue>,
2412) {
2413    if store.config.schemas.is_empty() {
2414        return;
2415    }
2416
2417    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2418    // per-type issue can anchor to the declaration block. `extract_sections`
2419    // returns a flat list with 1-based body lines; the body starts at file line
2420    // `fm_end_line + 1`.
2421    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2422    let mut current_h2: Option<String> = None;
2423    for section in crate::parser::extract_sections(body) {
2424        match section.level {
2425            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2426            3 if current_h2.as_deref() == Some("schemas") => {
2427                // The H3 heading text (as written) is the type name — the same
2428                // key `parse_db_md` inserts into `config.schemas`.
2429                type_line
2430                    .entry(section.heading.trim().to_string())
2431                    .or_insert(fm_end_line + section.line);
2432            }
2433            _ => {}
2434        }
2435    }
2436
2437    for (type_name, schema) in &store.config.schemas {
2438        let line = type_line.get(type_name).copied();
2439        let mut seen: BTreeSet<String> = BTreeSet::new();
2440        for field in &schema.fields {
2441            let name = field.name.trim();
2442
2443            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2444            // nameless field that can never match a frontmatter key, so its
2445            // required/shape/enum constraints silently never apply.
2446            if name.is_empty() {
2447                push(
2448                    issues,
2449                    Severity::Warning,
2450                    codes::DB_MD_SCHEMA_FIELD,
2451                    rel,
2452                    line,
2453                    None,
2454                    format!("`### {type_name}` has a schema field bullet with no field name"),
2455                    Some(
2456                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2457                            .into(),
2458                    ),
2459                    vec![],
2460                );
2461                continue;
2462            }
2463
2464            // Duplicate field name within a type: the second declaration's
2465            // constraints are interpreted independently of the first, so the
2466            // author's intent is ambiguous and likely wrong.
2467            if !seen.insert(name.to_string()) {
2468                push(
2469                    issues,
2470                    Severity::Warning,
2471                    codes::DB_MD_SCHEMA_FIELD,
2472                    rel,
2473                    line,
2474                    Some(name.to_string()),
2475                    format!("`### {type_name}` declares field `{name}` more than once"),
2476                    Some(
2477                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2478                    ),
2479                    vec![],
2480                );
2481            }
2482
2483            // Unrecognized modifiers: the parser stashes anything outside the
2484            // known vocabulary (`required` / a shape / `link to …` / `default …`
2485            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2486            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2487            for modifier in &field.unknown_modifiers {
2488                let modifier = modifier.trim();
2489                if modifier.is_empty() {
2490                    continue;
2491                }
2492                push(
2493                    issues,
2494                    Severity::Info,
2495                    codes::DB_MD_SCHEMA_FIELD,
2496                    rel,
2497                    line,
2498                    Some(name.to_string()),
2499                    format!(
2500                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2501                    ),
2502                    Some(
2503                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2504                            .into(),
2505                    ),
2506                    vec![],
2507                );
2508            }
2509        }
2510    }
2511}
2512
2513/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2514fn not_a_store_issue(store: &Store) -> Issue {
2515    Issue {
2516        severity: Severity::Error,
2517        code: codes::NOT_A_STORE,
2518        file: store.root.clone(),
2519        line: None,
2520        key: None,
2521        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2522        suggestion: Some("create a `DB.md` at the store root".into()),
2523        related: vec![],
2524    }
2525}
2526
2527/// True if a store-relative path is a content file: under `sources/` or
2528/// `records/` and not an `index.md`/`index.jsonl`/`log.md`.
2529fn is_content_file(rel: &Path) -> bool {
2530    // Defense in depth: a real content file is always a forward (Normal-only)
2531    // store-relative path. Reject any `..`/absolute/prefix component so a
2532    // malformed object slot judged only by its FIRST component (`records/../..`)
2533    // can never turn a per-file read into a store escape, even if a future caller
2534    // forgets the path-safety gate `changed_objects_since` now applies.
2535    if !is_safe_store_relative_path(rel) {
2536        return false;
2537    }
2538    let Some(first) = rel.iter().next().and_then(|s| s.to_str()) else {
2539        return false;
2540    };
2541    if !matches!(first, "sources" | "records") {
2542        return false;
2543    }
2544    let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
2545    // Only the derived catalog twins are meta INSIDE a layer. `DB.md` / `log.md`
2546    // are reserved meta only at the store ROOT, which the `first` layer check
2547    // above already excludes — so a content file named `log.md` / `DB.md` inside
2548    // a layer (e.g. `records/docs/log.md`) is real content, consistent with
2549    // `Store::walk`.
2550    if matches!(name, "index.md" | "index.jsonl") {
2551        return false;
2552    }
2553    name.ends_with(".md")
2554}
2555
/// True when `rel` names one of the store's root meta files: exactly one
/// normal path component, spelled `DB.md` or `log.md`.
///
/// Anything with a parent directory (e.g. `records/docs/log.md`) is not a root
/// file and yields `false`, as does a path whose first component is not a
/// plain name (`/`, `.`, `..`, a prefix) or an empty path.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain name.
        (Some(Component::Normal(name)), None) => {
            matches!(name.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2572
/// True when the final path component is `index.md` or `index.jsonl`, at any
/// depth.
///
/// These are derived catalog files: their entries are generated wiki-links,
/// not authored body links. `check_content_file` therefore never link-checks
/// them; their integrity is `check_indexes`' job under `--all`, which reports
/// a dangling entry as `INDEX_STALE_ENTRY` rather than `WIKI_LINK_BROKEN`.
fn is_index_catalog_file(rel: &Path) -> bool {
    let Some(name) = rel.file_name().and_then(|n| n.to_str()) else {
        // No final component, or one that is not valid UTF-8.
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
2586
/// Split file text into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The first line must be `---` (trailing whitespace ignored) and the block
/// ends at the next such line. The returned YAML is every line between the
/// fences, each terminated by `\n`; the body is every line after the closing
/// fence joined by `\n`; the fence line number is 1-based. Returns `None` when
/// the text does not open with a fence or the fence is never closed.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // A single leading UTF-8 BOM is tolerated, matching parser/store/index, so
    // a BOM-prefixed file is not mistaken for one without frontmatter.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let mut lines = text.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    // The opening fence is line 1, so the YAML lines are numbered from 2.
    let mut yaml = String::new();
    let mut close_line: Option<u32> = None;
    for (line_no, line) in (2u32..).zip(lines.by_ref()) {
        if line.trim_end() == "---" {
            close_line = Some(line_no);
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
    let close_line = close_line?;

    // Whatever the iterator has left is the body.
    let body = lines.collect::<Vec<_>>().join("\n");
    Some((yaml, body, close_line))
}
2623
2624/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2625fn read_summary(abs: &Path) -> Option<String> {
2626    let text = std::fs::read_to_string(abs).ok()?;
2627    let (yaml, _, _) = split_frontmatter(&text)?;
2628    let value: Value = serde_norway::from_str(&yaml).ok()?;
2629    if let Value::Mapping(m) = value {
2630        m.get(Value::String("summary".into()))
2631            .and_then(scalar_string)
2632    } else {
2633        None
2634    }
2635}
2636
2637/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2638/// non-string keys (frontmatter keys are always strings).
2639fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2640    let mut out = BTreeMap::new();
2641    for (k, v) in map {
2642        if let Value::String(s) = k {
2643            out.insert(s.clone(), v.clone());
2644        }
2645    }
2646    out
2647}
2648
2649/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2650/// sequences/mappings/null.
2651fn scalar_string(v: &Value) -> Option<String> {
2652    match v {
2653        Value::String(s) => Some(s.clone()),
2654        Value::Number(n) => Some(n.to_string()),
2655        Value::Bool(b) => Some(b.to_string()),
2656        _ => None,
2657    }
2658}
2659
2660/// True if a frontmatter value carries no content for a *required*-field check:
2661/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2662/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2663/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2664/// a shape/enum field is caught by the later non-scalar shape check, not by the
2665/// required-presence check.
2666fn is_empty_value(v: &Value) -> bool {
2667    match v {
2668        Value::Null => true,
2669        Value::Sequence(items) => items.is_empty(),
2670        Value::Mapping(map) => map.is_empty(),
2671        other => scalar_string(other)
2672            .map(|s| s.trim().is_empty())
2673            .unwrap_or(true),
2674    }
2675}
2676
2677/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2678/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2679fn is_flat_scalar_list(v: &Value) -> bool {
2680    match v {
2681        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2682        _ => false,
2683    }
2684}
2685
2686/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2687/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2688/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2689/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2690/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2691/// both forms uniformly, the way the link textually appears — the doctrine view.
2692///
2693/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2694/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2695fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2696    let mut out = Vec::new();
2697    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2698        for link in links {
2699            out.push((key.clone(), link));
2700        }
2701    }
2702    out
2703}
2704
2705/// The wiki-link targets declared under a single top-level frontmatter key
2706/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2707/// carries no `[[...]]`.
2708fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2709    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2710        if k == key {
2711            return links;
2712        }
2713    }
2714    Vec::new()
2715}
2716
2717/// The raw value text under a single top-level frontmatter key (the remainder of
2718/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2719/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2720fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2721    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2722        if k == key {
2723            return Some(value_text);
2724        }
2725    }
2726    None
2727}
2728
2729/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2730/// each top-level key. A top-level key is a line with no leading indentation in
2731/// `name:` form; its value spans the rest of that line plus any deeper-indented
2732/// continuation lines (block scalars, block sequences) until the next top-level
2733/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2734/// absolute file line.
2735fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2736    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2737    let mut current: Option<(String, String, Vec<Link>)> = None;
2738
2739    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2740        let file_line = fm_start_line + idx as u32;
2741        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2742        let trimmed = raw_line.trim();
2743
2744        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2745        // comment. (Indented or dash lines belong to the current key's value.)
2746        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2747            top_level_key(raw_line)
2748        } else {
2749            None
2750        };
2751
2752        if let Some((key, after)) = new_key {
2753            if let Some(done) = current.take() {
2754                blocks.push(done);
2755            }
2756            let mut links = Vec::new();
2757            collect_line_links(after, file_line, &mut links);
2758            current = Some((key, after.trim().to_string(), links));
2759        } else if let Some((_k, value_text, links)) = current.as_mut() {
2760            // Continuation of the current key's value (indented or dash line).
2761            if !value_text.is_empty() {
2762                value_text.push('\n');
2763            }
2764            value_text.push_str(trimmed);
2765            collect_line_links(raw_line, file_line, links);
2766        }
2767    }
2768    if let Some(done) = current.take() {
2769        blocks.push(done);
2770    }
2771    blocks
2772}
2773
/// Split a frontmatter line at its first `:` into `(key, value_after_colon)`.
///
/// The key is the trimmed text before the colon and must be non-empty and
/// made only of alphanumerics, `_` and `-`; the value is returned untrimmed.
/// Yields `None` when there is no colon or the key is not of that form.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let after = &line[colon + 1..];

    let is_key_char = |c: char| c.is_alphanumeric() || matches!(c, '_' | '-');
    let well_formed = !name.is_empty() && name.chars().all(is_key_char);
    well_formed.then(|| (name.to_string(), after))
}
2788
2789/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2790/// each tagged with `file_line`.
2791fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2792    let bytes = s.as_bytes();
2793    let mut i = 0;
2794    while i + 1 < bytes.len() {
2795        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2796            if let Some(close) = s[i + 2..].find("]]") {
2797                let inner = &s[i + 2..i + 2 + close];
2798                // Guard against `[[[` (nested) double-counting: the inner must
2799                // not itself open another `[[`.
2800                let target = inner
2801                    .trim_start_matches('[')
2802                    .split('|')
2803                    .next()
2804                    .unwrap_or(inner)
2805                    .trim()
2806                    .to_string();
2807                if !target.is_empty() {
2808                    links.push(Link {
2809                        target,
2810                        line: file_line,
2811                    });
2812                }
2813                i = i + 2 + close + 2;
2814                continue;
2815            }
2816        }
2817        i += 1;
2818    }
2819}
2820
2821/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2822/// Skips fenced code blocks, so example links in docs don't trip the validator.
2823///
2824/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2825/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2826/// only on a line that is the **same** fence character with a run **at least as
2827/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2828/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2829/// document a backtick fence) — the inner backtick line would flip `in_fence`
2830/// off and the demo `[[…]]` inside the code block would be checked as a live
2831/// link, falsely flagging a legal store.
2832fn extract_wiki_links(body: &str) -> Vec<Link> {
2833    let mut out = Vec::new();
2834    let mut fence: Option<(u8, usize)> = None;
2835    for (idx, line) in body.lines().enumerate() {
2836        let content = line.trim_end_matches('\r');
2837        if let Some(f) = fence {
2838            // Inside a fence: the only thing that matters is whether THIS line
2839            // closes it (matching char, run ≥ the opening run). Everything else
2840            // is opaque code — no link extraction.
2841            if fence_closes(content, f) {
2842                fence = None;
2843            }
2844            continue;
2845        }
2846        if let Some(opened) = fence_opens(content) {
2847            fence = Some(opened);
2848            continue;
2849        }
2850        let line_no = (idx + 1) as u32;
2851        let bytes = line.as_bytes();
2852        let mut i = 0;
2853        while i + 1 < bytes.len() {
2854            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2855                if let Some(close) = line[i + 2..].find("]]") {
2856                    let inner = &line[i + 2..i + 2 + close];
2857                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2858                    // Skip a triple-bracket `[[[…` opening: the inner content
2859                    // starts with `[`, so this is the rejected flow-form list
2860                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2861                    // legitimate target never starts with `[`. The frontmatter
2862                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2863                    // extracting a bogus body link here would double-report it as
2864                    // a spurious `WIKI_LINK_SHORT_FORM`.
2865                    if !target.is_empty() && !target.starts_with('[') {
2866                        out.push(Link {
2867                            target,
2868                            line: line_no,
2869                        });
2870                    }
2871                    i = i + 2 + close + 2;
2872                    continue;
2873                }
2874            }
2875            i += 1;
2876        }
2877    }
2878    out
2879}
2880
/// Decide whether `line` opens a fenced code block and, if so, return the fence
/// as `(fence byte, run length)`.
///
/// A fence is a run of at least three backticks or tildes preceded by at most
/// three spaces of indentation. For a backtick fence, the text after the run
/// (the info string) must not contain another backtick. Returns `None` for
/// anything else, including an empty line.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return None;
    }
    let marker = match unindented.bytes().next()? {
        b @ (b'`' | b'~') => b,
        _ => return None,
    };
    // Everything after the leading run of fence characters is the info string.
    let info = unindented.trim_start_matches(char::from(marker));
    let run = unindented.len() - info.len();
    // Only backtick fences forbid a backtick in the info string.
    let info_allowed = marker != b'`' || !info.contains('`');
    (run >= 3 && info_allowed).then_some((marker, run))
}
2906
/// Report whether `line` closes the open `fence` (`(fence byte, opening run)`).
///
/// A closing line has at most three spaces of indentation, then a run of the
/// fence's own character at least as long as the opening run, then nothing but
/// whitespace. A line built from the *other* fence character has a run of zero
/// and therefore never closes the fence.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, opening_run) = fence;
    let unindented = line.trim_start_matches(' ');
    let indent = line.len() - unindented.len();
    let tail = unindented.trim_start_matches(char::from(marker));
    let run = unindented.len() - tail.len();
    indent <= 3 && run >= opening_run && tail.trim().is_empty()
}
2924
2925/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2926/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2927///
2928/// **Scoped to the inline value on the key line.** The SPEC's canonical
2929/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2930/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2931/// flagged — even though, parsed whole, it nests the same way the rejected
2932/// inline flow form does. So this check looks only at the value written *inline*
2933/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2934/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2935/// mis-encoding), it is flagged. A key with no inline value (the block form,
2936/// whose items live on continuation lines) is never inspected here.
2937///
2938/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2939/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2940/// encodes the identical nested sequence but evaded the old prefix match.
2941fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2942    let mut out = Vec::new();
2943    for line in fm_yaml.lines() {
2944        // Top-level key lines only (no indentation, not a comment or list dash).
2945        if line.starts_with(' ') || line.starts_with('\t') {
2946            continue;
2947        }
2948        let Some((key, rest)) = line.split_once(':') else {
2949            continue;
2950        };
2951        let key = key.trim();
2952        if key.is_empty()
2953            || key.starts_with('#')
2954            || key.starts_with('-')
2955            || !key
2956                .chars()
2957                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2958        {
2959            continue;
2960        }
2961        let rest = rest.trim();
2962        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2963        // the unquoted block form has an empty inline value and is never flagged.
2964        if !rest.starts_with('[') {
2965            continue;
2966        }
2967        // Parse just the inline value and test its shape: a list whose items are
2968        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2969        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2970        // `Seq[Seq[String]]` and is NOT flagged).
2971        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2972            let nested = items.iter().any(|item| match item {
2973                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2974                _ => false,
2975            });
2976            if nested {
2977                out.push(key.to_string());
2978            }
2979        }
2980    }
2981    out
2982}
2983
/// Report whether a bare target (no `.md`) is written as a full store-relative
/// path: a known layer (`sources` or `records`) as the first segment, followed
/// by a `/` and a non-empty remainder.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, remainder)) => {
            matches!(layer, "sources" | "records") && !remainder.is_empty()
        }
        // No `/` at all: a short-form name, not a path.
        None => false,
    }
}
2992
/// Report whether `path` is safe to join onto the store root.
///
/// A safe path has at least one normal (named) component and no component that
/// could leave the store: no `..`, no root, no platform prefix. `.` components
/// are tolerated. Validator inputs come from user-authored markdown and JSON
/// sidecars, so this gate keeps a validation probe from turning into a
/// filesystem escape.
fn is_safe_store_relative_path(path: &Path) -> bool {
    let escapes_store = path.components().any(|part| {
        matches!(
            part,
            Component::ParentDir | Component::RootDir | Component::Prefix(_)
        )
    });
    // An empty path, or one made only of `.`, names nothing.
    let names_something = path
        .components()
        .any(|part| matches!(part, Component::Normal(_)));
    names_something && !escapes_store
}
3007
3008fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
3009    let path = Path::new(bare);
3010    if !is_safe_store_relative_path(path) {
3011        return None;
3012    }
3013    Some(PathBuf::from(format!("{bare}.md")))
3014}
3015
/// How a wiki-link / index-entry target resolves on disk.
///
/// Returned by [`resolve_wiki_target`]. The outcomes are kept separate so a
/// caller can tell a target that is simply absent from one that must not be
/// probed on disk at all.
enum TargetResolution {
    /// The target exists (either as the literal path or with a `.md` suffix).
    Exists,
    /// The target is a safe store-relative path but no file exists for it.
    Missing,
    /// The target escapes the store (absolute, `..`, prefix) — never probe it.
    Unsafe,
}
3025
3026/// Resolve a bare wiki-link / index-entry target the way the graph engine does
3027/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
3028/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
3029/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
3030/// real file), then the `.md`-appended path (the common case for content
3031/// pages). Without trying the literal path first, a legal link to a raw source
3032/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
3033/// resolves it.
3034fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
3035    // The literal path and the `.md`-appended path share the same safety check
3036    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
3037    // target is unsafe in both forms.
3038    if !is_safe_store_relative_path(Path::new(bare)) {
3039        return TargetResolution::Unsafe;
3040    }
3041    match resolved_target_abs(store, bare) {
3042        Some(_) => TargetResolution::Exists,
3043        None => TargetResolution::Missing,
3044    }
3045}
3046
3047/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
3048/// trying the literal path first, then `.md`-appended — mirroring the graph
3049/// engine. `None` when neither exists, or when the bare target escapes the store
3050/// (callers that need to distinguish unsafe from merely-missing use
3051/// [`resolve_wiki_target`]).
3052///
3053/// **Existence is EXACT-CASE, deliberately platform-independent.** A db.md store
3054/// is Git-synced across machines, so a `validate --all` that passes on the
3055/// author's box must guarantee link integrity on the box that serves the store.
3056/// Bare `Path::is_file()` honors the *host* filesystem's case sensitivity: on
3057/// case-insensitive APFS/macOS (or NTFS) a wrong-case link `[[records/x/BOB]]`
3058/// resolves to the on-disk `records/x/bob.md` and passes — but on case-sensitive
3059/// Linux that file genuinely does not exist (`WIKI_LINK_BROKEN`, per SPEC.md
3060/// § Validation: "target file doesn't exist"). To stay platform-independent we
3061/// confirm not just that *a* file exists for the candidate but that its real
3062/// on-disk casing matches the requested store-relative path character-for-
3063/// character (via [`disk_case_matches`]); a case mismatch is treated as NOT
3064/// found, so macOS reports the same broken links Linux would.
3065///
3066/// NOTE on the residual validate-vs-graph divergence on macOS: the graph engine
3067/// ([`crate::graph`]) intentionally mirrors host `is_file()` + ASCII-lowercased
3068/// keys for its internal backlink/rename bookkeeping on a *single* host, so on
3069/// case-insensitive macOS `graph backlinks` will still resolve a wrong-case link
3070/// that `validate` now flags. That divergence is by design: the graph's job is
3071/// single-host consistency; `validate`'s job is cross-platform link integrity.
3072fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
3073    if !is_safe_store_relative_path(Path::new(bare)) {
3074        return None;
3075    }
3076    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
3077    // verbatim under `sources/`).
3078    let literal = store.root.join(bare);
3079    if literal.is_file() && disk_case_matches(store, &literal, bare) {
3080        return Some(literal);
3081    }
3082    // The `.md`-appended path (a content page referenced without its extension).
3083    let with_md_rel = format!("{bare}.md");
3084    let with_md = store.root.join(&with_md_rel);
3085    if with_md.is_file() && disk_case_matches(store, &with_md, &with_md_rel) {
3086        return Some(with_md);
3087    }
3088    None
3089}
3090
3091/// True if `abs` (already confirmed to be an existing file under `store.root`)
3092/// has the exact on-disk casing of the requested store-relative path `requested`.
3093///
3094/// Makes wiki-link existence resolution platform-independent: on case-insensitive
3095/// filesystems (APFS/macOS, NTFS) `Path::is_file()` says yes to a wrong-case
3096/// path, so we canonicalize the candidate — which returns the *real* on-disk
3097/// casing — and compare its store-relative portion to `requested`
3098/// case-sensitively. A mismatch means the file the link actually names does not
3099/// exist on a case-sensitive host, so the caller treats it as not found.
3100///
3101/// Conservative on `canonicalize` failure: if we cannot read the real path (a
3102/// transient FS error, a symlink we cannot resolve, a root that is itself a
3103/// symlink we cannot strip), we fall back to accepting the `is_file()` result
3104/// rather than producing a spurious `WIKI_LINK_BROKEN`. This keeps the check
3105/// additive — it only ever *adds* the case-mismatch detection; it never makes a
3106/// genuinely-resolvable correct-case link fail.
3107fn disk_case_matches(store: &Store, abs: &Path, requested: &str) -> bool {
3108    let Ok(canon_abs) = abs.canonicalize() else {
3109        return true; // cannot read real casing — don't invent a broken link
3110    };
3111    // Strip the store root (also canonicalized so a symlinked root still cancels)
3112    // to get the real on-disk store-relative path, then compare to what the link
3113    // asked for. `canonicalize` on the root may itself fail (e.g. the root no
3114    // longer exists by the time we probe) — be conservative there too.
3115    let Ok(canon_root) = store.root.canonicalize() else {
3116        return true;
3117    };
3118    let Ok(disk_rel) = canon_abs.strip_prefix(&canon_root) else {
3119        // The real file lives outside the (canonical) root — e.g. reached via a
3120        // symlink in the store. Containment is already enforced by
3121        // `is_safe_store_relative_path`; here we simply cannot make a
3122        // case-comparison, so don't manufacture a broken link.
3123        return true;
3124    };
3125    // Compare store-relative paths component-by-component, case-sensitively,
3126    // independent of the host's path separator and case folding.
3127    disk_rel == Path::new(requested)
3128}
3129
/// Report whether a bare target path is `prefix` itself or lies beneath it
/// (both given without `.md`). Trailing `/` on `prefix` is ignored, and the
/// match is on whole path segments: `records/peoplex` is not under
/// `records/people`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let prefix = prefix.trim_end_matches('/');
    bare.strip_prefix(prefix)
        // What remains must be nothing (exact match) or start a new segment.
        .is_some_and(|tail| tail.is_empty() || tail.starts_with('/'))
}
3135
/// Return the type-folder of a store-relative content path, i.e.
/// `<layer>/<type-folder>`: the folder directly under the layer, so deeper
/// date-shard folders roll up to it. `None` when the path has fewer than three
/// components (a file directly in a layer) or does not start with `sources` or
/// `records`. Components that are not valid UTF-8 are ignored.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    let layer = parts.next()?;
    let folder = parts.next()?;
    // A third component must exist: layer/type-folder/file at minimum.
    parts.next()?;
    matches!(layer, "sources" | "records").then(|| PathBuf::from(layer).join(folder))
}
3149
/// Return the layer directory (`records` or `sources`) that a *loose* content
/// file sits directly in. The path must have exactly two components with a
/// known layer first; a file inside a type-folder, or outside both layers,
/// yields `None`. Kept local so `validate` does not depend on the index crate's
/// equivalent `loose_layer_of`. Components that are not valid UTF-8 are ignored.
fn loose_layer_dir(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    let layer = parts.next()?;
    // Exactly one more component (the file) and nothing after it.
    parts.next()?;
    if parts.next().is_some() {
        return None;
    }
    matches!(layer, "sources" | "records").then(|| PathBuf::from(layer))
}
3161
3162/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`,
3163/// returning store-relative paths to be parsed in full. Skips hidden dirs and
3164/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
3165/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
3166/// (a single presence-only pass), so the loop default never walks-and-*parses*
3167/// the whole content tree.
3168///
3169/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
3170/// is reserved (`Store::is_in_log_dir` checks only the first path component);
3171/// the walk roots are the two layers, so the root archive is already out of
3172/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
3173/// decision log) is real content (see `is_content_file`), so pruning every
3174/// `name == "log"` made `--all` silently skip those files — reporting fewer
3175/// errors than the default working-set scope on the same store.
3176fn walk_content_files(root: &Path) -> Vec<PathBuf> {
3177    let mut out = Vec::new();
3178    for layer in ["sources", "records"] {
3179        let base = root.join(layer);
3180        if !base.is_dir() {
3181            continue;
3182        }
3183        for entry in walkdir::WalkDir::new(&base)
3184            // Follow symlinks, matching the loop-default `md_walker`
3185            // (store.rs `follow_links(true)`): a content file that is a symlink
3186            // into the store, or that lives in a symlinked-in type-folder, is
3187            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3188            // `walk_all_md`, both following symlinks). Without this the `--all`
3189            // sweep silently SKIPPED such files, so the authoritative superset
3190            // reported FEWER issues than the loop scope on the same store —
3191            // inverting the `--all`-is-the-superset contract. walkdir's loop
3192            // detection drops a symlink cycle (yields an Err that `.flatten()`
3193            // discards), so this cannot hang.
3194            .follow_links(true)
3195            .into_iter()
3196            .filter_entry(|e| {
3197                let name = e.file_name().to_str().unwrap_or("");
3198                !name.starts_with('.')
3199            })
3200            .flatten()
3201        {
3202            if !entry.file_type().is_file() {
3203                continue;
3204            }
3205            let name = entry.file_name().to_str().unwrap_or("");
3206            if name.ends_with(".md") && name != "index.md" {
3207                if let Ok(rel) = entry.path().strip_prefix(root) {
3208                    out.push(rel.to_path_buf());
3209                }
3210            }
3211        }
3212    }
3213    out.sort();
3214    out
3215}
3216
3217/// Every `index.md` under the store (root + layers + type-folders), as
3218/// store-relative paths. Used to detect orphan indexes. Like
3219/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
3220/// and its `index.md` is not pruned (only the root-level `log/` archive is
3221/// reserved, and the walk roots are the two layers, so it is already
3222/// out of scope).
3223fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3224    let mut out = Vec::new();
3225    if root.join("index.md").is_file() {
3226        out.push(PathBuf::from("index.md"));
3227    }
3228    for layer in ["sources", "records"] {
3229        let base = root.join(layer);
3230        if !base.is_dir() {
3231            continue;
3232        }
3233        for entry in walkdir::WalkDir::new(&base)
3234            // Follow symlinks, matching the loop-default `md_walker`
3235            // (store.rs `follow_links(true)`): a content file that is a symlink
3236            // into the store, or that lives in a symlinked-in type-folder, is
3237            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3238            // `walk_all_md`, both following symlinks). Without this the `--all`
3239            // sweep silently SKIPPED such files, so the authoritative superset
3240            // reported FEWER issues than the loop scope on the same store —
3241            // inverting the `--all`-is-the-superset contract. walkdir's loop
3242            // detection drops a symlink cycle (yields an Err that `.flatten()`
3243            // discards), so this cannot hang.
3244            .follow_links(true)
3245            .into_iter()
3246            .filter_entry(|e| {
3247                let name = e.file_name().to_str().unwrap_or("");
3248                !name.starts_with('.')
3249            })
3250            .flatten()
3251        {
3252            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3253                if let Ok(rel) = entry.path().strip_prefix(root) {
3254                    out.push(rel.to_path_buf());
3255                }
3256            }
3257        }
3258    }
3259    out.sort();
3260    out
3261}
3262
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number.
///
/// Built by [`parse_index_entries`], one per `- [[…]]` entry line.
struct IndexEntry {
    /// The link target: the text before the first `|` inside the entry's first
    /// `[[…]]`, trimmed.
    target: String,
    /// The summary following the separator, as returned by
    /// [`extract_index_entry_summary`]; `None` when the line has no summary.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md` text.
    line: u32,
}
3270
3271/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3272/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3273/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3274/// path, the summary text is whatever follows the em dash.
3275fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3276    let mut out = Vec::new();
3277    let mut in_more = false;
3278    for (idx, line) in text.lines().enumerate() {
3279        let trimmed = line.trim_start();
3280        if trimmed.starts_with("## More") {
3281            in_more = true;
3282            continue;
3283        }
3284        if in_more {
3285            continue;
3286        }
3287        if !trimmed.starts_with("- ") {
3288            continue;
3289        }
3290        // Find the first `[[...]]`.
3291        let Some(open) = trimmed.find("[[") else {
3292            continue;
3293        };
3294        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3295            continue;
3296        };
3297        let inner = &trimmed[open + 2..open + 2 + close_rel];
3298        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3299
3300        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3301        let after = &trimmed[open + 2 + close_rel + 2..];
3302        let summary_text = extract_index_entry_summary(after);
3303
3304        out.push(IndexEntry {
3305            target,
3306            summary_text,
3307            line: (idx + 1) as u32,
3308        });
3309    }
3310    out
3311}
3312
3313/// Pull the summary portion out of the text trailing an index entry's
3314/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3315/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3316/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3317/// renderer's tag separator).
3318fn extract_index_entry_summary(after: &str) -> Option<String> {
3319    let mut s = after.trim();
3320    // Drop a leading "(N ...)" count segment, if present.
3321    if s.starts_with('(') {
3322        if let Some(close) = s.find(')') {
3323            s = s[close + 1..].trim_start();
3324        }
3325    }
3326    // Require an em dash or hyphen separator before the summary.
3327    let s = if let Some(rest) = s.strip_prefix('—') {
3328        rest.trim()
3329    } else if let Some(rest) = s.strip_prefix('-') {
3330        rest.trim()
3331    } else {
3332        return None;
3333    };
3334    if s.is_empty() {
3335        return None;
3336    }
3337    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3338    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3339    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3340    // the file has no tags. The previous code also accepted a *single*-spaced
3341    // ` · ` separator, which collided with a legal summary whose own text ends
3342    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3343    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3344    // summary verbatim (no tag block, since there are no tags), but the loose
3345    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3346    // `"Standup notes"` against the file's full summary, and emitted a spurious
3347    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3348    // (rebuild regenerates the identical line). Matching the renderer's exact
3349    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3350    // matches from the right so only the real trailing tag block is considered.
3351    let s = match s.rsplit_once("  ·  ") {
3352        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3353        _ => s,
3354    };
3355    Some(s.to_string())
3356}
3357
/// Report whether `s` is a non-empty tag block: one or more
/// whitespace-separated tokens, each a `#` followed by at least one more byte.
/// This is the shape the index renderer appends after its `·` separator, and
/// it is what tells that suffix apart from a literal `·` in the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // `all` is vacuously true on an empty iterator, so demand one token first.
    tokens.peek().is_some() && tokens.all(|tok| tok.starts_with('#') && tok.len() >= 2)
}
3372
3373/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3374/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3375/// or the header isn't well-formed.
3376fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3377    let rest = line.strip_prefix("## [")?;
3378    let close = rest.find(']')?;
3379    let ts_str = &rest[..close];
3380    let tail = rest[close + 1..].trim();
3381
3382    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3383    // attach a zero offset — the log header carries minute precision, no zone.
3384    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3385    let offset = FixedOffset::east_opt(0)?;
3386    let ts = naive.and_local_timezone(offset).single()?;
3387
3388    // kind | object
3389    let (kind, object) = match tail.split_once('|') {
3390        Some((k, o)) => {
3391            let o = o.trim();
3392            (
3393                k.trim().to_string(),
3394                if o.is_empty() {
3395                    None
3396                } else {
3397                    Some(o.to_string())
3398                },
3399            )
3400        }
3401        None => (tail.to_string(), None),
3402    };
3403    if kind.is_empty() {
3404        return None;
3405    }
3406    Some((ts, kind, object))
3407}
3408
3409/// Every log file that holds entries for the working-set scan: the active
3410/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3411/// strictly-prior-month entries into the archives, so the active file alone is
3412/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3413/// unvalidated object can live in an archive after a month rollover. Reading the
3414/// archives here keeps the working-set readers in sync with the rest of the log
3415/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3416/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3417/// only log headers, never the content store, so the loop budget is preserved.
3418fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3419    let mut files = vec![store.root.join("log.md")];
3420    let archive_dir = store.root.join("log");
3421    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3422        let mut archives: Vec<PathBuf> = entries
3423            .flatten()
3424            .map(|e| e.path())
3425            .filter(|p| {
3426                p.is_file()
3427                    && p.file_name()
3428                        .and_then(|s| s.to_str())
3429                        .and_then(|n| n.strip_suffix(".md"))
3430                        .is_some_and(is_year_month_archive)
3431            })
3432            .collect();
3433        // Deterministic order (oldest month first); the callers fold across all
3434        // files so order doesn't affect the result, but a stable order keeps the
3435        // scan reproducible.
3436        archives.sort();
3437        files.extend(archives);
3438    }
3439    files
3440}
3441
/// Report whether `s` has the shape of a `YYYY-MM` archive stem: exactly four
/// ASCII digits, a `-`, and two ASCII digits — the naming used for
/// `log/<YYYY-MM>.md`. Only the shape is checked, not the month's range.
fn is_year_month_archive(s: &str) -> bool {
    // The slice pattern fixes both the length (7) and the `-` position.
    let [y0, y1, y2, y3, b'-', m0, m1] = s.as_bytes() else {
        return false;
    };
    [y0, y1, y2, y3, m0, m1]
        .into_iter()
        .all(|digit| digit.is_ascii_digit())
}
3451
3452/// The timestamp of the most recent `validate` entry across the active `log.md`
3453/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3454/// Reads only headers; never the whole store. Archive-aware so a `validate`
3455/// entry that rotated into an archive after a month rollover still anchors the
3456/// cutoff (without this, the cutoff silently resets to `None`).
3457fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3458    let mut latest: Option<DateTime<FixedOffset>> = None;
3459    for file in log_files_for_working_set(store) {
3460        let Ok(text) = std::fs::read_to_string(&file) else {
3461            continue;
3462        };
3463        for line in text.lines() {
3464            if !line.starts_with("## [") {
3465                continue;
3466            }
3467            if let Some((ts, kind, _)) = parse_log_header(line) {
3468                if kind == "validate" {
3469                    latest = Some(match latest {
3470                        Some(p) if p >= ts => p,
3471                        _ => ts,
3472                    });
3473                }
3474            }
3475        }
3476    }
3477    latest
3478}
3479
3480/// The set of content objects changed since `cutoff`, read from log entries
3481/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3482/// counts (no prior validate window). Returns store-relative `.md` paths.
3483///
3484/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3485/// month rollover [`Log::append`] rotates prior-month entries out of the active
3486/// file, so an object changed-but-never-validated in a prior month lives only in
3487/// an archive. Reading the archives here is what keeps `dbmd validate` from
3488/// silently skipping those files. Reads only log headers, never the content
3489/// store.
3490fn changed_objects_since(
3491    store: &Store,
3492    cutoff: Option<DateTime<FixedOffset>>,
3493) -> BTreeSet<PathBuf> {
3494    let mut out = BTreeSet::new();
3495    for file in log_files_for_working_set(store) {
3496        let Ok(text) = std::fs::read_to_string(&file) else {
3497            continue;
3498        };
3499        for line in text.lines() {
3500            if !line.starts_with("## [") {
3501                continue;
3502            }
3503            let Some((ts, kind, object)) = parse_log_header(line) else {
3504                continue;
3505            };
3506            if let Some(c) = cutoff {
3507                if ts < c {
3508                    continue;
3509                }
3510            }
3511            if !matches!(
3512                kind.as_str(),
3513                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3514            ) {
3515                continue;
3516            }
3517            if let Some(obj) = object {
3518                // The object slot is a store-relative path (or a wiki-link target).
3519                let bare = obj
3520                    .trim()
3521                    .trim_start_matches("[[")
3522                    .trim_end_matches("]]")
3523                    .split('|')
3524                    .next()
3525                    .unwrap_or("")
3526                    .trim()
3527                    .trim_end_matches(".md")
3528                    .to_string();
3529                if bare.is_empty() {
3530                    continue;
3531                }
3532                // Containment: the object slot is a log-header field that can
3533                // carry a `..`/absolute/prefix path (a hand-edited or
3534                // merge-malformed log line). Route it through the same safety gate
3535                // every other disk-touching validator path uses
3536                // (`safe_md_target_rel`, which `link_target_type` already applies)
3537                // so a `records/../../leaky` object cannot make
3538                // `validate_working_set` read + frontmatter-report on a file
3539                // OUTSIDE the store root. An unsafe object is dropped from the
3540                // changed set rather than probed.
3541                if let Some(rel) = safe_md_target_rel(&bare) {
3542                    out.insert(rel);
3543                }
3544            }
3545        }
3546    }
3547    out
3548}
3549
/// What the [`derived_from_ignored_type`] policy check found: the
/// `derived_from` target that resolves to an ignored-type record, together
/// with that record's type. These two fields are exactly what the validate
/// finding and the write-time warning need to render their message.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target as written: a bare store-relative
    /// path without the `.md` suffix.
    pub target: String,
    /// The resolved `type` of that target — a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3563
3564/// **The single authoritative `### Ignored types` derivation check.** Decides
3565/// whether a conclusion record derives from an ignored-type record: the
3566/// `meta-type` must be `conclusion`, `### Ignored types` must be non-empty, and
3567/// some `derived_from` target must resolve to a record whose `type` is in
3568/// `ignored_types`. Returns the first such target (and its type), or `None`.
3569///
3570/// Both surfaces call this so the policy lives in exactly one place:
3571/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3572/// `derived_from` targets it scanned from the raw frontmatter, and the write
3573/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3574/// The link *extraction* differs per surface (text-scan with line numbers vs.
3575/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3576/// resolution, and `ignored_types` membership — does not.
3577pub fn derived_from_ignored_type<I, S>(
3578    store: &Store,
3579    meta_type: &str,
3580    derived_from_targets: I,
3581) -> Option<DerivedFromIgnored>
3582where
3583    I: IntoIterator<Item = S>,
3584    S: AsRef<str>,
3585{
3586    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3587        return None;
3588    }
3589    for target in derived_from_targets {
3590        let target = target.as_ref();
3591        if let Some(target_type) = link_target_type(store, target) {
3592            if store.config.ignored_types.contains(&target_type) {
3593                return Some(DerivedFromIgnored {
3594                    target: target.to_string(),
3595                    target_type,
3596                });
3597            }
3598        }
3599    }
3600    None
3601}
3602
3603/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3604fn link_target_type(store: &Store, target: &str) -> Option<String> {
3605    let bare = target.trim_end_matches(".md");
3606    let abs = store.root.join(safe_md_target_rel(bare)?);
3607    let text = std::fs::read_to_string(&abs).ok()?;
3608    let (yaml, _, _) = split_frontmatter(&text)?;
3609    let value: Value = serde_norway::from_str(&yaml).ok()?;
3610    if let Value::Mapping(m) = value {
3611        m.get(Value::String("type".into())).and_then(scalar_string)
3612    } else {
3613        None
3614    }
3615}
3616
3617// ── Shape validators ─────────────────────────────────────────────────────────
3618
3619/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3620/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3621fn is_iso8601(s: &str) -> bool {
3622    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3623}
3624
3625/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3626/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3627/// accept the date-only form per the SPEC's worked example.
3628fn is_iso8601_date_or_datetime(s: &str) -> bool {
3629    let s = s.trim();
3630    if DateTime::parse_from_rfc3339(s).is_ok() {
3631        return true;
3632    }
3633    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3634}
3635
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Surrounding whitespace is ignored. Beyond that the address must satisfy:
///
/// * exactly one `@` — a domain that still contains an `@` after the split
///   (the common double-`@` typo `sarah@@acme.com`, or `a@b@c.com`) is
///   rejected;
/// * no whitespace of any kind inside the local part or the domain (tabs and
///   other Unicode whitespace included, not just the ASCII space);
/// * a domain with at least one `.` and no empty label, which rules out a
///   leading dot, a trailing dot, and consecutive dots (`acme..com`).
fn is_email(s: &str) -> bool {
    let s = s.trim();
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    if local.is_empty() || local.contains(char::is_whitespace) {
        return false;
    }
    if domain.contains('@') || domain.contains(char::is_whitespace) {
        return false;
    }
    // Splitting on `.` yields an empty label for a leading, trailing, or
    // doubled dot, so one check covers all three malformed shapes.
    domain.contains('.') && domain.split('.').all(|label| !label.is_empty())
}
3653
/// True for a currency amount: an optional leading symbol (`$`, `€`, `£`, `¥`),
/// an optional leading 3-letter code followed by whitespace (`USD 100`), then a
/// plain decimal number with optional thousands separators and ≤ 2 decimals.
///
/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
/// rejected, and the ≤ 2-decimal rule is actually enforced.
///
/// Commas are accepted only as real thousands separators (`1,234,567.89`). A
/// comma anywhere else — a decimal comma (`9,50`), a comma in the fraction, or
/// a leading, trailing, or doubled comma — makes the value invalid rather than
/// being silently dropped (which would read `9,50` as `950`).
fn is_currency(s: &str) -> bool {
    let mut t = s.trim();
    // Strip a leading currency symbol …
    for sym in ["$", "€", "£", "¥"] {
        if let Some(rest) = t.strip_prefix(sym) {
            t = rest.trim_start();
            break;
        }
    }
    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
    // code must be exactly three ASCII letters and separated from the number by
    // whitespace, so a bare `USD` with no amount still fails.
    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
            t = rest.trim_start();
        }
    }

    let amount = t.trim();
    if !has_valid_thousands_grouping(amount) {
        return false;
    }
    // The grouping is well-formed, so the separators can now be dropped and
    // the remaining text checked as a plain amount.
    let cleaned: String = amount.chars().filter(|c| *c != ',').collect();
    is_plain_amount(&cleaned)
}

/// True when every comma in `s` is a thousands separator: commas appear only in
/// the integer part, the first group has 1–3 characters, and every later group
/// has exactly 3. A string without commas trivially qualifies. Only the
/// *placement* of commas is checked; digit validity is left to
/// [`is_plain_amount`].
fn has_valid_thousands_grouping(s: &str) -> bool {
    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match unsigned.split_once('.') {
        Some((i, f)) => (i, f),
        None => (unsigned, ""),
    };
    if frac_part.contains(',') {
        return false;
    }
    if !int_part.contains(',') {
        return true;
    }
    let mut groups = int_part.split(',');
    let leading_ok = matches!(groups.next(), Some(g) if (1..=3).contains(&g.len()));
    leading_ok && groups.all(|g| g.len() == 3)
}

/// True for a bare decimal amount: optional sign, ≥ 1 digit, an optional
/// fractional part of 1–2 digits. No exponents, no `inf`/`NaN`, no empty string.
fn is_plain_amount(s: &str) -> bool {
    let digits = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match digits.split_once('.') {
        Some((i, f)) => (i, Some(f)),
        None => (digits, None),
    };
    if int_part.is_empty() || !int_part.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }
    match frac_part {
        None => true,
        Some(f) => (1..=2).contains(&f.len()) && f.bytes().all(|b| b.is_ascii_digit()),
    }
}
3698
/// Whether `s` (surrounding whitespace ignored) is an http(s) URL: one of the
/// recognized scheme prefixes followed by at least one character.
///
/// The remainder is measured after the scheme that actually matched, so a
/// single-character host on the shorter `http://` scheme (`http://x` — e.g. an
/// intranet/container hostname) is accepted, while a bare scheme with nothing
/// after it (`http://`, `https://`) is rejected.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    ["http://", "https://"].iter().any(|scheme| {
        matches!(candidate.strip_prefix(*scheme), Some(rest) if !rest.is_empty())
    })
}
3713
3714/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3715fn shape_suggestion(shape: Shape) -> String {
3716    match shape {
3717        Shape::String => "use a scalar string".into(),
3718        Shape::Int => "use an integer".into(),
3719        Shape::Bool => "use `true` or `false`".into(),
3720        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3721        Shape::Email => "use a `<local>@<domain>` address".into(),
3722        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3723        Shape::Url => "use an http(s) URL".into(),
3724    }
3725}
3726
3727/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3728/// can't know the folder, so the suggestion is generic but actionable.
3729fn short_form_suggestion(bare: &str) -> Option<String> {
3730    Some(format!(
3731        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3732        slugish(bare)
3733    ))
3734}
3735
/// Turn a plain string into a filesystem-ish leaf: trim it, lowercase it,
/// replace each whitespace character with a hyphen, and drop every character
/// that is not alphanumeric, `-`, `/`, or `_`.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        if ch.is_whitespace() {
            slug.push('-');
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3745
3746/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3747/// hashes a byte or reads an asset file's contents — byte presence and hash
3748/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3749/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3750/// content file's `asset`/`assets` declarations.
3751fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3752    use crate::assets;
3753
3754    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3755    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3756
3757    // Lenient manifest read: a malformed line is reported, not fatal.
3758    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3759    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3760        for (i, line) in text.lines().enumerate() {
3761            if line.trim().is_empty() {
3762                continue;
3763            }
3764            match serde_json::from_str::<assets::AssetRecord>(line) {
3765                Ok(rec) => {
3766                    manifest.insert(rec.path.clone(), rec);
3767                }
3768                Err(e) => push(
3769                    issues,
3770                    Severity::Error,
3771                    codes::ASSET_MANIFEST_MALFORMED,
3772                    manifest_rel,
3773                    Some((i as u32) + 1),
3774                    None,
3775                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3776                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3777                    vec![],
3778                ),
3779            }
3780        }
3781    }
3782
3783    // Per-wrapper declarations: every declared asset must be in the manifest and
3784    // must not point at a markdown content file.
3785    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3786    for (rel, p) in parsed {
3787        let Some(map) = &p.fm else {
3788            continue;
3789        };
3790        for decl in assets::declarations_from_yaml_map(map) {
3791            let norm = match assets::normalize_asset_path(&decl.path) {
3792                Ok(n) => n,
3793                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3794            };
3795            declared.insert(norm.clone());
3796            let is_md = Path::new(&norm)
3797                .extension()
3798                .and_then(|e| e.to_str())
3799                .map(|e| e.eq_ignore_ascii_case("md"))
3800                .unwrap_or(false);
3801            if is_md {
3802                push(
3803                    issues,
3804                    Severity::Warning,
3805                    codes::ASSET_PATH_IS_CONTENT,
3806                    rel,
3807                    None,
3808                    Some("asset".to_string()),
3809                    format!("asset path `{norm}` points at a markdown content file"),
3810                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3811                    vec![PathBuf::from(&norm)],
3812                );
3813            }
3814            if !manifest.contains_key(&norm) {
3815                push(
3816                    issues,
3817                    Severity::Error,
3818                    codes::ASSET_UNDECLARED,
3819                    rel,
3820                    None,
3821                    Some("asset".to_string()),
3822                    format!(
3823                        "references asset `{norm}` with no record in {}",
3824                        assets::MANIFEST_FILE
3825                    ),
3826                    Some("run `dbmd assets scan` to catalog it".to_string()),
3827                    vec![PathBuf::from(&norm)],
3828                );
3829            }
3830        }
3831    }
3832
3833    // Per-record: wrapper existence + orphan detection.
3834    for (path, rec) in &manifest {
3835        for w in &rec.wrappers {
3836            if !store.root.join(w).is_file() {
3837                push(
3838                    issues,
3839                    Severity::Error,
3840                    codes::ASSET_WRAPPER_BROKEN,
3841                    Path::new(path),
3842                    None,
3843                    None,
3844                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3845                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3846                    vec![PathBuf::from(w)],
3847                );
3848            }
3849        }
3850        if !declared.contains(path) {
3851            push(
3852                issues,
3853                Severity::Warning,
3854                codes::ASSET_MANIFEST_ORPHAN,
3855                Path::new(path),
3856                None,
3857                None,
3858                format!(
3859                    "`{path}` is in {} but no wrapper references it",
3860                    assets::MANIFEST_FILE
3861                ),
3862                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3863                vec![],
3864            );
3865        }
3866    }
3867}
3868
3869/// Push a fully-formed [`Issue`].
3870#[allow(clippy::too_many_arguments)]
3871fn push(
3872    issues: &mut Vec<Issue>,
3873    severity: Severity,
3874    code: &'static str,
3875    file: &Path,
3876    line: Option<u32>,
3877    key: Option<String>,
3878    message: String,
3879    suggestion: Option<String>,
3880    related: Vec<PathBuf>,
3881) {
3882    issues.push(Issue {
3883        severity,
3884        code,
3885        file: file.to_path_buf(),
3886        line,
3887        key,
3888        message,
3889        suggestion,
3890        related,
3891    });
3892}
3893
/// The 1-based *file* line of a top-level frontmatter key, or `None` when the
/// key is not found.
///
/// `fm_yaml` is the YAML block alone. File line 1 is the opening `---`, so the
/// first YAML line is reported as file line 2. A line matches only when it
/// starts with `key` at column 0 and is immediately followed by `:`, so
/// indented (nested or list-item) keys never match.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    // Pair each YAML line with its file line number: YAML line 0 → file line 2.
    fm_yaml.lines().zip(2u32..).find_map(|(line, file_line)| {
        let after_key = line.trim_start().strip_prefix(key)?;
        // A top-level key line: `key:` with no leading indent or list dash.
        (after_key.starts_with(':') && line.starts_with(key)).then_some(file_line)
    })
}
3909
3910/// The line a *field-absence* issue (a required key that is missing entirely)
3911/// anchors to: the key's line when present, else line `1` — the frontmatter
3912/// block's opening `---`. A missing key has no line of its own; anchoring it to
3913/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3914/// line to point at instead of an unhelpful `null`.
3915fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3916    fm_key_line(fm_yaml, key).or(Some(1))
3917}
3918
3919/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3920/// output deterministic across runs.
3921fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3922    a.file
3923        .cmp(&b.file)
3924        .then(a.line.cmp(&b.line))
3925        .then(a.code.cmp(b.code))
3926        .then(a.key.cmp(&b.key))
3927}
3928
3929// ═════════════════════════════════════════════════════════════════════════════
3930//  Tests
3931// ═════════════════════════════════════════════════════════════════════════════
3932
3933#[cfg(test)]
3934mod tests {
3935    use super::*;
3936    use crate::parser::{Config, FieldSpec};
3937    use std::fs;
3938    use tempfile::TempDir;
3939
3940    #[test]
3941    fn split_frontmatter_tolerates_leading_bom() {
3942        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3943        // fence must not make validate treat the file as frontmatter-less while
3944        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3945        // for `\u{feff}---` and the function returned None.
3946        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3947        let parsed = split_frontmatter(text);
3948        assert!(
3949            parsed.is_some(),
3950            "a leading BOM must not hide frontmatter from validate"
3951        );
3952        let (yaml, body, close_line) = parsed.unwrap();
3953        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3954        assert_eq!(body, "body");
3955        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3956    }
3957
    /// A test store builder over a real tempdir. Every helper writes real files
    /// so the assertions exercise real behavior, not mocks.
    struct Fixture {
        /// The temporary directory whose path serves as the store root.
        dir: TempDir,
        /// The configuration cloned into every [`Store`] that `store()` builds.
        config: Config,
    }
3964
3965    impl Fixture {
3966        /// A fresh store with a **valid** `DB.md` (the identity contract:
3967        /// `type: db-md` + `scope` + `owner`) and the two layer dirs. A valid
3968        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3969        /// clean; tests that want a broken DB.md write their own via `write`.
3970        fn new() -> Self {
3971            let dir = TempDir::new().unwrap();
3972            fs::write(
3973                dir.path().join("DB.md"),
3974                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3975            )
3976            .unwrap();
3977            for layer in ["sources", "records"] {
3978                fs::create_dir_all(dir.path().join(layer)).unwrap();
3979            }
3980            Fixture {
3981                dir,
3982                config: Config::default(),
3983            }
3984        }
3985
3986        /// A store with no `DB.md` marker.
3987        fn bare() -> Self {
3988            let dir = TempDir::new().unwrap();
3989            Fixture {
3990                dir,
3991                config: Config::default(),
3992            }
3993        }
3994
3995        /// Write a file at a store-relative path, creating parent dirs.
3996        fn write(&self, rel: &str, contents: &str) {
3997            let abs = self.dir.path().join(rel);
3998            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3999            fs::write(abs, contents).unwrap();
4000        }
4001
4002        fn store(&self) -> Store {
4003            Store {
4004                root: self.dir.path().to_path_buf(),
4005                config: self.config.clone(),
4006            }
4007        }
4008
4009        fn store_all(&self) -> Vec<Issue> {
4010            validate_all(&self.store()).unwrap()
4011        }
4012
4013        /// Write the canonical `index.md` + `index.jsonl` at every level via the
4014        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
4015        /// projection a `dbmd index rebuild` produces. Use this (rather than a
4016        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
4017        /// the sidecar carries the COMPLETE per-field projection and the fixture
4018        /// can't silently drift from what the index writer emits.
4019        fn rebuild_indexes(&self) {
4020            crate::index::Index::rebuild_all(&self.store()).unwrap();
4021        }
4022    }
4023
4024    /// True if any issue has this code.
4025    fn has(issues: &[Issue], code: &str) -> bool {
4026        issues.iter().any(|i| i.code == code)
4027    }
4028
4029    /// Count issues with a code.
4030    fn count(issues: &[Issue], code: &str) -> usize {
4031        issues.iter().filter(|i| i.code == code).count()
4032    }
4033
4034    /// The first issue with a code, or panic.
4035    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
4036        issues
4037            .iter()
4038            .find(|i| i.code == code)
4039            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
4040    }
4041
4042    /// A minimal valid `contact` body for reuse.
4043    fn valid_contact(summary: &str) -> String {
4044        format!(
4045            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
4046        )
4047    }
4048
4049    // ── store marker ──────────────────────────────────────────────────────────
4050
4051    #[test]
4052    fn not_a_store_when_db_md_absent() {
4053        let fx = Fixture::bare();
4054        let issues = fx.store_all();
4055        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
4056        assert_eq!(issues[0].code, codes::NOT_A_STORE);
4057        assert!(issues[0].is_error());
4058    }
4059
4060    #[test]
4061    fn working_set_also_reports_not_a_store() {
4062        let fx = Fixture::bare();
4063        let issues = validate_working_set(&fx.store(), None).unwrap();
4064        assert!(has(&issues, codes::NOT_A_STORE));
4065    }
4066
4067    #[test]
4068    fn clean_store_has_no_issues() {
4069        let fx = Fixture::new();
4070        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4071        // Build the canonical indexes (complete per-field jsonl included) the
4072        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
4073        // proven clean across every projected field, not just summary/type.
4074        fx.rebuild_indexes();
4075        let issues = fx.store_all();
4076        assert!(
4077            issues.is_empty(),
4078            "expected a clean store, got: {issues:#?}"
4079        );
4080    }
4081
4082    // ── meta-type closed enum ─────────────────────────────────────────────────
4083
4084    /// Regression (adversarial review): a NON-SCALAR `meta-type` (a YAML list or
4085    /// mapping) must be rejected with `FM_BAD_META_TYPE`, not silently slip past
4086    /// the enum check (and then get reclassified as the default `fact`). Pre-fix
4087    /// the check was gated on `and_then(scalar_string)`, which returned `None`
4088    /// for a sequence/mapping and short-circuited the whole branch.
4089    #[test]
4090    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
4091        let fx = Fixture::new();
4092        let body = |mt: &str| {
4093            format!(
4094                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
4095            )
4096        };
4097
4098        // Valid enum members + absent (default fact) → no FM_BAD_META_TYPE.
4099        for ok in ["fact", "operational", "conclusion"] {
4100            fx.write("records/profiles/ok.md", &body(ok));
4101            let issues = validate_working_set(&fx.store(), None).unwrap();
4102            assert!(
4103                !has(&issues, codes::FM_BAD_META_TYPE),
4104                "`meta-type: {ok}` must be accepted; got {issues:#?}"
4105            );
4106        }
4107        fx.write(
4108            "records/profiles/absent.md",
4109            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
4110        );
4111        assert!(
4112            !has(
4113                &validate_working_set(&fx.store(), None).unwrap(),
4114                codes::FM_BAD_META_TYPE
4115            ),
4116            "an absent meta-type is the default `fact` and must be accepted"
4117        );
4118
4119        // Scalar-but-wrong, AND non-scalar (list / mapping) → FM_BAD_META_TYPE.
4120        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
4121            let fx2 = Fixture::new();
4122            fx2.write("records/profiles/bad.md", &body(bad));
4123            let issues = validate_working_set(&fx2.store(), None).unwrap();
4124            assert!(
4125                has(&issues, codes::FM_BAD_META_TYPE),
4126                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
4127            );
4128        }
4129    }
4130
4131    // ── DB.md structure ───────────────────────────────────────────────────────
4132
4133    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
4134    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
4135    /// would fail here).
4136    #[test]
4137    fn valid_db_md_emits_no_structure_issue() {
4138        let fx = Fixture::new();
4139        let issues = fx.store_all();
4140        assert!(
4141            !has(&issues, codes::DB_MD_BAD_TYPE)
4142                && !has(&issues, codes::DB_MD_MISSING_FIELD)
4143                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
4144            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
4145        );
4146    }
4147
4148    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
4149    /// anchored to the `type:` line (file line 2). Failing to read the type, or
4150    /// accepting a non-`db-md` type, breaks this.
4151    #[test]
4152    fn db_md_wrong_type_is_error() {
4153        let fx = Fixture::new();
4154        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
4155        let issues = fx.store_all();
4156        let i = find(&issues, codes::DB_MD_BAD_TYPE);
4157        assert!(i.is_error());
4158        assert_eq!(i.file, PathBuf::from("DB.md"));
4159        assert_eq!(i.key.as_deref(), Some("type"));
4160        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
4161    }
4162
4163    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
4164    /// absent field, each keyed on its field name, anchored to the block top.
4165    #[test]
4166    fn db_md_missing_scope_and_owner_each_report() {
4167        let fx = Fixture::new();
4168        fx.write("DB.md", "---\ntype: db-md\n---\n");
4169        let issues = fx.store_all();
4170        assert_eq!(
4171            count(&issues, codes::DB_MD_MISSING_FIELD),
4172            2,
4173            "both scope and owner absent → two issues: {issues:#?}"
4174        );
4175        let keys: BTreeSet<Option<String>> = issues
4176            .iter()
4177            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4178            .map(|i| i.key.clone())
4179            .collect();
4180        assert_eq!(
4181            keys,
4182            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
4183            "one issue keyed on each missing field"
4184        );
4185        for i in issues
4186            .iter()
4187            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4188        {
4189            assert!(i.is_error());
4190            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
4191        }
4192    }
4193
4194    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
4195    /// anchored to its own line — guarding against an "is the key textually
4196    /// present?" shortcut that would miss `owner:` with an empty value.
4197    #[test]
4198    fn db_md_blank_required_field_is_missing() {
4199        let fx = Fixture::new();
4200        fx.write(
4201            "DB.md",
4202            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
4203        );
4204        let issues = fx.store_all();
4205        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
4206        assert_eq!(i.key.as_deref(), Some("owner"));
4207        assert_eq!(
4208            i.line,
4209            Some(4),
4210            "a present-but-empty field anchors to its line"
4211        );
4212        assert!(
4213            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
4214            "scope is present and non-empty → only owner reported"
4215        );
4216    }
4217
4218    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
4219    /// to the heading's file line; the three recognized sections stay silent.
4220    #[test]
4221    fn db_md_unknown_section_is_warning() {
4222        let fx = Fixture::new();
4223        fx.write(
4224            "DB.md",
4225            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
4226            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
4227            // 11 `## Glossary`.
4228            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
4229        );
4230        let issues = fx.store_all();
4231        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
4232        assert!(!i.is_error(), "unknown section is a warning, not an error");
4233        assert_eq!(i.severity, Severity::Warning);
4234        assert_eq!(
4235            i.line,
4236            Some(11),
4237            "anchors to the `## Glossary` heading line"
4238        );
4239        assert!(
4240            i.message.contains("Glossary"),
4241            "the message names the offending section: {}",
4242            i.message
4243        );
4244        // The recognized `## Agent instructions` section did NOT fire.
4245        assert_eq!(
4246            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
4247            1,
4248            "only the unrecognized section is flagged: {issues:#?}"
4249        );
4250    }
4251
4252    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
4253    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
4254    #[test]
4255    fn db_md_no_frontmatter_reports_type_and_both_fields() {
4256        let fx = Fixture::new();
4257        fx.write("DB.md", "# just a heading, no frontmatter\n");
4258        let issues = fx.store_all();
4259        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
4260        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
4261    }
4262
4263    // ── frontmatter ─────────────────────────────────────────────────────────
4264
4265    #[test]
4266    fn missing_type_is_error() {
4267        let fx = Fixture::new();
4268        fx.write(
4269            "records/contacts/a.md",
4270            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
4271        );
4272        let issues = fx.store_all();
4273        assert!(has(&issues, codes::FM_MISSING_TYPE));
4274        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
4275    }
4276
4277    #[test]
4278    fn missing_universal_timestamps_are_errors_on_content_files() {
4279        let fx = Fixture::new();
4280        fx.write(
4281            "records/contacts/a.md",
4282            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
4283        );
4284        let issues = fx.store_all();
4285
4286        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
4287        assert_eq!(missing_created.key.as_deref(), Some("created"));
4288        assert!(missing_created.is_error());
4289
4290        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
4291        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
4292        assert!(missing_updated.is_error());
4293    }
4294
4295    #[test]
4296    fn meta_files_do_not_require_universal_timestamps() {
4297        let fx = Fixture::new();
4298        let issues = fx.store_all();
4299
4300        assert!(
4301            !has(&issues, codes::FM_MISSING_CREATED),
4302            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4303        );
4304        assert!(
4305            !has(&issues, codes::FM_MISSING_UPDATED),
4306            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4307        );
4308    }
4309
4310    #[test]
4311    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
4312        let fx = Fixture::new();
4313        fx.write(
4314            "records/profiles/a.md",
4315            "# Just a heading\n\nNo frontmatter here.\n",
4316        );
4317        let issues = fx.store_all();
4318        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4319        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4320    }
4321
4322    #[test]
4323    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4324        let fx = Fixture::new();
4325        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
4326        let issues = fx.store_all();
4327        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4328        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4329    }
4330
4331    #[test]
4332    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4333        let fx = Fixture::new();
4334        // A tab inside a mapping value is invalid YAML.
4335        fx.write(
4336            "records/contacts/a.md",
4337            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4338        );
4339        let issues = fx.store_all();
4340        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4341        assert!(issue.is_error());
4342        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4343        // When YAML doesn't parse we don't *also* claim the summary is missing;
4344        // the agent fixes the YAML first.
4345        assert!(
4346            !has(&issues, codes::SUMMARY_MISSING),
4347            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4348        );
4349    }
4350
4351    #[test]
4352    fn bad_created_timestamp_is_error() {
4353        let fx = Fixture::new();
4354        fx.write(
4355            "records/contacts/a.md",
4356            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4357        );
4358        let issues = fx.store_all();
4359        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4360        assert_eq!(issue.key.as_deref(), Some("created"));
4361        assert!(issue.is_error());
4362    }
4363
4364    #[test]
4365    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4366        let fx = Fixture::new();
4367        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4368        // `last_touch` is a type-specific date field → date-only is fine.
4369        fx.write(
4370            "records/contacts/a.md",
4371            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4372        );
4373        let issues = fx.store_all();
4374        let created_issues: Vec<_> = issues
4375            .iter()
4376            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4377            .collect();
4378        assert_eq!(
4379            created_issues.len(),
4380            1,
4381            "date-only `created` must fail: {issues:#?}"
4382        );
4383        assert!(
4384            !issues.iter().any(
4385                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4386            ),
4387            "date-only `last_touch` is valid: {issues:#?}"
4388        );
4389    }
4390
4391    // ── summary ─────────────────────────────────────────────────────────────
4392
4393    #[test]
4394    fn summary_missing_empty_multiline_toolong() {
4395        let fx = Fixture::new();
4396        fx.write(
4397            "records/profiles/missing.md",
4398            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4399        );
4400        fx.write(
4401            "records/profiles/empty.md",
4402            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4403        );
4404        let long = "x".repeat(201);
4405        fx.write(
4406            "records/profiles/long.md",
4407            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4408        );
4409        let issues = fx.store_all();
4410        assert!(has(&issues, codes::SUMMARY_MISSING));
4411        assert_eq!(
4412            find(&issues, codes::SUMMARY_MISSING).file,
4413            PathBuf::from("records/profiles/missing.md")
4414        );
4415        assert!(has(&issues, codes::SUMMARY_EMPTY));
4416        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4417        assert_eq!(
4418            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4419            Severity::Warning
4420        );
4421    }
4422
4423    #[test]
4424    fn summary_multiline_via_yaml_block_scalar() {
4425        let fx = Fixture::new();
4426        // A literal block scalar produces a value with a newline.
4427        fx.write(
4428            "records/profiles/a.md",
4429            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4430        );
4431        let issues = fx.store_all();
4432        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4433    }
4434
4435    #[test]
4436    fn summary_exactly_200_chars_is_ok() {
4437        let fx = Fixture::new();
4438        let s = "y".repeat(200);
4439        fx.write(
4440            "records/profiles/a.md",
4441            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4442        );
4443        let issues = fx.store_all();
4444        assert!(
4445            !has(&issues, codes::SUMMARY_TOO_LONG),
4446            "200 is the bound, inclusive: {issues:#?}"
4447        );
4448    }
4449
4450    #[test]
4451    fn meta_files_need_no_summary() {
4452        let fx = Fixture::new();
4453        // The root/layer/type indexes + log carry no summary and must not be
4454        // flagged. (A lone DB.md store with one contact and full indexes.)
4455        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4456        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4457        fx.write(
4458            "records/index.md",
4459            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4460        );
4461        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4462        fx.write(
4463            "records/contacts/index.jsonl",
4464            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4465        );
4466        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4467        let issues = fx.store_all();
4468        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4469    }
4470
4471    // ── tags ────────────────────────────────────────────────────────────────
4472
4473    #[test]
4474    fn nested_tags_warns_flat_tags_ok() {
4475        let fx = Fixture::new();
4476        fx.write(
4477            "records/contacts/nested.md",
4478            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4479        );
4480        fx.write(
4481            "records/contacts/flat.md",
4482            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4483        );
4484        let issues = fx.store_all();
4485        let tag_issues: Vec<_> = issues
4486            .iter()
4487            .filter(|i| i.code == codes::TAGS_MALFORMED)
4488            .collect();
4489        assert_eq!(
4490            tag_issues.len(),
4491            1,
4492            "only the nested-tags file should warn: {issues:#?}"
4493        );
4494        assert_eq!(
4495            tag_issues[0].file,
4496            PathBuf::from("records/contacts/nested.md")
4497        );
4498        assert_eq!(tag_issues[0].severity, Severity::Warning);
4499    }
4500
4501    // ── wiki-links ────────────────────────────────────────────────────────────
4502
4503    #[test]
4504    fn short_form_wiki_link_is_error() {
4505        let fx = Fixture::new();
4506        let mut body = valid_contact("links to a short form");
4507        body.push_str("\nSee [[sarah-chen]] for details.\n");
4508        fx.write("records/contacts/a.md", &body);
4509        let issues = fx.store_all();
4510        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4511        assert!(issue.is_error());
4512        assert!(issue.message.contains("sarah-chen"));
4513        // A short-form link must NOT also be reported broken — fix the form first.
4514        assert!(
4515            !issues
4516                .iter()
4517                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4518            "short-form should suppress broken: {issues:#?}"
4519        );
4520    }
4521
4522    #[test]
4523    fn broken_full_path_wiki_link_is_error() {
4524        let fx = Fixture::new();
4525        let mut body = valid_contact("links to a missing file");
4526        body.push_str("\nSee [[records/contacts/ghost]].\n");
4527        fx.write("records/contacts/a.md", &body);
4528        let issues = fx.store_all();
4529        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4530        assert!(issue.is_error());
4531        assert!(issue.message.contains("records/contacts/ghost"));
4532        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4533    }
4534
4535    #[test]
4536    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4537        let fx = Fixture::new();
4538        let mut body = valid_contact("links with traversal");
4539        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4540        fx.write("records/contacts/a.md", &body);
4541        let issues = fx.store_all();
4542        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4543        assert!(issue.message.contains("not a safe store-relative path"));
4544        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4545    }
4546
4547    #[test]
4548    fn valid_full_path_wiki_link_passes() {
4549        let fx = Fixture::new();
4550        fx.write("records/contacts/target.md", &valid_contact("target"));
4551        let mut body = valid_contact("links to target");
4552        body.push_str("\nSee [[records/contacts/target]].\n");
4553        fx.write("records/contacts/a.md", &body);
4554        let issues = fx.store_all();
4555        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4556        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4557    }
4558
4559    #[test]
4560    fn md_extension_wiki_link_warns_and_resolves() {
4561        let fx = Fixture::new();
4562        fx.write("records/contacts/target.md", &valid_contact("target"));
4563        let mut body = valid_contact("links with extension");
4564        body.push_str("\nSee [[records/contacts/target.md]].\n");
4565        fx.write("records/contacts/a.md", &body);
4566        let issues = fx.store_all();
4567        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4568        assert_eq!(issue.severity, Severity::Warning);
4569        assert_eq!(
4570            issue.suggestion.as_deref(),
4571            Some("drop the extension: [[records/contacts/target]]")
4572        );
4573        // The target exists once `.md` is stripped → not broken.
4574        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4575    }
4576
4577    #[test]
4578    fn wiki_links_in_code_fences_are_ignored() {
4579        let fx = Fixture::new();
4580        let mut body = valid_contact("has a fenced example");
4581        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4582        fx.write("records/contacts/a.md", &body);
4583        let issues = fx.store_all();
4584        assert!(
4585            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4586            "fenced wiki-links must be ignored: {issues:#?}"
4587        );
4588    }
4589
4590    #[test]
4591    fn flow_form_link_list_in_frontmatter_is_error() {
4592        let fx = Fixture::new();
4593        fx.write(
4594            "records/meetings/m.md",
4595            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4596        );
4597        let issues = fx.store_all();
4598        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4599        assert!(issue.is_error());
4600        assert_eq!(issue.key.as_deref(), Some("attendees"));
4601    }
4602
4603    #[test]
4604    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4605        let fx = Fixture::new();
4606        fx.write("records/contacts/a.md", &valid_contact("a"));
4607        fx.write("records/contacts/b.md", &valid_contact("b"));
4608        fx.write(
4609            "records/meetings/m.md",
4610            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4611        );
4612        let issues = fx.store_all();
4613        assert!(
4614            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4615            "{issues:#?}"
4616        );
4617        // Block-form link targets are still integrity-checked (both exist here).
4618        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4619    }
4620
4621    #[test]
4622    fn frontmatter_short_form_link_field_is_error() {
4623        let fx = Fixture::new();
4624        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4625        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4626        fx.write(
4627            "records/synthesis/a.md",
4628            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4629        );
4630        let issues = fx.store_all();
4631        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4632        assert!(issue.is_error());
4633        assert_eq!(issue.key.as_deref(), Some("related"));
4634    }
4635
4636    #[test]
4637    fn unquoted_frontmatter_link_is_recognized() {
4638        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4639        // string. The validator must still see it as a wiki-link (text-based
4640        // extraction). A short-form custom field must report SHORT_FORM, and a
4641        // full-path one with a missing target must report BROKEN.
4642        let fx = Fixture::new();
4643        fx.write(
4644            "records/synthesis/short.md",
4645            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4646        );
4647        fx.write(
4648            "records/synthesis/broken.md",
4649            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4650        );
4651        let issues = fx.store_all();
4652        assert!(
4653            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4654                && i.file == Path::new("records/synthesis/short.md")
4655                && i.key.as_deref() == Some("related")),
4656            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4657        );
4658        assert!(
4659            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4660                && i.file == Path::new("records/synthesis/broken.md")),
4661            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4662        );
4663    }
4664
4665    #[test]
4666    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4667        // A short-form value in a *declared* link field (a `### contact` schema
4668        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4669        // (the target isn't under the prefix), and must NOT also be reported as a
4670        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4671        let mut fx = Fixture::new();
4672        fx.config.schemas.insert(
4673            "contact".into(),
4674            Schema {
4675                fields: vec![FieldSpec {
4676                    name: "company".into(),
4677                    link_prefix: Some(PathBuf::from("records/companies")),
4678                    ..Default::default()
4679                }],
4680                ..Default::default()
4681            },
4682        );
4683        fx.write(
4684            "records/contacts/a.md",
4685            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4686        );
4687        let issues = fx.store_all();
4688        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4689        assert_eq!(issue.key.as_deref(), Some("company"));
4690        // The same link must NOT also be double-reported via the generic path.
4691        assert!(
4692            !issues
4693                .iter()
4694                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4695                    && i.key.as_deref() == Some("company")),
4696            "schema link fields are checked once, by the schema path: {issues:#?}"
4697        );
4698    }
4699
4700    #[test]
4701    fn schema_link_field_with_md_extension_still_warns() {
4702        let mut fx = Fixture::new();
4703        fx.config.schemas.insert(
4704            "contact".into(),
4705            Schema {
4706                fields: vec![FieldSpec {
4707                    name: "company".into(),
4708                    link_prefix: Some(PathBuf::from("records/companies")),
4709                    ..Default::default()
4710                }],
4711                ..Default::default()
4712            },
4713        );
4714        fx.write(
4715            "records/companies/acme.md",
4716            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4717        );
4718        fx.write(
4719            "records/contacts/a.md",
4720            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4721        );
4722        let issues = fx.store_all();
4723        let issue = issues
4724            .iter()
4725            .find(|i| {
4726                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4727            })
4728            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4729        assert_eq!(issue.severity, Severity::Warning);
4730        assert!(
4731            !issues
4732                .iter()
4733                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4734            "extensionless existence check should still find acme.md: {issues:#?}"
4735        );
4736    }
4737
4738    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4739
4740    #[test]
4741    fn explicit_schema_required_shape_enum() {
4742        let fx = {
4743            let mut fx = Fixture::new();
4744            // contact schema: name required, email required+email shape,
4745            // status enum: active|inactive
4746            let schema = Schema {
4747                fields: vec![
4748                    FieldSpec {
4749                        name: "name".into(),
4750                        required: true,
4751                        ..Default::default()
4752                    },
4753                    FieldSpec {
4754                        name: "email".into(),
4755                        required: true,
4756                        shape: Some(Shape::Email),
4757                        ..Default::default()
4758                    },
4759                    FieldSpec {
4760                        name: "status".into(),
4761                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4762                        ..Default::default()
4763                    },
4764                ],
4765                ..Default::default()
4766            };
4767            fx.config.schemas.insert("contact".into(), schema);
4768            fx
4769        };
4770        fx.write(
4771            "records/contacts/a.md",
4772            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4773        );
4774        let issues = fx.store_all();
4775        // name absent → MISSING_REQUIRED
4776        assert!(
4777            issues
4778                .iter()
4779                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4780                    && i.key.as_deref() == Some("name")),
4781            "{issues:#?}"
4782        );
4783        // email malformed → SHAPE_MISMATCH
4784        assert!(
4785            issues.iter().any(
4786                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4787            ),
4788            "{issues:#?}"
4789        );
4790        // status archived not in enum → ENUM_VIOLATION
4791        assert!(
4792            issues
4793                .iter()
4794                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4795                    && i.key.as_deref() == Some("status")),
4796            "{issues:#?}"
4797        );
4798    }
4799
4800    #[test]
4801    fn schema_without_link_field_allows_plain_value() {
4802        // A `contact` schema with no `company` link field means a plain `company`
4803        // string is fine — schema enforcement is exactly what the store declares,
4804        // nothing implicit.
4805        let mut fx = Fixture::new();
4806        fx.config.schemas.insert(
4807            "contact".into(),
4808            Schema {
4809                fields: vec![FieldSpec {
4810                    name: "name".into(),
4811                    required: true,
4812                    ..Default::default()
4813                }],
4814                ..Default::default()
4815            },
4816        );
4817        fx.write(
4818            "records/contacts/a.md",
4819            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4820        );
4821        let issues = fx.store_all();
4822        assert!(
4823            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4824            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4825        );
4826    }
4827
4828    #[test]
4829    fn schema_link_field_plain_value_is_prefix_mismatch() {
4830        // The surviving link-enforcement path: a declared `link to <prefix>/`
4831        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4832        let mut fx = Fixture::new();
4833        fx.config.schemas.insert(
4834            "contact".into(),
4835            Schema {
4836                fields: vec![FieldSpec {
4837                    name: "company".into(),
4838                    link_prefix: Some(PathBuf::from("records/companies")),
4839                    ..Default::default()
4840                }],
4841                ..Default::default()
4842            },
4843        );
4844        fx.write(
4845            "records/contacts/a.md",
4846            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4847        );
4848        let issues = fx.store_all();
4849        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4850        assert_eq!(issue.key.as_deref(), Some("company"));
4851        assert!(issue
4852            .suggestion
4853            .as_deref()
4854            .unwrap()
4855            .contains("records/companies/"));
4856    }
4857
4858    #[test]
4859    fn schema_shape_int_and_url_and_currency() {
4860        let mut fx = Fixture::new();
4861        fx.config.schemas.insert(
4862            "widget".into(),
4863            Schema {
4864                fields: vec![
4865                    FieldSpec {
4866                        name: "qty".into(),
4867                        shape: Some(Shape::Int),
4868                        ..Default::default()
4869                    },
4870                    FieldSpec {
4871                        name: "site".into(),
4872                        shape: Some(Shape::Url),
4873                        ..Default::default()
4874                    },
4875                    FieldSpec {
4876                        name: "price".into(),
4877                        shape: Some(Shape::Currency),
4878                        ..Default::default()
4879                    },
4880                ],
4881                ..Default::default()
4882            },
4883        );
4884        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4885        // ISO code + amount). It must pass — it used to spuriously fail.
4886        fx.write(
4887            "records/widgets/ok.md",
4888            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4889        );
4890        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4891        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4892        // the under-rejection half of the finding.
4893        fx.write(
4894            "records/widgets/bad.md",
4895            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4896        );
4897        let issues = fx.store_all();
4898        let bad_shape: Vec<_> = issues
4899            .iter()
4900            .filter(|i| {
4901                i.code == codes::SCHEMA_SHAPE_MISMATCH
4902                    && i.file == Path::new("records/widgets/bad.md")
4903            })
4904            .map(|i| i.key.clone().unwrap_or_default())
4905            .collect();
4906        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4907        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4908        assert!(
4909            bad_shape.contains(&"price".to_string()),
4910            "inf must be rejected as currency: {issues:#?}"
4911        );
4912        assert!(
4913            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4914                && i.file == Path::new("records/widgets/ok.md")),
4915            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4916        );
4917    }
4918
4919    #[test]
4920    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4921        let mut fx = Fixture::new();
4922        fx.config.schemas.insert(
4923            "contact".into(),
4924            Schema {
4925                fields: vec![
4926                    FieldSpec {
4927                        name: "email".into(),
4928                        required: true,
4929                        shape: Some(Shape::Email),
4930                        ..Default::default()
4931                    },
4932                    FieldSpec {
4933                        name: "status".into(),
4934                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4935                        ..Default::default()
4936                    },
4937                ],
4938                ..Default::default()
4939            },
4940        );
4941        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4942        // used to slip through entirely (`scalar_string` → None → the shape and
4943        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4944        fx.write(
4945            "records/contacts/bad.md",
4946            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4947        );
4948        let issues = fx.store_all();
4949        let mismatched: Vec<_> = issues
4950            .iter()
4951            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4952            .map(|i| i.key.clone().unwrap_or_default())
4953            .collect();
4954        assert!(
4955            mismatched.contains(&"email".to_string()),
4956            "list-valued required email must flag: {issues:#?}"
4957        );
4958        assert!(
4959            mismatched.contains(&"status".to_string()),
4960            "list-valued enum must flag: {issues:#?}"
4961        );
4962    }
4963
4964    #[test]
4965    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4966        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4967        for ok in [
4968            "100",
4969            "1234.56",
4970            "$1,234.50",
4971            "USD 100", // the finding's headline probe — used to be false
4972            "usd 100", // case-insensitive code
4973            "EUR 9.50",
4974            "£12",
4975            "¥1000",
4976            "-5.00", // signed amounts are real (refunds)
4977            "+5",
4978            "1,000,000",
4979        ] {
4980            assert!(is_currency(ok), "expected currency: {ok:?}");
4981        }
4982        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4983        // bare-code / exponent cases the docstring forbids.
4984        for bad in [
4985            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4986            "12.999", // 3 decimals
4987            "1.2345", // 4 decimals
4988            "USD",    // bare code, no amount
4989            "$",      // bare symbol
4990            "free", "", " ", "1e3",      // exponent form
4991            "1.",       // trailing dot, no fractional digits
4992            ".5",       // leading dot, no integer digits
4993            "1 000",    // space as separator is not a thousands separator
4994            "USDD 100", // 4-letter "code" must not strip
4995        ] {
4996            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4997        }
4998    }
4999
5000    // ── policies ───────────────────────────────────────────────────────────
5001
5002    #[test]
5003    fn ignored_type_present_is_info() {
5004        let mut fx = Fixture::new();
5005        fx.config.ignored_types.push("temp".into());
5006        fx.write(
5007            "records/temps/x.md",
5008            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
5009        );
5010        let issues = fx.store_all();
5011        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
5012        assert_eq!(issue.severity, Severity::Info);
5013        assert!(!issue.is_error());
5014        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
5015    }
5016
5017    #[test]
5018    fn conclusion_record_derived_from_ignored_type_warns() {
5019        let mut fx = Fixture::new();
5020        fx.config.ignored_types.push("temp".into());
5021        fx.write(
5022            "records/temps/x.md",
5023            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
5024        );
5025        // The policy now gates on `meta-type: conclusion` (not the retired
5026        // `type: wiki-page`): a conclusion record that derives from an
5027        // ignored-type record warns.
5028        fx.write(
5029            "records/synthesis/t.md",
5030            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
5031        );
5032        let issues = fx.store_all();
5033        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
5034        assert_eq!(issue.severity, Severity::Warning);
5035        assert_eq!(issue.key.as_deref(), Some("derived_from"));
5036        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
5037    }
5038
5039    /// The shared `derived_from_ignored_type` entry point — the single
5040    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
5041    /// warning) now route through, so they cannot diverge. This pins its
5042    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
5043    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
5044    /// match carrying the resolved target type, and a non-ignored target
5045    /// rejected.
5046    #[test]
5047    fn derived_from_ignored_type_is_the_shared_policy_decision() {
5048        let mut fx = Fixture::new();
5049        fx.config.ignored_types.push("secret".into());
5050        // An ignored-type record …
5051        fx.write(
5052            "records/secrets/s.md",
5053            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
5054        );
5055        // … and a non-ignored record.
5056        fx.write(
5057            "records/contacts/c.md",
5058            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
5059        );
5060        let store = fx.store();
5061
5062        // Positive: a conclusion record deriving from the ignored-type record
5063        // matches, and the hit carries both the target (as written) and its
5064        // resolved type.
5065        let hit =
5066            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
5067                .expect("conclusion → ignored-type record must match");
5068        assert_eq!(hit.target, "records/secrets/s");
5069        assert_eq!(hit.target_type, "secret");
5070
5071        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
5072        // the same ignored-type target.
5073        assert_eq!(
5074            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
5075            None,
5076            "only conclusion derivation is policed"
5077        );
5078
5079        // Target gate: a conclusion deriving from a non-ignored record is fine.
5080        assert_eq!(
5081            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
5082            None,
5083            "deriving from a non-ignored type is allowed"
5084        );
5085
5086        // First match wins across multiple targets (here the second is the hit).
5087        let hit = derived_from_ignored_type(
5088            &store,
5089            "conclusion",
5090            ["records/contacts/c", "records/secrets/s"],
5091        )
5092        .expect("a later ignored-type target must still be found");
5093        assert_eq!(hit.target, "records/secrets/s");
5094
5095        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
5096        fx.config.ignored_types.clear();
5097        let store = fx.store();
5098        assert_eq!(
5099            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
5100            None,
5101            "an empty ignored-types policy short-circuits"
5102        );
5103    }
5104
5105    // ── duplicates ───────────────────────────────────────────────────────────
5106
5107    #[test]
5108    fn dup_id_is_hard_error_with_related() {
5109        let fx = Fixture::new();
5110        fx.write(
5111            "records/contacts/a.md",
5112            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5113        );
5114        fx.write(
5115            "records/contacts/b.md",
5116            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5117        );
5118        let issues = fx.store_all();
5119        // Reporting rule #1: ONE issue per collision group, keyed on the
5120        // lexicographically smallest path (`a.md`), partner in `related`.
5121        assert_eq!(
5122            count(&issues, codes::DUP_ID),
5123            1,
5124            "one issue per group: {issues:#?}"
5125        );
5126        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
5127        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
5128        assert!(a.is_error());
5129        assert_eq!(a.key.as_deref(), Some("id"));
5130        assert_eq!(
5131            a.line,
5132            Some(3),
5133            "anchors to the `id` line on the reported file"
5134        );
5135        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
5136    }
5137
5138    #[test]
5139    fn dup_id_not_fired_in_working_set() {
5140        // DUP_* is an --all-only cross-file check; the working set must not run it.
5141        let fx = Fixture::new();
5142        fx.write(
5143            "records/contacts/a.md",
5144            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5145        );
5146        fx.write(
5147            "records/contacts/b.md",
5148            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5149        );
5150        // Log says both changed since epoch, so they're in the working set.
5151        fx.write(
5152            "log.md",
5153            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
5154        );
5155        let issues = validate_working_set(&fx.store(), None).unwrap();
5156        assert!(
5157            !has(&issues, codes::DUP_ID),
5158            "DUP_ID is --all only: {issues:#?}"
5159        );
5160    }
5161
5162    #[test]
5163    fn dup_unique_key_single_field_is_warning() {
5164        let mut fx = Fixture::new();
5165        // contact declares `- unique: email`.
5166        fx.config.schemas.insert(
5167            "contact".into(),
5168            Schema {
5169                unique_keys: vec![vec!["email".into()]],
5170                ..Default::default()
5171            },
5172        );
5173        for (f, name) in [("a", "A"), ("b", "B")] {
5174            fx.write(
5175                &format!("records/contacts/{f}.md"),
5176                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
5177            );
5178        }
5179        let issues = fx.store_all();
5180        // One issue per group (rule #1), keyed on the smallest path, anchored to
5181        // the single `email` field.
5182        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
5183        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5184        assert_eq!(dup.severity, Severity::Warning);
5185        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
5186        assert_eq!(dup.key.as_deref(), Some("email"));
5187        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
5188    }
5189
5190    #[test]
5191    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
5192        let mut fx = Fixture::new();
5193        // expense declares `- unique: date, amount, vendor` (a compound key).
5194        fx.config.schemas.insert(
5195            "expense".into(),
5196            Schema {
5197                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
5198                ..Default::default()
5199            },
5200        );
5201        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
5202        let exp = |f: &str, amount: &str| {
5203            format!(
5204            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
5205        )
5206        };
5207        fx.write("records/expenses/e1.md", &exp("e1", "100"));
5208        fx.write("records/expenses/e2.md", &exp("e2", "100"));
5209        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
5210        let issues = fx.store_all();
5211        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
5212        // (e1) with e2 in `related`; e3 differs on amount and never appears.
5213        assert_eq!(
5214            count(&issues, codes::DUP_UNIQUE_KEY),
5215            1,
5216            "only e1+e2 collide, one issue: {issues:#?}"
5217        );
5218        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5219        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5220        assert_eq!(
5221            dup.line,
5222            Some(1),
5223            "compound-key collision anchors to line 1"
5224        );
5225        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5226        assert!(
5227            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5228                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5229            "e3 differs on amount and must not collide: {issues:#?}"
5230        );
5231    }
5232
5233    #[test]
5234    fn dup_unique_key_list_field_is_order_independent() {
5235        let mut fx = Fixture::new();
5236        // meeting declares `- unique: date, attendees`; the list field is a set.
5237        fx.config.schemas.insert(
5238            "meeting".into(),
5239            Schema {
5240                unique_keys: vec![vec!["date".into(), "attendees".into()]],
5241                ..Default::default()
5242            },
5243        );
5244        fx.write("records/contacts/a.md", &valid_contact("a"));
5245        fx.write("records/contacts/b.md", &valid_contact("b"));
5246        let m = |f: &str, order: &str| {
5247            let attendees = if order == "ab" {
5248                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
5249            } else {
5250                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
5251            };
5252            format!(
5253                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5254            )
5255        };
5256        fx.write("records/meetings/m1.md", &m("m1", "ab"));
5257        fx.write("records/meetings/m2.md", &m("m2", "ba"));
5258        let issues = fx.store_all();
5259        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
5260        // → a single issue on the smaller path.
5261        assert_eq!(
5262            count(&issues, codes::DUP_UNIQUE_KEY),
5263            1,
5264            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5265        );
5266        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5267        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5268        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5269    }
5270
5271    // ── indexes ───────────────────────────────────────────────────────────────
5272
5273    #[test]
5274    fn missing_indexes_at_all_three_levels() {
5275        let fx = Fixture::new();
5276        fx.write("records/contacts/a.md", &valid_contact("a"));
5277        let issues = fx.store_all();
5278        // root, layer (records), and type-folder (records/contacts) all missing.
5279        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
5280        // would-be index.md), per the field convention `EXPECTED` pins.
5281        let missing_files: BTreeSet<PathBuf> = issues
5282            .iter()
5283            .filter(|i| i.code == codes::INDEX_MISSING)
5284            .map(|i| i.file.clone())
5285            .collect();
5286        assert!(
5287            missing_files.contains(&PathBuf::from("index.md")),
5288            "{issues:#?}"
5289        );
5290        assert!(
5291            missing_files.contains(&PathBuf::from("records/index.md")),
5292            "{issues:#?}"
5293        );
5294        assert!(
5295            missing_files.contains(&PathBuf::from("records/contacts")),
5296            "{issues:#?}"
5297        );
5298        // When the index.md is entirely absent we do NOT additionally fire
5299        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5300        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5301    }
5302
5303    #[test]
5304    fn index_stale_entry_and_missing_entry() {
5305        let fx = Fixture::new();
5306        fx.write(
5307            "records/contacts/present.md",
5308            &valid_contact("present contact"),
5309        );
5310        // Indexes for the parents (root/layer) present so we isolate type-folder.
5311        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5312        fx.write(
5313            "records/index.md",
5314            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5315        );
5316        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5317        fx.write(
5318            "records/contacts/index.md",
5319            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5320        );
5321        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5322        let issues = fx.store_all();
5323        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5324        assert!(stale.message.contains("ghost"));
5325        assert!(stale.is_error());
5326        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5327        assert!(
5328            missing.message.contains("present.md"),
5329            "{}",
5330            missing.message
5331        );
5332    }
5333
5334    #[test]
5335    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5336        let fx = Fixture::new();
5337        fx.write("records/contacts/a.md", &valid_contact("a"));
5338        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5339        fx.write(
5340            "records/index.md",
5341            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5342        );
5343        fx.write(
5344            "records/contacts/index.md",
5345            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5346        );
5347        fx.write(
5348            "records/contacts/index.jsonl",
5349            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5350        );
5351        let issues = fx.store_all();
5352        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5353        assert!(stale.message.contains("not a safe store-relative path"));
5354    }
5355
5356    #[test]
5357    fn index_summary_mismatch() {
5358        let fx = Fixture::new();
5359        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5360        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5361        fx.write(
5362            "records/index.md",
5363            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5364        );
5365        fx.write(
5366            "records/contacts/index.md",
5367            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5368        );
5369        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5370        let issues = fx.store_all();
5371        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5372        assert!(issue.is_error());
5373        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5374    }
5375
5376    #[test]
5377    fn index_summary_match_passes() {
5378        let fx = Fixture::new();
5379        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5380        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5381        fx.write(
5382            "records/index.md",
5383            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5384        );
5385        fx.write(
5386            "records/contacts/index.md",
5387            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5388        );
5389        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5390        let issues = fx.store_all();
5391        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5392    }
5393
5394    #[test]
5395    fn index_entry_with_tag_suffix_matches_summary() {
5396        let fx = Fixture::new();
5397        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5398        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5399        fx.write(
5400            "records/index.md",
5401            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5402        );
5403        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5404        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5405        // which must be stripped before comparing against the file's summary.
5406        fx.write(
5407            "records/contacts/index.md",
5408            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5409        );
5410        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5411        let issues = fx.store_all();
5412        assert!(
5413            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5414            "tag suffix should be stripped: {issues:#?}"
5415        );
5416    }
5417
5418    #[test]
5419    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5420        // Regression (the finding): a tagless file whose `summary` legitimately
5421        // ends in a single-spaced ` · #word` tail round-trips through `index
5422        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5423        // file has no tags). The validator must NOT mistake that single-spaced
5424        // tail for the renderer's tag suffix, or it reports a spurious — and
5425        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5426        let fx = Fixture::new();
5427        fx.write(
5428            "records/contacts/a.md",
5429            &valid_contact("Standup notes · #standup"),
5430        );
5431        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5432        fx.write(
5433            "records/index.md",
5434            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5435        );
5436        fx.write(
5437            "records/contacts/index.md",
5438            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5439        );
5440        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5441        let issues = fx.store_all();
5442        assert!(
5443            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5444            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5445        );
5446    }
5447
5448    #[test]
5449    fn index_jsonl_desync_missing_file_in_jsonl() {
5450        let fx = Fixture::new();
5451        fx.write("records/contacts/a.md", &valid_contact("a"));
5452        fx.write("records/contacts/b.md", &valid_contact("b"));
5453        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5454        fx.write(
5455            "records/index.md",
5456            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5457        );
5458        fx.write(
5459            "records/contacts/index.md",
5460            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5461        );
5462        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5463        fx.write(
5464            "records/contacts/index.jsonl",
5465            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5466        );
5467        let issues = fx.store_all();
5468        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5469        assert!(desync.message.contains("b.md"), "{}", desync.message);
5470    }
5471
5472    #[test]
5473    fn index_jsonl_desync_record_points_at_missing_file() {
5474        let fx = Fixture::new();
5475        fx.write("records/contacts/a.md", &valid_contact("a"));
5476        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5477        fx.write(
5478            "records/index.md",
5479            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5480        );
5481        fx.write(
5482            "records/contacts/index.md",
5483            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5484        );
5485        fx.write(
5486            "records/contacts/index.jsonl",
5487            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5488        );
5489        let issues = fx.store_all();
5490        assert!(
5491            issues
5492                .iter()
5493                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5494            "{issues:#?}"
5495        );
5496    }
5497
5498    #[test]
5499    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5500        let fx = Fixture::new();
5501        fx.write("records/contacts/a.md", &valid_contact("a"));
5502        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5503        fx.write(
5504            "records/index.md",
5505            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5506        );
5507        fx.write(
5508            "records/contacts/index.md",
5509            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5510        );
5511        fx.write(
5512            "records/contacts/index.jsonl",
5513            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5514        );
5515        let issues = fx.store_all();
5516        assert!(
5517            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5518                && i.message.contains("not a safe store-relative path")),
5519            "{issues:#?}"
5520        );
5521    }
5522
5523    #[test]
5524    fn index_jsonl_stale_summary() {
5525        let fx = Fixture::new();
5526        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5527        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5528        fx.write(
5529            "records/index.md",
5530            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5531        );
5532        fx.write(
5533            "records/contacts/index.md",
5534            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5535        );
5536        // jsonl summary disagrees with the file frontmatter.
5537        fx.write(
5538            "records/contacts/index.jsonl",
5539            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5540        );
5541        let issues = fx.store_all();
5542        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5543        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5544        assert!(stale.key.as_deref().unwrap().contains("summary"));
5545    }
5546
5547    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5548    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5549    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5550    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5551    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5552    /// `email` validated clean and answered `--where email=…` with a phantom
5553    /// value present in no file. This is the direct regression guard.
5554    #[test]
5555    fn index_jsonl_stale_queryable_field_email() {
5556        let fx = Fixture::new();
5557        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5558        fx.write("records/contacts/a.md", contact);
5559        // Start from the canonical, fully-correct sidecar set …
5560        fx.rebuild_indexes();
5561        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5562        let good = fs::read_to_string(&jsonl_path).unwrap();
5563        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5564        assert!(
5565            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5566            "freshly-rebuilt sidecar must not be stale"
5567        );
5568        // … then desync ONLY the email so it's the single differing field.
5569        assert!(
5570            good.contains("real@correct.com"),
5571            "sidecar projects email: {good}"
5572        );
5573        fx.write(
5574            "records/contacts/index.jsonl",
5575            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5576        );
5577
5578        let issues = fx.store_all();
5579        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5580        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5581        // The mismatch is reported precisely on `email`, and summary/type — which
5582        // still match — are NOT named.
5583        let key = stale.key.as_deref().unwrap();
5584        assert!(
5585            key.contains("email"),
5586            "expected `email` in stale key, got {key:?}"
5587        );
5588        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5589        assert!(!key.contains("type"), "type still matches: {key:?}");
5590    }
5591
5592    /// Broaden the guard across the typed/list/timestamp projections at once:
5593    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5594    /// caught, with all three named in one issue.
5595    #[test]
5596    fn index_jsonl_stale_typed_and_list_fields() {
5597        let fx = Fixture::new();
5598        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5599        fx.write("records/expenses/e.md", expense);
5600        fx.rebuild_indexes();
5601        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5602        let good = fs::read_to_string(&jsonl_path).unwrap();
5603        assert!(
5604            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5605            "freshly-rebuilt sidecar must not be stale"
5606        );
5607        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5608        let stale_line = good
5609            .replace("\"q2\"", "\"WRONG-TAG\"")
5610            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5611            .replace("1299", "9999");
5612        fx.write("records/expenses/index.jsonl", &stale_line);
5613
5614        let issues = fx.store_all();
5615        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5616        let key = stale.key.as_deref().unwrap();
5617        for expected in ["amount", "tags", "updated"] {
5618            assert!(
5619                key.contains(expected),
5620                "expected `{expected}` in stale key, got {key:?}"
5621            );
5622        }
5623    }
5624
5625    #[test]
5626    fn index_orphan_in_noncanonical_folder() {
5627        let fx = Fixture::new();
5628        fx.write("records/contacts/a.md", &valid_contact("a"));
5629        // Build the canonical indexes so they aren't reported as orphans.
5630        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5631        fx.write(
5632            "records/index.md",
5633            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5634        );
5635        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5636        fx.write(
5637            "records/contacts/index.jsonl",
5638            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5639        );
5640        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5641        fx.write(
5642            "records/contacts/subfolder/index.md",
5643            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5644        );
5645        let issues = fx.store_all();
5646        let orphan = find(&issues, codes::INDEX_ORPHAN);
5647        assert_eq!(orphan.severity, Severity::Warning);
5648        assert_eq!(
5649            orphan.file,
5650            PathBuf::from("records/contacts/subfolder/index.md")
5651        );
5652    }
5653
5654    #[test]
5655    fn index_wrong_scope() {
5656        let fx = Fixture::new();
5657        fx.write("records/contacts/a.md", &valid_contact("a"));
5658        // Root index declares the wrong scope.
5659        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5660        fx.write(
5661            "records/index.md",
5662            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5663        );
5664        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5665        fx.write(
5666            "records/contacts/index.jsonl",
5667            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5668        );
5669        let issues = fx.store_all();
5670        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5671        assert_eq!(issue.severity, Severity::Warning);
5672        assert_eq!(issue.file, PathBuf::from("index.md"));
5673    }
5674
5675    #[test]
5676    fn capped_type_folder_index_does_not_flag_missing_entries() {
5677        // Over the 500-entry cap, omitted entries are expected, not an error.
5678        let fx = Fixture::new();
5679        for i in 0..501 {
5680            fx.write(
5681                &format!("records/contacts/c{i:04}.md"),
5682                &valid_contact(&format!("contact {i}")),
5683            );
5684        }
5685        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5686        fx.write(
5687            "records/index.md",
5688            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5689        );
5690        // Type-folder index lists only ONE entry + a More footer.
5691        fx.write(
5692            "records/contacts/index.md",
5693            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5694        );
5695        // jsonl must still be complete — write all 501 lines.
5696        let mut jsonl = String::new();
5697        for i in 0..501 {
5698            jsonl.push_str(&format!(
5699                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5700            ));
5701        }
5702        fx.write("records/contacts/index.jsonl", &jsonl);
5703        let issues = fx.store_all();
5704        assert!(
5705            !has(&issues, codes::INDEX_MISSING_ENTRY),
5706            "over the cap, missing browse entries are expected: {issues:#?}"
5707        );
5708        // But the jsonl is complete → no desync.
5709        assert!(
5710            !has(&issues, codes::INDEX_JSONL_DESYNC),
5711            "{:#?}",
5712            issues
5713                .iter()
5714                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5715                .collect::<Vec<_>>()
5716        );
5717    }
5718
5719    // ── log ────────────────────────────────────────────────────────────────
5720
5721    #[test]
5722    fn log_bad_timestamp_unknown_kind_out_of_order() {
5723        let fx = Fixture::new();
5724        fx.write(
5725            "log.md",
5726            concat!(
5727                "---\ntype: log\n---\n\n# Log\n\n",
5728                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5729                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5730                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5731                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5732            ),
5733        );
5734        let issues = fx.store_all();
5735        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5736        assert_eq!(
5737            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5738            Severity::Warning
5739        );
5740        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5741        assert_eq!(unknown.severity, Severity::Warning);
5742        assert!(unknown.message.contains("frobnicate"));
5743        assert!(unknown
5744            .suggestion
5745            .as_deref()
5746            .is_some_and(|s| s.contains("create")));
5747        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5748        assert!(bad.is_error());
5749    }
5750
5751    #[test]
5752    fn log_validate_entry_without_object_is_well_formed() {
5753        let fx = Fixture::new();
5754        fx.write(
5755            "log.md",
5756            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5757        );
5758        let issues = fx.store_all();
5759        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5760        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5761    }
5762
5763    #[test]
5764    fn log_in_order_is_clean() {
5765        let fx = Fixture::new();
5766        fx.write(
5767            "log.md",
5768            concat!(
5769                "---\ntype: log\n---\n\n",
5770                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5771                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5772            ),
5773        );
5774        let issues = fx.store_all();
5775        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5776    }
5777
5778    #[test]
5779    fn log_not_checked_in_working_set() {
5780        // log.md ordering is an --all-only check.
5781        let fx = Fixture::new();
5782        fx.write(
5783            "log.md",
5784            concat!(
5785                "---\ntype: log\n---\n\n",
5786                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5787                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5788            ),
5789        );
5790        let issues = validate_working_set(&fx.store(), None).unwrap();
5791        assert!(
5792            !has(&issues, codes::LOG_OUT_OF_ORDER),
5793            "log ordering is --all only: {issues:#?}"
5794        );
5795    }
5796
5797    // ── working-set scoping ───────────────────────────────────────────────────
5798
5799    #[test]
5800    fn working_set_validates_only_changed_files() {
5801        let fx = Fixture::new();
5802        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5803        // in the log → working set must skip it.
5804        fx.write(
5805            "records/contacts/dirty.md",
5806            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5807        );
5808        fx.write(
5809            "records/contacts/unlogged.md",
5810            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5811        );
5812        fx.write(
5813            "log.md",
5814            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5815        );
5816        let issues = validate_working_set(&fx.store(), None).unwrap();
5817        assert!(
5818            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5819                && i.file == Path::new("records/contacts/dirty.md")),
5820            "{issues:#?}"
5821        );
5822        assert!(
5823            !issues
5824                .iter()
5825                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5826            "unlogged file must not be in the working set: {issues:#?}"
5827        );
5828    }
5829
5830    #[test]
5831    fn working_set_includes_incoming_linkers_to_changed_path() {
5832        let fx = Fixture::new();
5833        // `changed` was renamed/removed (logged). `linker` points at it with a
5834        // now-broken link and was NOT itself logged — but must be pulled in.
5835        fx.write(
5836            "records/profiles/linker.md",
5837            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5838        );
5839        // `changed.md` does NOT exist on disk (removed).
5840        fx.write(
5841            "log.md",
5842            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5843        );
5844        let issues = validate_working_set(&fx.store(), None).unwrap();
5845        assert!(
5846            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5847                && i.file == Path::new("records/profiles/linker.md")),
5848            "incoming linker to a removed path must be validated: {issues:#?}"
5849        );
5850    }
5851
5852    #[test]
5853    fn working_set_respects_explicit_since_cutoff() {
5854        let fx = Fixture::new();
5855        fx.write(
5856            "records/contacts/old.md",
5857            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5858        );
5859        fx.write(
5860            "records/contacts/new.md",
5861            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5862        );
5863        fx.write(
5864            "log.md",
5865            concat!(
5866                "---\ntype: log\n---\n\n",
5867                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5868                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5869            ),
5870        );
5871        // Cutoff after `old` but before `new`.
5872        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5873        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5874        assert!(
5875            issues
5876                .iter()
5877                .any(|i| i.file == Path::new("records/contacts/new.md")),
5878            "{issues:#?}"
5879        );
5880        assert!(
5881            !issues
5882                .iter()
5883                .any(|i| i.file == Path::new("records/contacts/old.md")),
5884            "old change is before the cutoff: {issues:#?}"
5885        );
5886    }
5887
5888    #[test]
5889    fn working_set_default_since_is_last_validate_entry() {
5890        let fx = Fixture::new();
5891        // `before` changed before the last validate; `after` changed after.
5892        fx.write(
5893            "records/contacts/before.md",
5894            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5895        );
5896        fx.write(
5897            "records/contacts/after.md",
5898            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5899        );
5900        fx.write(
5901            "log.md",
5902            concat!(
5903                "---\ntype: log\n---\n\n",
5904                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5905                "## [2026-05-21 10:00] validate\nPASS\n\n",
5906                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5907            ),
5908        );
5909        let issues = validate_working_set(&fx.store(), None).unwrap();
5910        assert!(
5911            issues
5912                .iter()
5913                .any(|i| i.file == Path::new("records/contacts/after.md")),
5914            "{issues:#?}"
5915        );
5916        assert!(
5917            !issues
5918                .iter()
5919                .any(|i| i.file == Path::new("records/contacts/before.md")),
5920            "change before the last validate entry is outside the default window: {issues:#?}"
5921        );
5922    }
5923
5924    // ── ordering / determinism ────────────────────────────────────────────────
5925
5926    #[test]
5927    fn issues_are_sorted_by_file_then_line() {
5928        let fx = Fixture::new();
5929        fx.write("records/profiles/z.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5930        fx.write("records/profiles/a.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5931        let issues = fx.store_all();
5932        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5933        let mut sorted = files.clone();
5934        sorted.sort();
5935        assert_eq!(
5936            files, sorted,
5937            "issues must be emitted in a stable file order"
5938        );
5939    }
5940
5941    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5942
5943    #[test]
5944    fn frozen_page_is_not_a_validate_error() {
5945        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5946        // A clean file listed in `### Frozen pages` must validate clean.
5947        let mut fx = Fixture::new();
5948        fx.config
5949            .frozen_pages
5950            .push(PathBuf::from("records/decisions/d.md"));
5951        fx.write(
5952            "records/decisions/d.md",
5953            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5954        );
5955        let issues = fx.store_all();
5956        assert!(
5957            !has(&issues, codes::POLICY_FROZEN_PAGE),
5958            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5959        );
5960    }
5961
5962    #[test]
5963    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5964        // The full-path doctrine makes ambiguity impossible; the defensive code
5965        // must never fire on a normal store.
5966        let fx = Fixture::new();
5967        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5968        let mut body = valid_contact("links to sarah");
5969        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5970        fx.write("records/contacts/p.md", &body);
5971        let issues = fx.store_all();
5972        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5973    }
5974
5975    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5976
5977    #[test]
5978    fn unknown_type_passes_through() {
5979        // A custom type is ambient context: it has a `type`, so no
5980        // FM_MISSING_TYPE, and with no matching schema there are no schema
5981        // errors. Only the universal contract (summary, timestamps) applies.
5982        let fx = Fixture::new();
5983        fx.write(
5984            "records/proposals/x.md",
5985            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5986        );
5987        let issues = fx.store_all();
5988        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5989        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5990        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5991        // The unknown fields don't trip anything.
5992        assert!(
5993            !issues
5994                .iter()
5995                .any(|i| i.key.as_deref() == Some("custom_field")
5996                    || i.key.as_deref() == Some("budget")),
5997            "unknown fields are ambient context: {issues:#?}"
5998        );
5999    }
6000
6001    // ── find_links_to prefix-collision safety (working set) ───────────────────
6002
6003    #[test]
6004    fn incoming_linker_scan_does_not_prefix_match() {
6005        // A changed `records/contacts/sarah` must NOT pull in a file that only
6006        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
6007        let fx = Fixture::new();
6008        fx.write(
6009            "records/profiles/only-sarah-chen.md",
6010            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6011        );
6012        // The log says `records/contacts/sarah` (the shorter path) changed.
6013        fx.write(
6014            "log.md",
6015            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
6016        );
6017        let issues = validate_working_set(&fx.store(), None).unwrap();
6018        assert!(
6019            !issues
6020                .iter()
6021                .any(|i| i.file == Path::new("records/profiles/only-sarah-chen.md")),
6022            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
6023        );
6024    }
6025
6026    #[test]
6027    fn working_set_does_not_flag_stale_catalog_index_as_wiki_link_broken() {
6028        // The working-set incoming-linker scan rides embedded-ripgrep
6029        // `Store::find_links_to`, which scans EVERY `.md` — so a type-folder
6030        // `index.md` listing a now-deleted target IS pulled into the working set.
6031        // But its entries are GENERATED catalog entries, not authored body links:
6032        // a dangling one is an `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"),
6033        // the job of `check_indexes` under `--all` — NOT a `WIKI_LINK_BROKEN`
6034        // ("create the target"), whose remedy would steer an agent to recreate
6035        // the very data it just deleted. The loop default must therefore NOT
6036        // body-link-check the derived catalog (index integrity is an O(store)
6037        // sweep concern, not an O(changed) loop concern). Adversarial review #11:
6038        // the prior behavior gave WIKI_LINK_BROKEN here while `--all` gave
6039        // INDEX_STALE_ENTRY for the identical condition — two codes, opposite
6040        // remedies, across the loop default vs the sweep.
6041        let fx = Fixture::new();
6042        // A catalog that still lists the deleted contact (a real, common stale
6043        // state after an out-of-band `delete`).
6044        fx.write(
6045            "records/contacts/index.md",
6046            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
6047        );
6048        // The log says `records/contacts/sarah-chen` was deleted.
6049        fx.write(
6050            "log.md",
6051            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
6052        );
6053        let issues = validate_working_set(&fx.store(), None).unwrap();
6054        assert!(
6055            !issues
6056                .iter()
6057                .any(|i| i.file == Path::new("records/contacts/index.md")
6058                    && i.code == codes::WIKI_LINK_BROKEN),
6059            "a stale catalog `index.md` entry must NOT be WIKI_LINK_BROKEN in the \
6060             working set (it is an INDEX_STALE_ENTRY under `--all`): {issues:#?}"
6061        );
6062    }
6063
6064    #[test]
6065    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
6066        // CONTRACT (the O(changed × store) fix): the working-set scan finds
6067        // incoming linkers for EVERY changed object, and does so via the single
6068        // batch pass `Store::find_links_to_any` — not one full store read per
6069        // changed object. This test pins the behavior that makes the single-pass
6070        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
6071        // into the working set and flagged. A regression that scanned for only the
6072        // first/last changed object, or that dropped the batch union, would leave
6073        // one of the two broken links unreported and fail here.
6074        let fx = Fixture::new();
6075        // Linker A → deleted target #1 (in the body).
6076        fx.write(
6077            "records/profiles/refers-sarah.md",
6078            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6079        );
6080        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
6081        // sidecar `links` projection would miss, which is why this must be a
6082        // content scan, not a sidecar read).
6083        fx.write(
6084            "records/meetings/2026/05/kickoff.md",
6085            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
6086        );
6087        // The log says BOTH targets were deleted in this window.
6088        fx.write(
6089            "log.md",
6090            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
6091        );
6092
6093        let issues = validate_working_set(&fx.store(), None).unwrap();
6094        assert!(
6095            issues
6096                .iter()
6097                .any(|i| i.file == Path::new("records/profiles/refers-sarah.md")
6098                    && i.code == codes::WIKI_LINK_BROKEN),
6099            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
6100        );
6101        assert!(
6102            issues.iter().any(
6103                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
6104                    && i.code == codes::WIKI_LINK_BROKEN
6105            ),
6106            "linker to the SECOND deleted target (typed-field edge) must also be \
6107             pulled in and flagged — proves the scan covers the whole changed set, \
6108             not just one object: {issues:#?}"
6109        );
6110    }
6111
6112    #[test]
6113    fn frontmatter_block_sequence_links_each_get_their_own_line() {
6114        // Each block-sequence wiki-link reports on its own source line.
6115        let fx = Fixture::new();
6116        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
6117        fx.write(
6118            "records/meetings/m.md",
6119            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
6120        );
6121        let issues = fx.store_all();
6122        let broken_lines: BTreeSet<Option<u32>> = issues
6123            .iter()
6124            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
6125            .map(|i| i.line)
6126            .collect();
6127        assert_eq!(
6128            broken_lines.len(),
6129            2,
6130            "two distinct broken-link lines: {issues:#?}"
6131        );
6132    }
6133
6134    // ── Regression: null / non-scalar created/updated ────────────────────────
6135
6136    #[test]
6137    fn null_created_is_missing_not_silently_passed() {
6138        // Regression: a present-but-`null` `created:` previously slipped past
6139        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
6140        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
6141        let fx = Fixture::new();
6142        fx.write(
6143            "records/contacts/a.md",
6144            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6145        );
6146        let issues = fx.store_all();
6147        assert!(
6148            has(&issues, codes::FM_MISSING_CREATED),
6149            "null `created:` must read as missing: {issues:#?}"
6150        );
6151    }
6152
6153    #[test]
6154    fn sequence_created_is_bad_timestamp() {
6155        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
6156        let fx = Fixture::new();
6157        fx.write(
6158            "records/contacts/a.md",
6159            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6160        );
6161        let issues = fx.store_all();
6162        assert!(
6163            issues
6164                .iter()
6165                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
6166            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
6167        );
6168    }
6169
6170    // ── Regression: schema required null / empty-collection ──────────────────
6171
6172    #[test]
6173    fn required_field_null_or_empty_collection_is_missing() {
6174        // Regression: a plain required field (no shape/enum) holding YAML null
6175        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
6176        // previously validated with 0 issues — `scalar_string` returned None and
6177        // `.unwrap_or(false)` treated the value as non-empty.
6178        for value in ["", " []", " {}"] {
6179            let mut fx = Fixture::new();
6180            fx.config.schemas.insert(
6181                "contact".into(),
6182                Schema {
6183                    fields: vec![FieldSpec {
6184                        name: "name".into(),
6185                        required: true,
6186                        ..Default::default()
6187                    }],
6188                    ..Default::default()
6189                },
6190            );
6191            fx.write(
6192                "records/contacts/a.md",
6193                &format!(
6194                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
6195                ),
6196            );
6197            let issues = fx.store_all();
6198            assert!(
6199                issues
6200                    .iter()
6201                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
6202                        && i.key.as_deref() == Some("name")),
6203                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
6204            );
6205        }
6206    }
6207
6208    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
6209
6210    #[test]
6211    fn wiki_link_to_raw_source_file_resolves() {
6212        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
6213        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
6214        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
6215        let fx = Fixture::new();
6216        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6217        fx.write(
6218            "records/contacts/a.md",
6219            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
6220        );
6221        let issues = fx.store_all();
6222        assert!(
6223            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
6224            "a link to an existing raw source file must not be broken: {issues:#?}"
6225        );
6226    }
6227
6228    // ── Regression: wrong-case wiki-link must be platform-independent ─────────
6229
6230    #[test]
6231    fn wrong_case_wiki_link_is_broken_exact_case() {
6232        // Regression (cross-platform false-negative): on case-insensitive
6233        // APFS/macOS, `Path::is_file()` resolves `[[records/contacts/BOB]]` to the
6234        // on-disk `bob.md`, so validate passed — but on case-sensitive Linux that
6235        // file does not exist (WIKI_LINK_BROKEN). Existence resolution is now
6236        // exact-case, so a wrong-case target is flagged on every platform.
6237        let fx = Fixture::new();
6238        fx.write("records/contacts/bob.md", &valid_contact("Bob"));
6239        let mut body = valid_contact("links with the wrong case");
6240        body.push_str("\nKnows [[records/contacts/BOB]].\n");
6241        fx.write("records/contacts/alice.md", &body);
6242        let issues = fx.store_all();
6243        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
6244        assert!(issue.is_error());
6245        assert!(
6246            issue.message.contains("records/contacts/BOB"),
6247            "the wrong-case target must be named in the issue: {issues:#?}"
6248        );
6249    }
6250
6251    #[test]
6252    fn correct_case_wiki_link_still_resolves() {
6253        // The companion to the exact-case fix: a *correct*-case lowercase link to
6254        // the same on-disk file must STILL resolve clean. Only a genuine case
6255        // mismatch is newly flagged; correct case is never a false positive.
6256        let fx = Fixture::new();
6257        fx.write("records/contacts/bob.md", &valid_contact("Bob"));
6258        let mut body = valid_contact("links with the right case");
6259        body.push_str("\nKnows [[records/contacts/bob]].\n");
6260        fx.write("records/contacts/alice.md", &body);
6261        let issues = fx.store_all();
6262        assert!(
6263            !issues
6264                .iter()
6265                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("contacts/bob")),
6266            "a correct-case link must resolve clean: {issues:#?}"
6267        );
6268    }
6269
6270    #[test]
6271    fn wrong_case_raw_source_wiki_link_is_broken() {
6272        // The literal-path candidate (raw `.eml`/`.pdf` sources kept verbatim)
6273        // gets the same exact-case treatment as the `.md`-appended candidate: a
6274        // wrong-case link to a raw source is broken on a case-sensitive host, so
6275        // it must flag on macOS too.
6276        let fx = Fixture::new();
6277        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6278        fx.write(
6279            "records/contacts/a.md",
6280            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-ELENA.eml]] for context.\n",
6281        );
6282        let issues = fx.store_all();
6283        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
6284        assert!(issue.is_error());
6285        assert!(
6286            issue.message.contains("2026-05-22-ELENA.eml"),
6287            "the wrong-case raw-source target must be flagged: {issues:#?}"
6288        );
6289    }
6290
6291    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
6292
6293    #[test]
6294    fn non_utf8_content_file_is_reported() {
6295        // Regression: a content file with invalid UTF-8 bytes made
6296        // check_content_file return None silently, so the store passed with exit
6297        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
6298        let fx = Fixture::new();
6299        let abs = fx.dir.path().join("records/notes/corrupt.md");
6300        fs::create_dir_all(abs.parent().unwrap()).unwrap();
6301        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
6302        let issues = validate_working_set(&fx.store(), None).unwrap();
6303        assert!(
6304            has(&issues, codes::FM_UNREADABLE),
6305            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
6306        );
6307    }
6308
6309    // ── Regression: code-fence char/run tracking ─────────────────────────────
6310
6311    #[test]
6312    fn tilde_fence_containing_backtick_fence_does_not_invert() {
6313        // Regression: a `~~~` block legally contains ``` lines (documenting a
6314        // backtick fence); a naive toggle inverted `in_fence` and checked the
6315        // demo `[[fake]]` inside the code block as a live link. The link inside
6316        // BOTH fences must be skipped.
6317        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
6318        let links = extract_wiki_links(body);
6319        assert!(
6320            links.is_empty(),
6321            "wiki-link inside a nested code fence must be skipped: {links:?}"
6322        );
6323    }
6324
6325    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
6326
6327    #[test]
6328    fn all_sweep_visits_in_layer_log_folder() {
6329        // Regression: `validate --all` pruned every dir named `log`, so a real
6330        // content folder like `records/log/` was invisible to the full sweep —
6331        // reporting FEWER errors than the default scope. A frontmatter-less file
6332        // there must still surface FM_MISSING_TYPE under --all.
6333        let fx = Fixture::new();
6334        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
6335        let issues = fx.store_all();
6336        assert!(
6337            has(&issues, codes::FM_MISSING_TYPE),
6338            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
6339        );
6340    }
6341
6342    // ── Regression: flow-form list with whitespace ───────────────────────────
6343
6344    #[test]
6345    fn flow_form_link_list_with_spaces_is_flagged() {
6346        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
6347        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
6348        // text test. The value-based detector must catch the whitespace variant.
6349        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
6350        assert!(
6351            keys.iter().any(|k| k == "attendees"),
6352            "spaced flow-form list must be detected: {keys:?}"
6353        );
6354    }
6355
6356    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6357
6358    #[test]
6359    fn middot_hashtag_summary_tail_round_trips() {
6360        // Regression: a tagless summary that legitimately ends in a single-spaced
6361        // ` · #word` tail round-trips through the renderer verbatim, but the loose
6362        // ` · ` strip mistook it for the tag block and reported a spurious,
6363        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
6364        // exact double-spaced `  ·  ` delimiter.
6365        assert_eq!(
6366            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
6367            Some("Standup notes · #standup"),
6368            "a single-spaced middot tail is part of the summary, not a tag block"
6369        );
6370        // The renderer's real double-spaced tag suffix IS still stripped.
6371        assert_eq!(
6372            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
6373            Some("Renewal champion"),
6374            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
6375        );
6376    }
6377
6378    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6379
6380    #[test]
6381    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
6382        assert!(is_url("http://x"), "an 8-char http URL is valid");
6383        assert!(is_url("https://x"), "a 9-char https URL is valid");
6384        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
6385        assert!(!is_url("https://"), "a bare https scheme is rejected");
6386    }
6387
6388    #[test]
6389    fn email_shape_rejects_double_at() {
6390        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6391        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6392        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6393    }
6394
6395    // ── Regression: working-set vs --all agree on log.md links ───────────────
6396
6397    #[test]
6398    fn working_set_does_not_flag_log_md_body_links() {
6399        // Regression: the working-set incoming-linker scan runs root `log.md`
6400        // through the body wiki-link check, flagging a historical `[[deleted]]`
6401        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6402        // the append-only log can't have "fixed". The root meta files must be
6403        // excluded from the body link check, matching --all.
6404        let fx = Fixture::new();
6405        fx.write("records/contacts/a.md", &valid_contact("A"));
6406        fx.write(
6407            "log.md",
6408            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6409        );
6410        let issues = validate_working_set(&fx.store(), None).unwrap();
6411        assert!(
6412            !issues
6413                .iter()
6414                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6415                    && i.file == std::path::Path::new("log.md")),
6416            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6417        );
6418    }
6419
6420    // ── Regression: DB.md schema field lint ──────────────────────────────────
6421
6422    #[test]
6423    fn schema_duplicate_field_name_is_flagged() {
6424        let mut fx = Fixture::new();
6425        fx.config.schemas.insert(
6426            "contact".into(),
6427            Schema {
6428                fields: vec![
6429                    FieldSpec {
6430                        name: "name".into(),
6431                        required: true,
6432                        ..Default::default()
6433                    },
6434                    FieldSpec {
6435                        name: "name".into(),
6436                        ..Default::default()
6437                    },
6438                ],
6439                ..Default::default()
6440            },
6441        );
6442        let issues = fx.store_all();
6443        assert!(
6444            issues
6445                .iter()
6446                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6447            "a duplicate schema field name must be flagged: {issues:#?}"
6448        );
6449    }
6450
6451    #[test]
6452    fn schema_unknown_modifier_is_info() {
6453        let mut fx = Fixture::new();
6454        fx.config.schemas.insert(
6455            "contact".into(),
6456            Schema {
6457                fields: vec![FieldSpec {
6458                    name: "name".into(),
6459                    unknown_modifiers: vec!["requierd".into()],
6460                    ..Default::default()
6461                }],
6462                ..Default::default()
6463            },
6464        );
6465        let issues = fx.store_all();
6466        assert!(
6467            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6468                && i.severity == Severity::Info
6469                && i.key.as_deref() == Some("name")),
6470            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6471        );
6472    }
6473
6474    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6475    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6476    /// and the module doc-comment promises this code implements "exactly those
6477    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6478    /// new validation code is added to the engine but never documented.
6479    #[test]
6480    fn every_code_constant_is_documented_in_spec() {
6481        // Parse the canonical constant *values* straight out of this module's
6482        // source, so a future `pub const X: &str = "X";` is covered with no test
6483        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6484        let this_src = include_str!("validate.rs");
6485        let mut codes_in_module: Vec<String> = Vec::new();
6486        let mut in_codes_mod = false;
6487        for line in this_src.lines() {
6488            let t = line.trim();
6489            if t.starts_with("pub mod codes") {
6490                in_codes_mod = true;
6491                continue;
6492            }
6493            // The `mod codes` block ends at its closing brace at column 0.
6494            if in_codes_mod && line == "}" {
6495                break;
6496            }
6497            if in_codes_mod {
6498                if let Some(rest) = t.strip_prefix("pub const ") {
6499                    // rest = `NAME: &str = "VALUE";`
6500                    let value = rest
6501                        .split_once('=')
6502                        .map(|(_, v)| v.trim())
6503                        .and_then(|v| v.strip_prefix('"'))
6504                        .and_then(|v| v.strip_suffix("\";"))
6505                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6506                    codes_in_module.push(value.to_string());
6507                }
6508            }
6509        }
6510        assert!(
6511            codes_in_module.len() >= 36,
6512            "parsed only {} code constants from `mod codes`; the parser likely \
6513             broke against a source-format change",
6514            codes_in_module.len()
6515        );
6516
6517        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6518        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6519        let spec = fs::read_to_string(&spec_path)
6520            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6521
6522        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6523        let missing: Vec<&String> = codes_in_module
6524            .iter()
6525            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6526            .collect();
6527        assert!(
6528            missing.is_empty(),
6529            "validation codes emitted by the engine but absent from SPEC.md \
6530             § Validation (the declared complete vocabulary): {missing:?}"
6531        );
6532    }
6533
6534    // ── loose files (directly at a layer root, no type-folder) ───────────────
6535
    /// Complete content file for a `contact` with `id: alice`: frontmatter
    /// (`type`, `id`, `created`, `updated`, `summary`) followed by a one-line
    /// body. The loose-file tests below write it to `records/contacts/alice.md`,
    /// i.e. inside a type folder, as the companion to the loose `bob` file.
    const LOOSE_ALICE: &str = "---\ntype: contact\nid: alice\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Alice\n---\nbody\n";
    /// Complete content file for a `contact` with `id: bob`, same frontmatter
    /// keys as `LOOSE_ALICE`. The loose-file tests below write it to
    /// `records/bob.md`, directly under the layer root with no type folder.
    const LOOSE_BOB: &str = "---\ntype: contact\nid: bob\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Bob loose\n---\nbody\n";
6538
6539    #[test]
6540    fn loose_file_catalogued_in_layer_jsonl_validates_clean() {
6541        let fx = Fixture::new();
6542        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6543        fx.write("records/bob.md", LOOSE_BOB); // loose, directly under records/
6544        fx.rebuild_indexes();
6545        let issues = fx.store_all();
6546        assert!(
6547            issues.is_empty(),
6548            "a rebuilt store with a catalogued loose file must validate clean, got: {issues:?}"
6549        );
6550    }
6551
6552    #[test]
6553    fn loose_file_with_missing_layer_jsonl_is_index_jsonl_missing() {
6554        let fx = Fixture::new();
6555        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6556        fx.write("records/bob.md", LOOSE_BOB);
6557        fx.rebuild_indexes();
6558        // Simulate the layer sidecar going missing (a hand-deletion / bad sync).
6559        fs::remove_file(fx.dir.path().join("records/index.jsonl")).unwrap();
6560        let issues = fx.store_all();
6561        assert!(
6562            has(&issues, codes::INDEX_JSONL_MISSING),
6563            "a loose file with no layer index.jsonl must raise INDEX_JSONL_MISSING, got: {issues:?}"
6564        );
6565    }
6566}