Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and file walk here, reading only the two public,
//! caller-populated fields of a [`Store`]: [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! The exceptions are the two store scans used by the working-set path
//! described above ([`Store::find_links_to_any`] and the [`Store::walk`]
//! fallback) and the schema types imported from [`crate::parser`].
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is. Only [`Severity::Error`] fails
/// validation (non-zero exit); warnings and info never do.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point left to the agent's discretion.
    Warning,
    /// Informational only; has no effect on exit status.
    Info,
}
61
62/// A single structured validation finding. Agent-primary and machine-parseable
63/// via `--json`; `suggestion` is a deterministic remediation hint the agent
64/// applies without guessing.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Issue {
67    /// The severity; only [`Severity::Error`] fails validation.
68    pub severity: Severity,
69    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
70    pub code: &'static str,
71    /// The file the issue is about.
72    pub file: PathBuf,
73    /// The 1-based line, when applicable.
74    pub line: Option<u32>,
75    /// The frontmatter key, when the issue is about a specific field.
76    pub key: Option<String>,
77    /// A human-readable message.
78    pub message: String,
79    /// A deterministic remediation hint, when one exists.
80    pub suggestion: Option<String>,
81    /// Other files involved (e.g. the duplicate partner in a collision).
82    pub related: Vec<PathBuf>,
83}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes: one constant for each row of the
/// SPEC.md § Validation table. Call sites name these constants rather than
/// spelling bare strings, so the code and the SPEC table cannot drift apart
/// unnoticed.
pub mod codes {
    /// The path contains no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` does not declare `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter is missing `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` contains an `##` section outside the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A field declaration under `DB.md ## Schemas` is malformed (its name is
    /// empty or duplicated) or uses an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";
    /// A content file lacks `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file lacks `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file lacks `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// A content file cannot be read (invalid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// The frontmatter block is not valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` is not ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// A content file lacks `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` spans more than one line.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    /// A wiki-link target is not a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link targets a file that does not exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches more than one file (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target includes a `.md` extension, which should be dropped.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list is written inline as `[[[a]], [[b]]]`; block form is
    /// required.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of one type collide on a `unique:` key declared under
    /// `DB.md ## Schemas`.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";
    /// A field required by a `DB.md` schema is absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value does not match the shape modifier its schema declares.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field holds a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value is not a member of the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    /// A write targeted a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file exists whose type is listed under `### Ignored types`.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `wiki-page` derives from a record of an ignored type.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    /// The timestamp in a `log.md` entry header cannot be parsed.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// The kind of a `log.md` entry is not recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries are not in non-decreasing time order (possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    /// A non-empty canonical folder has no `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file is absent from its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` lives in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` disagrees with its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// The text of an index entry differs from the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// The `index.jsonl` twin of a type-folder is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file is missing from the `index.jsonl`, or a jsonl record points at a
    /// file that does not exist.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// The fields of an `index.jsonl` record differ from the file's
    /// frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    /// `tags` is not a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
    /// A line of `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// A content file references an `asset`/`assets` path that has no record
    /// in `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// An `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// No wrapper references the path of an `assets.jsonl` record.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// An `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
195
/// Upper bound, in chars, that the SPEC places on a `summary`. A longer
/// summary is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
198
/// The `log.md` entry kinds the SPEC (§ `log.md`) recognizes. An entry with
/// any other kind is reported as `LOG_UNKNOWN_KIND` — a warning, not an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
212
213// ─────────────────────────────────────────────────────────────────────────────
214//  Public entrypoints
215// ─────────────────────────────────────────────────────────────────────────────
216
217/// **Loop default.** Validate the working set: content files changed since
218/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
219/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
220/// none of the cross-file global passes (entity-dedup, every-index sync,
221/// `log.md` ordering) that `--all` adds. If the default call finds no logged
222/// changed objects, it falls back to a per-file content sweep so an externally
223/// edited or freshly copied store cannot pass vacuously.
224///
225/// **Cost.** The changed set is read from `log.md` — O(changed): every
226/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
227/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
228/// over that set plus its incoming linkers — also O(changed). The one part that
229/// is *not* O(changed) is discovering those incoming linkers: a link to a
230/// changed path can live in the body or a typed frontmatter field of any file,
231/// so it is found by a **single** embedded-ripgrep pass over the store
232/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
233/// scan, flat in the changed-set size. (It was previously a full store read
234/// *per* changed object — `O(changed × store)`; that is the blow-up this path
235/// no longer pays.) The unavoidable single content scan is the same shape as
236/// free-text `dbmd search`; the sidecar `links` projection can't replace it
237/// because it omits body/typed-field edges.
238pub fn validate_working_set(
239    store: &Store,
240    since: Option<DateTime<FixedOffset>>,
241) -> crate::Result<Vec<Issue>> {
242    if !store_marker_present(store) {
243        return Ok(vec![not_a_store_issue(store)]);
244    }
245
246    let cutoff = match since {
247        Some(ts) => Some(ts),
248        None => last_validate_at(store),
249    };
250
251    // 1. Changed objects, straight from the log (O(changed) — never a walk).
252    let changed = changed_objects_since(store, cutoff);
253    if changed.is_empty() && since.is_none() {
254        return validate_content_sweep(store);
255    }
256
257    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
258    //    path (the linker may now be stale even though it didn't change). The
259    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
260    //    pass over the store for the WHOLE changed set (one `.md` walk, one
261    //    presence-only/early-exit scan per file), not one walk per object. This
262    //    is the fix for the `O(changed × store)` blow-up that calling
263    //    `find_links_to` in a loop produced (a full store read per changed
264    //    object); the cost is now a single store scan regardless of how many
265    //    objects changed. A returned self-link is harmlessly deduped by the set
266    //    (the object is already inserted below).
267    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
268    let mut working: BTreeSet<PathBuf> = changed;
269    for linker in store.find_links_to_any(&changed_targets)? {
270        working.insert(linker);
271    }
272
273    let mut issues = Vec::new();
274    for rel in &working {
275        let abs = store.root.join(rel);
276        // A changed path can be a *deletion* — skip files that no longer exist;
277        // the incoming-linker scan above already flagged links into them.
278        if !abs.is_file() {
279            continue;
280        }
281        // `None` basename index: the working-set pass does not build the
282        // store-wide basename map (that is a `--all`-only structure), so a bare
283        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
284        // `--all` sweep does the ambiguity upgrade.
285        check_content_file(store, rel, &abs, None, &mut issues);
286    }
287    issues.sort_by(issue_order);
288    Ok(issues)
289}
290
291fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
292    let mut issues = Vec::new();
293    for rel in store.walk()? {
294        let abs = store.root.join(&rel);
295        check_content_file(store, &rel, &abs, None, &mut issues);
296    }
297    issues.sort_by(issue_order);
298    Ok(issues)
299}
300
301/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
302/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
303/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
304/// loop.
305pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
306    if !store_marker_present(store) {
307        return Ok(vec![not_a_store_issue(store)]);
308    }
309
310    let mut issues = Vec::new();
311
312    // Store-identity file: `DB.md` shape (type / required fields / section
313    // headers). A single root file, checked once in the sweep — not a content
314    // file (it carries no `summary`), so it is not part of `walk_content_files`.
315    check_db_md(store, &mut issues);
316
317    let files = walk_content_files(&store.root);
318
319    // The basename index makes the short-form wiki-link check able to upgrade a
320    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
321    // Built once from the already-gathered sweep list (no extra walk); only the
322    // `--all` path has it (the working-set path stays O(changed)).
323    let basenames = build_basename_index(&files);
324
325    // Per-file checks over the whole store.
326    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
327    for rel in &files {
328        let abs = store.root.join(rel);
329        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
330            parsed.push((rel.clone(), p));
331        }
332    }
333
334    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
335    check_duplicates(store, &parsed, &mut issues);
336
337    // Cross-file: hierarchical index.md + index.jsonl sync.
338    check_indexes(store, &files, &mut issues);
339
340    // Cross-file: log.md well-formedness + ordering.
341    check_log(store, &mut issues);
342
343    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
344    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
345    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
346    // a fresh clone with no restored bytes still passes here.
347    check_assets(store, &parsed, &mut issues);
348
349    issues.sort_by(issue_order);
350    Ok(issues)
351}
352
353// ─────────────────────────────────────────────────────────────────────────────
354//  Per-file content checks (shared by both scopes)
355// ─────────────────────────────────────────────────────────────────────────────
356
357/// What `validate_all`'s cross-file pass needs from a per-file parse: the
358/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
359/// text-based wiki-link extraction). The body and fence-line are consumed
360/// inline during the per-file pass and not carried here.
361struct Parsed {
362    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
363    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
364    fm: Option<BTreeMap<String, Value>>,
365    /// The raw frontmatter YAML text (between the fences) — the source for
366    /// text-based wiki-link extraction in dedup.
367    fm_yaml: String,
368}
369
370/// Run every per-file check on one content file, pushing issues. Returns the
371/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
372/// `None` only when the file is unreadable or has no frontmatter block at all
373/// (which for a content file is itself reported).
374fn check_content_file(
375    store: &Store,
376    rel: &Path,
377    abs: &Path,
378    basenames: Option<&BasenameIndex>,
379    issues: &mut Vec<Issue>,
380) -> Option<Parsed> {
381    let text = match std::fs::read_to_string(abs) {
382        Ok(t) => t,
383        Err(e) => {
384            // The file exists in the walk but can't be read as UTF-8 text
385            // (invalid bytes) or hit an I/O error. Returning `None` silently
386            // here let a store whose only content file was binary garbage pass
387            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
388            // sweep exists to prevent. Report it so the agent gets an actionable
389            // diagnostic naming the unreadable file (and `index rebuild`, which
390            // hard-fails on the same file, isn't the only signal).
391            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
392                "file is not valid UTF-8 text".to_string()
393            } else {
394                format!("file could not be read: {e}")
395            };
396            push(
397                issues,
398                Severity::Error,
399                codes::FM_UNREADABLE,
400                rel,
401                None,
402                None,
403                format!("content file is unreadable: {detail}"),
404                Some(
405                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
406                        .into(),
407                ),
408                vec![],
409            );
410            return None;
411        }
412    };
413
414    let is_content = is_content_file(rel);
415
416    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
417        Some(split) => split,
418        None => {
419            // No frontmatter at all. For a content file that means there's no
420            // `type:` and no `summary:` — report both the way a parsed-but-empty
421            // file would, so the agent gets the same actionable codes.
422            if is_content {
423                push(
424                    issues,
425                    Severity::Error,
426                    codes::FM_MISSING_TYPE,
427                    rel,
428                    None,
429                    Some("type".into()),
430                    "content file has no frontmatter `type:`".into(),
431                    Some("add a YAML frontmatter block with `type:`".into()),
432                    vec![],
433                );
434                push(
435                    issues,
436                    Severity::Error,
437                    codes::SUMMARY_MISSING,
438                    rel,
439                    None,
440                    Some("summary".into()),
441                    "content file has no `summary`".into(),
442                    Some("run `dbmd fm init`".into()),
443                    vec![],
444                );
445            }
446            return None;
447        }
448    };
449
450    // Parse the YAML block.
451    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
452        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
453        // An empty frontmatter block parses as Null; treat as an empty mapping.
454        Ok(Value::Null) => Some(BTreeMap::new()),
455        Ok(_) => {
456            // A scalar / sequence at the top level isn't a frontmatter mapping.
457            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
458            // block is opaque, so there is no single offending field line.
459            push(
460                issues,
461                Severity::Error,
462                codes::FM_MALFORMED_YAML,
463                rel,
464                Some(1),
465                None,
466                "frontmatter is not a YAML mapping".into(),
467                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
468                vec![],
469            );
470            None
471        }
472        Err(e) => {
473            // Anchor to line 1 (the opening `---`): an unparseable block has no
474            // single offending field line; the agent re-reads the whole block.
475            push(
476                issues,
477                Severity::Error,
478                codes::FM_MALFORMED_YAML,
479                rel,
480                Some(1),
481                None,
482                format!("frontmatter block isn't valid YAML: {e}"),
483                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
484                vec![],
485            );
486            None
487        }
488    };
489
490    if let Some(map) = &fm {
491        // The detailed frontmatter checks only run when the YAML parsed.
492        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
493    }
494
495    // Wiki-link doctrine checks run on the body of content files (and on
496    // `index.md` files, whose entries are wiki-links too). They are NOT run on
497    // the root append-only meta files `log.md`/`DB.md`: those reach this
498    // function only via the working-set incoming-linker scan (`walk_all_md`
499    // includes them), and `validate --all` never link-checks their bodies
500    // (`walk_content_files` skips them; `check_log`/`check_db_md` do no body
501    // link checks). Without this guard the two scopes disagree — a historical
502    // `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]` in DB.md's
503    // `## Agent instructions`, is flagged `WIKI_LINK_BROKEN` by the default
504    // working set but is clean under `--all`. The log is append-only by spec, so
505    // the suggested "fix the link" remedy can't even be applied.
506    if !is_root_meta_file(rel) {
507        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
508    }
509
510    Some(Parsed { fm, fm_yaml })
511}
512
513/// All frontmatter-level checks for a content file with valid YAML.
514fn check_frontmatter(
515    store: &Store,
516    rel: &Path,
517    fm: &BTreeMap<String, Value>,
518    fm_yaml: &str,
519    basenames: Option<&BasenameIndex>,
520    issues: &mut Vec<Issue>,
521    is_content: bool,
522) {
523    let type_ = fm.get("type").and_then(scalar_string);
524
525    // ── type ────────────────────────────────────────────────────────────────
526    if is_content && type_.is_none() {
527        push(
528            issues,
529            Severity::Error,
530            codes::FM_MISSING_TYPE,
531            rel,
532            fm_key_line_or_top(fm_yaml, "type"),
533            Some("type".into()),
534            "content file has no `type:`".into(),
535            Some("add a `type:` field (e.g. `type: contact`)".into()),
536            vec![],
537        );
538    }
539
540    // ── summary (universal on content files) ──────────────────────────────────
541    if is_content {
542        check_summary(rel, fm, fm_yaml, issues);
543    }
544
545    // ── timestamps: created / updated ─────────────────────────────────────────
546    // The `created`/`updated` contract is content-file-only; meta files
547    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
548    if is_content {
549        for (key, missing_code) in [
550            ("created", codes::FM_MISSING_CREATED),
551            ("updated", codes::FM_MISSING_UPDATED),
552        ] {
553            // A key that is absent, or present-but-`null`, has *no* timestamp →
554            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
555            // "no timestamp", so a null `created:` must read as missing, not
556            // silently pass.
557            let value = fm.get(key);
558            let missing = value.is_none() || value.is_some_and(Value::is_null);
559            if missing {
560                push(
561                    issues,
562                    Severity::Error,
563                    missing_code,
564                    rel,
565                    fm_key_line_or_top(fm_yaml, key),
566                    Some(key.into()),
567                    format!("content file has no `{key}:` timestamp"),
568                    Some(format!(
569                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
570                    )),
571                    vec![],
572                );
573            } else if let Some(v) = value {
574                // Present and non-null. A scalar is checked for ISO-8601; a
575                // sequence/mapping is not a timestamp string at all and so
576                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
577                // through the way it did when `scalar_string` returned `None`
578                // and the branch silently no-oped).
579                match scalar_string(v) {
580                    Some(s) if is_iso8601(&s) => {}
581                    Some(s) => push(
582                        issues,
583                        Severity::Error,
584                        codes::FM_BAD_TIMESTAMP,
585                        rel,
586                        fm_key_line(fm_yaml, key),
587                        Some(key.into()),
588                        format!("`{key}` is not ISO-8601: {s:?}"),
589                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
590                        vec![],
591                    ),
592                    None => push(
593                        issues,
594                        Severity::Error,
595                        codes::FM_BAD_TIMESTAMP,
596                        rel,
597                        fm_key_line(fm_yaml, key),
598                        Some(key.into()),
599                        format!(
600                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
601                        ),
602                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
603                        vec![],
604                    ),
605                }
606            }
607        }
608    }
609    // ── tags shape ────────────────────────────────────────────────────────────
610    if let Some(tags) = fm.get("tags") {
611        if !is_flat_scalar_list(tags) {
612            push(
613                issues,
614                Severity::Warning,
615                codes::TAGS_MALFORMED,
616                rel,
617                fm_key_line(fm_yaml, "tags"),
618                Some("tags".into()),
619                "`tags` must be a flat YAML list of short scalar labels".into(),
620                Some("use block form: one `- <tag>` per line".into()),
621                vec![],
622            );
623        }
624    }
625
626    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
627    for key in detect_flow_form_link_lists(fm_yaml) {
628        push(
629            issues,
630            Severity::Error,
631            codes::WIKI_LINK_FLOW_FORM_LIST,
632            rel,
633            fm_key_line(fm_yaml, &key),
634            Some(key.clone()),
635            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
636            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
637            vec![],
638        );
639    }
640
641    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
642    // Skip keys that have an explicit `link to` schema spec — those are checked
643    // (with prefix enforcement) in `check_schema`, and double-reporting the same
644    // link via two paths would be noise.
645    let schema_link_keys: BTreeSet<String> =
646        effective_schema(store, type_.as_deref().unwrap_or(""))
647            .map(|s| {
648                s.fields
649                    .iter()
650                    .filter(|f| f.link_prefix.is_some())
651                    .map(|f| f.name.clone())
652                    .collect()
653            })
654            .unwrap_or_default();
655    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
656        if schema_link_keys.contains(&key) {
657            continue;
658        }
659        check_wiki_link(
660            store,
661            rel,
662            &link,
663            Some(link.line),
664            Some(&key),
665            basenames,
666            issues,
667        );
668    }
669
670    // ── policies: ignored types ──────────────────────────────────────────────
671    if let Some(t) = &type_ {
672        if store.config.ignored_types.iter().any(|it| it == t) {
673            push(
674                issues,
675                Severity::Info,
676                codes::POLICY_IGNORED_TYPE_PRESENT,
677                rel,
678                fm_key_line(fm_yaml, "type"),
679                Some("type".into()),
680                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
681                Some(
682                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
683                        .into(),
684                ),
685                // The policy source: `DB.md` declares the ignored type.
686                vec![PathBuf::from("DB.md")],
687            );
688        }
689        // A wiki-page deriving from an ignored-type record → warning. The
690        // decision lives in the shared `derived_from_ignored_type` entry point;
691        // this side only supplies the `derived_from` targets (with their line,
692        // which the issue carries) and renders the finding.
693        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
694            if let Some(hit) =
695                derived_from_ignored_type(store, t, std::iter::once(link.target.as_str()))
696            {
697                push(
698                    issues,
699                    Severity::Warning,
700                    codes::POLICY_IGNORED_TYPE_DERIVED,
701                    rel,
702                    Some(link.line),
703                    Some("derived_from".into()),
704                    format!(
705                        "wiki-page derives from ignored-type record `{}` (type `{}`)",
706                        hit.target, hit.target_type
707                    ),
708                    Some(
709                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
710                            .into(),
711                    ),
712                    // The ignored-type source record, plus `DB.md` (the policy
713                    // source that lists the ignored type).
714                    vec![
715                        PathBuf::from(format!("{}.md", hit.target)),
716                        PathBuf::from("DB.md"),
717                    ],
718                );
719            }
720        }
721    }
722
723    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
724    if let Some(t) = &type_ {
725        if let Some(schema) = effective_schema(store, t) {
726            check_schema(store, rel, fm, fm_yaml, &schema, issues);
727        }
728    }
729}
730
731/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
732fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
733    let line = fm_key_line(fm_yaml, "summary");
734    match fm.get("summary") {
735        None => push(
736            issues,
737            Severity::Error,
738            codes::SUMMARY_MISSING,
739            rel,
740            // A missing `summary` key has no line of its own → anchor to the
741            // frontmatter block top (line 1), the EXPECTED field-absence rule.
742            fm_key_line_or_top(fm_yaml, "summary"),
743            Some("summary".into()),
744            "content file has no `summary`".into(),
745            Some("run `dbmd fm init`".into()),
746            vec![],
747        ),
748        Some(v) => {
749            let s = scalar_string(v).unwrap_or_default();
750            if s.trim().is_empty() {
751                push(
752                    issues,
753                    Severity::Error,
754                    codes::SUMMARY_EMPTY,
755                    rel,
756                    line,
757                    Some("summary".into()),
758                    "`summary` is present but empty".into(),
759                    Some("write a one-line summary, or run `dbmd fm init`".into()),
760                    vec![],
761                );
762            } else if s.contains('\n') {
763                push(
764                    issues,
765                    Severity::Error,
766                    codes::SUMMARY_MULTILINE,
767                    rel,
768                    line,
769                    Some("summary".into()),
770                    "`summary` must be one line (contains a newline)".into(),
771                    Some("collapse the summary to a single line".into()),
772                    vec![],
773                );
774            } else if s.chars().count() > MAX_SUMMARY_LEN {
775                push(
776                    issues,
777                    Severity::Warning,
778                    codes::SUMMARY_TOO_LONG,
779                    rel,
780                    line,
781                    Some("summary".into()),
782                    format!(
783                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
784                        s.chars().count()
785                    ),
786                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
787                    vec![],
788                );
789            }
790        }
791    }
792}
793
794/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
795fn check_body_wiki_links(
796    store: &Store,
797    rel: &Path,
798    body: &str,
799    fm_end_line: u32,
800    basenames: Option<&BasenameIndex>,
801    issues: &mut Vec<Issue>,
802) {
803    for link in extract_wiki_links(body) {
804        // Body lines are offset past the frontmatter block. `link.line` is
805        // 1-based within `body`; the body starts at `fm_end_line + 1`.
806        let abs_line = fm_end_line + link.line;
807        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
808    }
809}
810
/// Store-wide lookup from a bare basename (the file stem, without `.md`) to
/// every store-relative path that carries it.
///
/// It is built once per `validate --all` sweep. With it, the short-form
/// wiki-link check can tell a merely short-form target
/// (`WIKI_LINK_SHORT_FORM`) from an *ambiguous* one whose basename matches two
/// or more files (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set
/// path passes `None` instead: that loop is O(changed) and never walks the
/// store, so it reports the plain short-form error without the scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the already-swept file list (gathered by `validate_all`, so no extra
/// walk happens here) by file stem into a [`BasenameIndex`].
///
/// Paths whose stem is missing or not valid UTF-8 are left out. Within one
/// stem, paths keep the order they have in `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files.iter().fold(BasenameIndex::new(), |mut index, path| {
        if let Some(stem) = path.file_stem().and_then(|os| os.to_str()) {
            index.entry(stem.to_owned()).or_default().push(path.clone());
        }
        index
    })
}
831
832/// The shared per-wiki-link doctrine + integrity check used by both body links
833/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
834/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
835/// when its bare basename matches ≥2 files.
836fn check_wiki_link(
837    store: &Store,
838    rel: &Path,
839    link: &Link,
840    line: Option<u32>,
841    key: Option<&str>,
842    basenames: Option<&BasenameIndex>,
843    issues: &mut Vec<Issue>,
844) {
845    let bare = link.target.trim_end_matches(".md");
846
847    // Short-form: not a full store-relative path (no `/`, or first segment isn't
848    // a known layer).
849    if !is_full_store_path(bare) {
850        // Ambiguous (defensive) takes precedence over plain short-form when the
851        // target is a bare basename (no `/`) that matches ≥2 files in the store.
852        // Only computable in the sweep (where `basenames` is populated); the
853        // working-set path falls through to the plain short-form error.
854        if !bare.contains('/') {
855            if let Some(idx) = basenames {
856                if let Some(matches) = idx.get(bare) {
857                    if matches.len() >= 2 {
858                        let mut related = matches.clone();
859                        related.sort();
860                        push(
861                            issues,
862                            Severity::Error,
863                            codes::WIKI_LINK_AMBIGUOUS,
864                            rel,
865                            line,
866                            key.map(str::to_string),
867                            format!(
868                                "short-form wiki-link `[[{}]]` matches multiple files",
869                                link.target
870                            ),
871                            Some("use the full store-relative path to disambiguate".into()),
872                            related,
873                        );
874                        return;
875                    }
876                }
877            }
878        }
879        push(
880            issues,
881            Severity::Error,
882            codes::WIKI_LINK_SHORT_FORM,
883            rel,
884            line,
885            key.map(str::to_string),
886            format!(
887                "wiki-link `[[{}]]` is not a full store-relative path",
888                link.target
889            ),
890            short_form_suggestion(bare),
891            vec![],
892        );
893        // Don't also report broken; the agent must fix the form first.
894        return;
895    }
896
897    // `.md` extension → warning, then still check existence.
898    if link.target.ends_with(".md") {
899        push(
900            issues,
901            Severity::Warning,
902            codes::WIKI_LINK_HAS_EXTENSION,
903            rel,
904            line,
905            key.map(str::to_string),
906            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
907            Some(format!("drop the extension: [[{bare}]]")),
908            vec![],
909        );
910    }
911
912    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
913    // same way the graph engine does — the literal path first (so a link to a
914    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
915    // the `.md`-appended path.
916    match resolve_wiki_target(store, bare) {
917        TargetResolution::Exists => {}
918        TargetResolution::Missing => push(
919            issues,
920            Severity::Error,
921            codes::WIKI_LINK_BROKEN,
922            rel,
923            line,
924            key.map(str::to_string),
925            format!("wiki-link target `{bare}` doesn't exist"),
926            Some(format!(
927                "create `{bare}.md`, or point the link at an existing file"
928            )),
929            vec![],
930        ),
931        TargetResolution::Unsafe => push(
932            issues,
933            Severity::Error,
934            codes::WIKI_LINK_BROKEN,
935            rel,
936            line,
937            key.map(str::to_string),
938            format!("wiki-link target `{bare}` is not a safe store-relative path"),
939            Some("use a full store-relative path under sources/, records/, or wiki/".into()),
940            vec![],
941        ),
942    }
943}
944
945// ─────────────────────────────────────────────────────────────────────────────
946//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
947// ─────────────────────────────────────────────────────────────────────────────
948
949/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
950/// block, or `None`. This is the **only** source of schema enforcement — the
951/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
952/// store that wants its `contact` / `expense` / etc. fields enforced declares
953/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
954/// copy-in starting point.
955fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
956    store.config.schemas.get(type_).cloned()
957}
958
959/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
960fn check_schema(
961    store: &Store,
962    rel: &Path,
963    fm: &BTreeMap<String, Value>,
964    fm_yaml: &str,
965    schema: &Schema,
966    issues: &mut Vec<Issue>,
967) {
968    for spec in &schema.fields {
969        let present = fm.get(&spec.name);
970        let line = fm_key_line(fm_yaml, &spec.name);
971
972        // Required. "Empty" means: the key is absent, or its value carries no
973        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
974        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
975        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
976        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
977        // a required field with a null or empty-collection value pass silently;
978        // route them through `is_empty_value` instead.
979        let is_empty = match present {
980            None => true,
981            Some(v) => is_empty_value(v),
982        };
983        if spec.required && is_empty {
984            push(
985                issues,
986                Severity::Error,
987                codes::SCHEMA_MISSING_REQUIRED,
988                rel,
989                // Absent key → anchor to the frontmatter top (line 1); a
990                // present-but-empty value keeps its own line.
991                fm_key_line_or_top(fm_yaml, &spec.name),
992                Some(spec.name.clone()),
993                format!("required field `{}` is absent or empty", spec.name),
994                Some(format!("set `{}` to a non-empty value", spec.name)),
995                vec![],
996            );
997            continue;
998        }
999        let Some(value) = present else { continue };
1000
1001        // An OPTIONAL field that is `null` or empty is simply unset — there is
1002        // no value to shape/enum/link-check. (The required+empty case already
1003        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1004        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1005        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1006        let value_empty = value.is_null()
1007            || scalar_string(value)
1008                .map(|s| s.trim().is_empty())
1009                .unwrap_or(false);
1010        if !spec.required && value_empty {
1011            continue;
1012        }
1013
1014        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1015        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1016        if let Some(prefix) = &spec.link_prefix {
1017            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1018            continue; // a link field is never also shape/enum-checked
1019        }
1020
1021        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1022        // or mapping satisfies neither, and would otherwise slip through both
1023        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1024        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1025        // than let a structurally-wrong value validate clean. (Link fields,
1026        // which legitimately take block-form sequences, already `continue`d.)
1027        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1028            push(
1029                issues,
1030                Severity::Error,
1031                codes::SCHEMA_SHAPE_MISMATCH,
1032                rel,
1033                line,
1034                Some(spec.name.clone()),
1035                format!(
1036                    "`{}` must be a scalar value, found a list or mapping",
1037                    spec.name
1038                ),
1039                Some(format!("set `{}` to a single scalar value", spec.name)),
1040                vec![],
1041            );
1042            continue;
1043        }
1044
1045        // enum
1046        if let Some(allowed) = &spec.enum_values {
1047            if let Some(s) = scalar_string(value) {
1048                if !allowed.iter().any(|a| a == &s) {
1049                    push(
1050                        issues,
1051                        Severity::Error,
1052                        codes::SCHEMA_ENUM_VIOLATION,
1053                        rel,
1054                        line,
1055                        Some(spec.name.clone()),
1056                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1057                        Some(format!("use one of: {}", allowed.join(", "))),
1058                        vec![],
1059                    );
1060                }
1061            }
1062            continue;
1063        }
1064
1065        // shape
1066        if let Some(shape) = spec.shape {
1067            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1068        }
1069    }
1070}
1071
1072/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1073/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1074/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1075/// recognized exactly like the quoted form.
1076fn check_schema_link(
1077    store: &Store,
1078    rel: &Path,
1079    field: &str,
1080    fm_yaml: &str,
1081    prefix: &Path,
1082    line: Option<u32>,
1083    issues: &mut Vec<Issue>,
1084) {
1085    let prefix_str = prefix.to_string_lossy();
1086    let prefix_str = prefix_str.trim_end_matches('/');
1087    let suggestion = |target_leaf: &str| {
1088        Some(format!(
1089            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1090        ))
1091    };
1092
1093    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1094    if links.is_empty() {
1095        // No wiki-link in the field's value → it's a plain string.
1096        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1097        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1098        let leaf = slugish(raw);
1099        push(
1100            issues,
1101            Severity::Error,
1102            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1103            rel,
1104            line,
1105            Some(field.to_string()),
1106            format!(
1107                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1108            ),
1109            suggestion(&leaf),
1110            vec![],
1111        );
1112        return;
1113    }
1114
1115    for link in links {
1116        if link.target.ends_with(".md") {
1117            let bare = link.target.trim_end_matches(".md");
1118            push(
1119                issues,
1120                Severity::Warning,
1121                codes::WIKI_LINK_HAS_EXTENSION,
1122                rel,
1123                Some(link.line),
1124                Some(field.to_string()),
1125                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1126                Some(format!("drop the extension: [[{bare}]]")),
1127                vec![],
1128            );
1129        }
1130        let bare = link.target.trim_end_matches(".md");
1131        if !path_under_prefix(bare, prefix_str) {
1132            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1133            push(
1134                issues,
1135                Severity::Error,
1136                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1137                rel,
1138                line,
1139                Some(field.to_string()),
1140                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1141                suggestion(leaf),
1142                vec![],
1143            );
1144        } else {
1145            // Correct prefix — still surface a broken target so the agent sees
1146            // one consistent vocabulary. Resolve like the graph engine (literal
1147            // path first, then `.md`) so a `link to sources/` field pointing at a
1148            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1149            match resolve_wiki_target(store, bare) {
1150                TargetResolution::Exists => {}
1151                TargetResolution::Missing => push(
1152                    issues,
1153                    Severity::Error,
1154                    codes::WIKI_LINK_BROKEN,
1155                    rel,
1156                    line,
1157                    Some(field.to_string()),
1158                    format!("wiki-link target `{bare}` doesn't exist"),
1159                    Some(format!(
1160                        "create `{bare}.md`, or point the link at an existing file"
1161                    )),
1162                    vec![],
1163                ),
1164                TargetResolution::Unsafe => push(
1165                    issues,
1166                    Severity::Error,
1167                    codes::WIKI_LINK_BROKEN,
1168                    rel,
1169                    line,
1170                    Some(field.to_string()),
1171                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1172                    Some(
1173                        "use a full store-relative path under sources/, records/, or wiki/".into(),
1174                    ),
1175                    vec![],
1176                ),
1177            }
1178        }
1179    }
1180}
1181
1182/// Shape enforcement for a non-link, non-enum schema field.
1183fn check_schema_shape(
1184    rel: &Path,
1185    field: &str,
1186    value: &Value,
1187    shape: Shape,
1188    line: Option<u32>,
1189    issues: &mut Vec<Issue>,
1190) {
1191    let s = scalar_string(value).unwrap_or_default();
1192    let ok = match shape {
1193        Shape::String => true, // any scalar string
1194        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1195        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1196        Shape::Date => is_iso8601_date_or_datetime(&s),
1197        Shape::Email => is_email(&s),
1198        Shape::Currency => is_currency(&s),
1199        Shape::Url => is_url(&s),
1200    };
1201    if !ok {
1202        push(
1203            issues,
1204            Severity::Error,
1205            codes::SCHEMA_SHAPE_MISMATCH,
1206            rel,
1207            line,
1208            Some(field.to_string()),
1209            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1210            Some(shape_suggestion(shape)),
1211            vec![],
1212        );
1213    }
1214}
1215
1216// ─────────────────────────────────────────────────────────────────────────────
1217//  Cross-file: entity-dedup collisions (validate_all only)
1218// ─────────────────────────────────────────────────────────────────────────────
1219
1220/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1221///
1222/// `DUP_ID` is universal (two files with the same explicit `id`).
1223/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1224/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1225/// uniqueness constraint, and two records of that type whose declared values
1226/// collide warn. No type carries a built-in dedup key — the store opts in.
1227///
1228/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1229/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1230/// lexicographically smallest store-relative path in the group (a total order →
1231/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1232/// that field's line on the reported file and carries it as `key`; a multi-field
1233/// key anchors to line 1 with a null key.
1234fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1235    // Path → frontmatter YAML, for resolving the anchor field's line on the
1236    // reported (smallest-path) member.
1237    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1238        .iter()
1239        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1240        .collect();
1241
1242    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1243    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1244    for (rel, p) in parsed {
1245        if let Some(map) = &p.fm {
1246            if let Some(id) = map.get("id").and_then(scalar_string) {
1247                if !id.trim().is_empty() {
1248                    by_id.entry(id).or_default().push(rel.clone());
1249                }
1250            }
1251        }
1252    }
1253    for (id, files) in &by_id {
1254        if files.len() > 1 {
1255            let (reported, related) = canonical_and_related(files);
1256            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1257            push(
1258                issues,
1259                Severity::Error,
1260                codes::DUP_ID,
1261                &reported,
1262                line,
1263                Some("id".into()),
1264                format!("id {id:?} is declared by more than one file"),
1265                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1266                related,
1267            );
1268        }
1269    }
1270
1271    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1272    // Every constraint comes from the store's `## Schemas`; a type with no
1273    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1274    // key-ordered, so emitted issues are deterministic across runs.
1275    for (type_name, schema) in &store.config.schemas {
1276        for key_fields in &schema.unique_keys {
1277            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1278        }
1279    }
1280}
1281
1282/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1283/// declared `key_fields` render to the same token tuple. Files missing any key
1284/// field are skipped — an incomplete key is never a collision.
1285///
1286/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1287/// store-relative path; `related` is the rest. A single-field key anchors to
1288/// that field's line on the reported file and carries it as `key`; a multi-field
1289/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1290fn soft_dup(
1291    parsed: &[(PathBuf, Parsed)],
1292    issues: &mut Vec<Issue>,
1293    type_: &str,
1294    key_fields: &[String],
1295    fm_yaml_of: &HashMap<&PathBuf, &str>,
1296) {
1297    if key_fields.is_empty() {
1298        return;
1299    }
1300    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1301    for (rel, p) in parsed {
1302        let is_type =
1303            p.fm.as_ref()
1304                .and_then(|m| m.get("type"))
1305                .and_then(scalar_string)
1306                .map(|t| t == type_)
1307                .unwrap_or(false);
1308        if !is_type {
1309            continue;
1310        }
1311        if let Some(key) = dedup_key(p, key_fields) {
1312            groups.entry(key).or_default().push(rel.clone());
1313        }
1314    }
1315    // HashMap iteration is nondeterministic; sort by reported member so the
1316    // emitted issue order is stable across runs.
1317    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1318        .values()
1319        .filter(|files| files.len() > 1)
1320        .map(|files| canonical_and_related(files))
1321        .collect();
1322    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1323
1324    let fields_disp = key_fields.join(", ");
1325    for (reported, related) in collisions {
1326        // Single-field keys anchor to the field's line + carry the key; multi-
1327        // field keys anchor to line 1 with a null key.
1328        let (line, key) = if key_fields.len() == 1 {
1329            (
1330                fm_yaml_of
1331                    .get(&reported)
1332                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1333                Some(key_fields[0].clone()),
1334            )
1335        } else {
1336            (Some(1), None)
1337        };
1338        let n = related.len();
1339        push(
1340            issues,
1341            Severity::Warning,
1342            codes::DUP_UNIQUE_KEY,
1343            &reported,
1344            line,
1345            key,
1346            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1347            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1348            related,
1349        );
1350    }
1351}
1352
1353/// Render a type's `unique:` key for one file: each field's dedup token in
1354/// order, or `None` if any field is absent/empty (an incomplete key never
1355/// collides).
1356fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1357    let mut out = Vec::with_capacity(key_fields.len());
1358    for f in key_fields {
1359        out.push(dedup_token(p, f)?);
1360    }
1361    Some(out)
1362}
1363
1364/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1365/// values (single or block-sequence list) reduce to their lower-cased target
1366/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1367/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1368fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1369    // Wiki-links first — read from the raw frontmatter text so the unquoted
1370    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1371    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1372    if !links.is_empty() {
1373        let set: BTreeSet<String> = links
1374            .into_iter()
1375            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1376            .filter(|t| !t.is_empty())
1377            .collect();
1378        return if set.is_empty() {
1379            None
1380        } else {
1381            Some(set.into_iter().collect::<Vec<_>>().join(","))
1382        };
1383    }
1384    match p.fm.as_ref()?.get(field) {
1385        Some(Value::Sequence(items)) => {
1386            let set: BTreeSet<String> = items
1387                .iter()
1388                .filter_map(scalar_string)
1389                .map(|s| s.trim().to_lowercase())
1390                .filter(|t| !t.is_empty())
1391                .collect();
1392            if set.is_empty() {
1393                None
1394            } else {
1395                Some(set.into_iter().collect::<Vec<_>>().join(","))
1396            }
1397        }
1398        Some(v) => {
1399            let s = scalar_string(v)?.trim().to_lowercase();
1400            if s.is_empty() {
1401                None
1402            } else {
1403                Some(s)
1404            }
1405        }
1406        None => None,
1407    }
1408}
1409
/// Split a non-empty collision group into `(reported, related)`: the
/// lexicographically smallest store-relative path is the reported member; the
/// rest, sorted ascending, are `related`. Deterministic because store-relative
/// path is a total order — the property reporting rule #1 relies on.
///
/// The input slice is left untouched; each path is cloned exactly once.
///
/// # Panics
///
/// Panics if `files` is empty. Callers only pass groups with at least two
/// members.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut sorted = files.to_vec();
    sorted.sort();
    // Move the sorted paths out instead of cloning them a second time: the
    // first is the reported member, the remainder is `related`.
    let mut members = sorted.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1421
1422// ─────────────────────────────────────────────────────────────────────────────
1423//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1424// ─────────────────────────────────────────────────────────────────────────────
1425
1426/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1427fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1428    // Group content files by their immediate parent folder (the type-folder,
1429    // *across date shards* — a sharded file's "type folder" is the folder right
1430    // under the layer). We key on the type-folder so shards roll up correctly.
1431    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1432    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1433    for rel in files {
1434        // The layer is the first path component — recorded independently of the
1435        // type-folder so a layer containing only loose files still requires an
1436        // `index.md`.
1437        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1438            match layer {
1439                "sources" => layers_present.insert("sources"),
1440                "records" => layers_present.insert("records"),
1441                "wiki" => layers_present.insert("wiki"),
1442                _ => false,
1443            };
1444        }
1445        if let Some(tf) = type_folder_of(rel) {
1446            type_folders.entry(tf).or_default().push(rel.clone());
1447        }
1448    }
1449
1450    // ── Root index.md ─────────────────────────────────────────────────────────
1451    if !files.is_empty() {
1452        let root_index = store.root.join("index.md");
1453        if !root_index.is_file() {
1454            push(
1455                issues,
1456                Severity::Error,
1457                codes::INDEX_MISSING,
1458                Path::new("index.md"),
1459                None,
1460                None,
1461                "store has files but no root `index.md`".into(),
1462                Some("run `dbmd index rebuild`".into()),
1463                vec![],
1464            );
1465        } else {
1466            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1467        }
1468    }
1469
1470    // ── Layer index.md ────────────────────────────────────────────────────────
1471    for layer in &layers_present {
1472        let layer_index_rel = PathBuf::from(layer).join("index.md");
1473        let abs = store.root.join(&layer_index_rel);
1474        if !abs.is_file() {
1475            push(
1476                issues,
1477                Severity::Error,
1478                codes::INDEX_MISSING,
1479                &layer_index_rel,
1480                None,
1481                None,
1482                format!("layer `{layer}/` has files but no `index.md`"),
1483                Some("run `dbmd index rebuild`".into()),
1484                vec![],
1485            );
1486        } else {
1487            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1488        }
1489    }
1490
1491    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1492    for (tf, members) in &type_folders {
1493        let index_md_rel = tf.join("index.md");
1494        let index_md_abs = store.root.join(&index_md_rel);
1495        let index_md_present = index_md_abs.is_file();
1496        if !index_md_present {
1497            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1498            // on the FOLDER (not the would-be `index.md` path). When the index is
1499            // entirely missing we do NOT additionally evaluate per-entry
1500            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1501            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1502            push(
1503                issues,
1504                Severity::Error,
1505                codes::INDEX_MISSING,
1506                tf,
1507                None,
1508                None,
1509                format!("non-empty folder `{}` has no index.md", tf.display()),
1510                Some(format!(
1511                    "run `dbmd index rebuild --folder {}`",
1512                    tf.display()
1513                )),
1514                vec![],
1515            );
1516            continue;
1517        }
1518
1519        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1520        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1521
1522        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1523        // when the `index.md` is present (above): a folder whose entire index is
1524        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1525        let jsonl_rel = tf.join("index.jsonl");
1526        let jsonl_abs = store.root.join(&jsonl_rel);
1527        if !jsonl_abs.is_file() {
1528            push(
1529                issues,
1530                Severity::Error,
1531                codes::INDEX_JSONL_MISSING,
1532                &jsonl_rel,
1533                None,
1534                None,
1535                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1536                Some("run `dbmd index rebuild`".into()),
1537                vec![],
1538            );
1539        } else {
1540            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1541        }
1542    }
1543
1544    // ── Orphan index.md: an index file in a folder with no content. ──────────
1545    for rel in walk_index_files(&store.root) {
1546        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1547        let parent_str = parent.to_string_lossy().to_string();
1548        let is_canonical = parent_str.is_empty() // root
1549            || matches!(parent_str.as_str(), "sources" | "records" | "wiki")
1550            || type_folders.contains_key(&parent);
1551        if !is_canonical {
1552            push(
1553                issues,
1554                Severity::Warning,
1555                codes::INDEX_ORPHAN,
1556                &rel,
1557                None,
1558                None,
1559                format!(
1560                    "`{}` sits in an empty or non-canonical folder",
1561                    rel.display()
1562                ),
1563                Some("remove it, or run `dbmd index rebuild`".into()),
1564                vec![],
1565            );
1566        }
1567    }
1568}
1569
1570/// Check a type-folder `index.md`'s entries against the folder's actual files:
1571/// stale entries (target gone), missing entries (file not listed), and
1572/// summary mismatches.
1573fn check_type_folder_index_md(
1574    store: &Store,
1575    tf: &Path,
1576    index_rel: &Path,
1577    members: &[PathBuf],
1578    issues: &mut Vec<Issue>,
1579) {
1580    let abs = store.root.join(index_rel);
1581    let Ok(text) = std::fs::read_to_string(&abs) else {
1582        return;
1583    };
1584    let entries = parse_index_entries(&text);
1585
1586    let listed: BTreeSet<PathBuf> = entries
1587        .iter()
1588        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1589        .collect();
1590
1591    // Stale entries + summary mismatch.
1592    for entry in &entries {
1593        let bare = entry.target.trim_end_matches(".md");
1594        // Resolve like the graph engine (literal path first, then `.md`) so an
1595        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1596        let target_abs = match resolved_target_abs(store, bare) {
1597            Some(abs) => abs,
1598            None => {
1599                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1600                    push(
1601                        issues,
1602                        Severity::Error,
1603                        codes::INDEX_STALE_ENTRY,
1604                        index_rel,
1605                        Some(entry.line),
1606                        None,
1607                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1608                        Some("run `dbmd index rebuild`".into()),
1609                        vec![],
1610                    );
1611                } else {
1612                    push(
1613                        issues,
1614                        Severity::Error,
1615                        codes::INDEX_STALE_ENTRY,
1616                        index_rel,
1617                        Some(entry.line),
1618                        None,
1619                        format!("index entry `[[{bare}]]` points at a missing file"),
1620                        Some("run `dbmd index rebuild`".into()),
1621                        // The stale target the entry names (the file that no
1622                        // longer exists) — so the agent can locate the dangling
1623                        // reference.
1624                        vec![PathBuf::from(format!("{bare}.md"))],
1625                    );
1626                }
1627                continue;
1628            }
1629        };
1630        // Summary mismatch: the entry text must equal the file's `summary`. A
1631        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1632        // summary is also a mismatch — the SPEC requires every type-folder index
1633        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1634        // missing quote can't validate clean just because there's nothing to
1635        // compare.
1636        if let Some(expected) = read_summary(&target_abs) {
1637            match &entry.summary_text {
1638                Some(text_part) if text_part.trim() != expected.trim() => {
1639                    push(
1640                        issues,
1641                        Severity::Error,
1642                        codes::INDEX_SUMMARY_MISMATCH,
1643                        index_rel,
1644                        Some(entry.line),
1645                        None,
1646                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1647                        Some("run `dbmd index rebuild`".into()),
1648                        vec![PathBuf::from(format!("{bare}.md"))],
1649                    );
1650                }
1651                None if !expected.trim().is_empty() => {
1652                    push(
1653                        issues,
1654                        Severity::Error,
1655                        codes::INDEX_SUMMARY_MISMATCH,
1656                        index_rel,
1657                        Some(entry.line),
1658                        None,
1659                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1660                        Some("run `dbmd index rebuild`".into()),
1661                        vec![PathBuf::from(format!("{bare}.md"))],
1662                    );
1663                }
1664                _ => {}
1665            }
1666        }
1667    }
1668
1669    // Missing entries: a member file not listed. Skip the index/log meta files.
1670    // The browse view caps at 500; only flag a missing entry when the folder is
1671    // under the cap (a capped folder legitimately omits older files).
1672    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1673    if content_members.len() <= 500 {
1674        for m in content_members {
1675            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1676            if !listed.contains(&bare) {
1677                push(
1678                    issues,
1679                    Severity::Error,
1680                    codes::INDEX_MISSING_ENTRY,
1681                    index_rel,
1682                    None,
1683                    None,
1684                    format!(
1685                        "file `{}` is not listed in its folder's `index.md`",
1686                        m.display()
1687                    ),
1688                    Some("run `dbmd index rebuild`".into()),
1689                    vec![(*m).clone()],
1690                );
1691            }
1692        }
1693    }
1694    let _ = tf;
1695}
1696
1697/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1698/// folder (uncapped), every record must point at a real file, and each record's
1699/// fields must match the file's frontmatter.
1700fn check_type_folder_index_jsonl(
1701    store: &Store,
1702    tf: &Path,
1703    jsonl_rel: &Path,
1704    members: &[PathBuf],
1705    issues: &mut Vec<Issue>,
1706) {
1707    let abs = store.root.join(jsonl_rel);
1708    let Ok(text) = std::fs::read_to_string(&abs) else {
1709        return;
1710    };
1711
1712    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1713    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1714    for (i, line) in text.lines().enumerate() {
1715        let line = line.trim();
1716        if line.is_empty() {
1717            continue;
1718        }
1719        let rec: serde_json::Value = match serde_json::from_str(line) {
1720            Ok(v) => v,
1721            Err(e) => {
1722                push(
1723                    issues,
1724                    Severity::Error,
1725                    codes::INDEX_JSONL_DESYNC,
1726                    jsonl_rel,
1727                    Some((i + 1) as u32),
1728                    None,
1729                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1730                    Some("run `dbmd index rebuild`".into()),
1731                    vec![],
1732                );
1733                continue;
1734            }
1735        };
1736        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1737            if !is_safe_store_relative_path(Path::new(path)) {
1738                push(
1739                    issues,
1740                    Severity::Error,
1741                    codes::INDEX_JSONL_DESYNC,
1742                    jsonl_rel,
1743                    Some((i + 1) as u32),
1744                    None,
1745                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1746                    Some("run `dbmd index rebuild`".into()),
1747                    vec![],
1748                );
1749                continue;
1750            }
1751            records.insert(PathBuf::from(path), rec);
1752        }
1753    }
1754
1755    let member_set: BTreeSet<PathBuf> = members
1756        .iter()
1757        .filter(|m| is_content_file(m))
1758        .cloned()
1759        .collect();
1760
1761    // jsonl record → missing file = desync.
1762    for path in records.keys() {
1763        let target_abs = store.root.join(path);
1764        if !target_abs.is_file() {
1765            push(
1766                issues,
1767                Severity::Error,
1768                codes::INDEX_JSONL_DESYNC,
1769                jsonl_rel,
1770                None,
1771                None,
1772                format!(
1773                    "`index.jsonl` record points at missing file `{}`",
1774                    path.display()
1775                ),
1776                Some("run `dbmd index rebuild`".into()),
1777                vec![],
1778            );
1779        }
1780    }
1781
1782    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1783    for m in &member_set {
1784        if !records.contains_key(m) {
1785            push(
1786                issues,
1787                Severity::Error,
1788                codes::INDEX_JSONL_DESYNC,
1789                jsonl_rel,
1790                None,
1791                None,
1792                format!(
1793                    "file `{}` is missing from the complete `index.jsonl`",
1794                    m.display()
1795                ),
1796                Some("run `dbmd index rebuild`".into()),
1797                vec![m.clone()],
1798            );
1799        }
1800    }
1801
1802    // Record fields stale vs. frontmatter. SPEC § Validation defines
1803    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1804    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1805    // search paths read every field straight from these sidecars (`tags`,
1806    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1807    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1808    // a stale value answer queries with data that exists in no `.md` file.
1809    //
1810    // Rather than re-list (and drift from) every projected key, rebuild the
1811    // record the canonical projection would write for this file
1812    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1813    // and diff the two as flat JSON maps. Every key the projection emits is
1814    // covered automatically; `path` is the join key and is skipped.
1815    for (path, rec) in &records {
1816        let target_abs = store.root.join(path);
1817        if !target_abs.is_file() {
1818            continue;
1819        }
1820        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1821        else {
1822            continue; // unreadable / unparseable frontmatter is reported elsewhere
1823        };
1824        let Ok(expected_json) = serde_json::to_value(&expected) else {
1825            continue;
1826        };
1827        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1828            continue;
1829        };
1830
1831        // Compare the union of keys present on either side; a key the file
1832        // projects but the sidecar omits is just as stale as a wrong value.
1833        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1834        for key in have.keys().chain(want.keys()) {
1835            if key == "path" {
1836                continue;
1837            }
1838            if have.get(key) != want.get(key) {
1839                mismatched_keys.insert(key);
1840            }
1841        }
1842
1843        if !mismatched_keys.is_empty() {
1844            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1845            push(
1846                issues,
1847                Severity::Error,
1848                codes::INDEX_JSONL_STALE,
1849                jsonl_rel,
1850                None,
1851                Some(keys.join(",")),
1852                format!(
1853                    "`index.jsonl` record for `{}` is stale ({})",
1854                    path.display(),
1855                    keys.join(", ")
1856                ),
1857                Some("run `dbmd index rebuild`".into()),
1858                vec![path.clone()],
1859            );
1860        }
1861    }
1862    let _ = tf;
1863}
1864
1865/// Check an index's `scope:` frontmatter against its filesystem location.
1866fn check_index_scope(
1867    store: &Store,
1868    index_rel: &Path,
1869    expected_scope: &str,
1870    expected_folder: Option<&str>,
1871    issues: &mut Vec<Issue>,
1872) {
1873    let abs = store.root.join(index_rel);
1874    let Ok(text) = std::fs::read_to_string(&abs) else {
1875        return;
1876    };
1877    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1878        return;
1879    };
1880    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1881        return;
1882    };
1883    let fm = yaml_map_to_btree(&map);
1884
1885    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1886        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1887        let scope_ok =
1888            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1889        if !scope_ok {
1890            push(
1891                issues,
1892                Severity::Warning,
1893                codes::INDEX_WRONG_SCOPE,
1894                index_rel,
1895                fm_key_line(&yaml, "scope"),
1896                Some("scope".into()),
1897                format!(
1898                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1899                ),
1900                Some(format!("set `scope: {expected_scope}`")),
1901                vec![],
1902            );
1903        }
1904    }
1905    // folder: must match for layer/type-folder indexes.
1906    if let Some(expected) = expected_folder {
1907        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1908            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1909                push(
1910                    issues,
1911                    Severity::Warning,
1912                    codes::INDEX_WRONG_SCOPE,
1913                    index_rel,
1914                    fm_key_line(&yaml, "folder"),
1915                    Some("folder".into()),
1916                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1917                    Some(format!("set `folder: {expected}`")),
1918                    vec![],
1919                );
1920            }
1921        }
1922    }
1923}
1924
1925// ─────────────────────────────────────────────────────────────────────────────
1926//  Cross-file: log.md well-formedness + ordering (validate_all only)
1927// ─────────────────────────────────────────────────────────────────────────────
1928
1929/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
1930/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
1931///
1932/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
1933/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
1934/// LOG_* checks read only the active file, an entry `validate --all` flagged
1935/// while it lived in `log.md` would stop being flagged the moment a newer-month
1936/// append rotated it into an archive — even though the log readers still surface
1937/// that exact entry to the curator. Scanning the archives too keeps validate and
1938/// the readers in agreement after a rotation.
1939///
1940/// Order: archives oldest-month first, then the active `log.md` last — the true
1941/// chronological timeline — so the out-of-order check threads `prev` across the
1942/// rotation boundary the same way it does within a single file.
1943fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1944    let mut prev: Option<DateTime<FixedOffset>> = None;
1945    for rel in log_files_chronological(store) {
1946        check_log_file(store, &rel, &mut prev, issues);
1947    }
1948}
1949
1950/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
1951/// archive oldest-month first, then the active `log.md` last. Missing files are
1952/// simply absent from the list.
1953fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
1954    let mut files: Vec<PathBuf> = Vec::new();
1955    let archive_dir = store.root.join("log");
1956    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
1957        let mut archives: Vec<PathBuf> = entries
1958            .flatten()
1959            .map(|e| e.path())
1960            .filter(|p| {
1961                p.is_file()
1962                    && p.file_name()
1963                        .and_then(|s| s.to_str())
1964                        .and_then(|n| n.strip_suffix(".md"))
1965                        .is_some_and(is_year_month_archive)
1966            })
1967            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
1968            .collect();
1969        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
1970        archives.sort();
1971        files.extend(archives);
1972    }
1973    // The active file holds the current month — newest, so it comes last.
1974    if store.root.join("log.md").is_file() {
1975        files.push(PathBuf::from("log.md"));
1976    }
1977    files
1978}
1979
1980/// Scan one log file's entry headers, threading the running `prev` timestamp so
1981/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
1982/// given store-relative path so an archived entry points at its archive file.
1983fn check_log_file(
1984    store: &Store,
1985    log_rel: &Path,
1986    prev: &mut Option<DateTime<FixedOffset>>,
1987    issues: &mut Vec<Issue>,
1988) {
1989    let abs = store.root.join(log_rel);
1990    let Ok(text) = std::fs::read_to_string(&abs) else {
1991        return;
1992    };
1993
1994    for (i, line) in text.lines().enumerate() {
1995        if !line.starts_with("## [") {
1996            continue;
1997        }
1998        let line_no = (i + 1) as u32;
1999        match parse_log_header(line) {
2000            None => push(
2001                issues,
2002                Severity::Error,
2003                codes::LOG_BAD_TIMESTAMP,
2004                log_rel,
2005                Some(line_no),
2006                None,
2007                format!("log entry header has an unparseable timestamp: {line:?}"),
2008                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2009                vec![],
2010            ),
2011            Some((ts, kind, _object)) => {
2012                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2013                    push(
2014                        issues,
2015                        Severity::Warning,
2016                        codes::LOG_UNKNOWN_KIND,
2017                        log_rel,
2018                        Some(line_no),
2019                        None,
2020                        format!("log entry kind `{kind}` is not recognized"),
2021                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2022                        vec![],
2023                    );
2024                }
2025                if let Some(p) = *prev {
2026                    if ts < p {
2027                        push(
2028                            issues,
2029                            Severity::Warning,
2030                            codes::LOG_OUT_OF_ORDER,
2031                            log_rel,
2032                            Some(line_no),
2033                            None,
2034                            "log entry is older than the entry above it (possible rewrite)".into(),
2035                            Some("append corrective entries; never reorder past ones".into()),
2036                            vec![],
2037                        );
2038                    }
2039                }
2040                *prev = Some(ts);
2041            }
2042        }
2043    }
2044}
2045
2046// ─────────────────────────────────────────────────────────────────────────────
2047//  Self-contained primitives (collapse onto sibling modules once they land)
2048// ─────────────────────────────────────────────────────────────────────────────
2049
/// A minimal wiki-link found in a body: its target and the 1-based line it
/// appears on. Display text is not retained.
#[derive(Debug)]
struct Link {
    /// The link's target string.
    target: String,
    /// 1-based line number of the link.
    line: u32,
}
2056
2057/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2058/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2059/// exact-cased directory entry to be present.
2060fn store_marker_present(store: &Store) -> bool {
2061    let want = store.root.join("DB.md");
2062    if !want.is_file() {
2063        return false;
2064    }
2065    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2066    match std::fs::read_dir(&store.root) {
2067        Ok(entries) => entries
2068            .flatten()
2069            .any(|e| e.file_name().to_str() == Some("DB.md")),
2070        Err(_) => true, // can't enumerate; trust the is_file() above
2071    }
2072}
2073
2074/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2075/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2076/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2077/// `Schemas`).
2078///
2079/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2080/// than through `check_content_file`. The marker presence is established by the
2081/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2082/// as a store (the marker is the filename), so we report its shape rather than
2083/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2084fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2085    let rel = Path::new("DB.md");
2086    let abs = store.root.join("DB.md");
2087    let Ok(text) = std::fs::read_to_string(&abs) else {
2088        return; // marker present but unreadable: nothing more to say.
2089    };
2090
2091    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2092        // No frontmatter block at all → it cannot declare `type: db-md` and has
2093        // neither required field. Report the type and both missing fields,
2094        // anchored to line 1 (the would-be opening fence).
2095        push(
2096            issues,
2097            Severity::Error,
2098            codes::DB_MD_BAD_TYPE,
2099            rel,
2100            Some(1),
2101            Some("type".into()),
2102            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2103            Some("add a `---` frontmatter block with `type: db-md`".into()),
2104            vec![],
2105        );
2106        for field in ["scope", "owner"] {
2107            push(
2108                issues,
2109                Severity::Error,
2110                codes::DB_MD_MISSING_FIELD,
2111                rel,
2112                Some(1),
2113                Some(field.into()),
2114                format!("DB.md frontmatter is missing required field `{field}`"),
2115                Some(format!("add `{field}:` to the DB.md frontmatter")),
2116                vec![],
2117            );
2118        }
2119        return;
2120    };
2121
2122    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2123    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2124    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2125        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2126        Ok(Value::Null) => Some(BTreeMap::new()),
2127        _ => None,
2128    };
2129
2130    match &fm {
2131        Some(map) => {
2132            // ── type: db-md ──────────────────────────────────────────────────
2133            let type_ = map.get("type").and_then(scalar_string);
2134            if type_.as_deref() != Some("db-md") {
2135                let (line, msg) = match &type_ {
2136                    Some(t) => (
2137                        fm_key_line(&fm_yaml, "type"),
2138                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2139                    ),
2140                    None => (
2141                        Some(1),
2142                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2143                    ),
2144                };
2145                push(
2146                    issues,
2147                    Severity::Error,
2148                    codes::DB_MD_BAD_TYPE,
2149                    rel,
2150                    line,
2151                    Some("type".into()),
2152                    msg,
2153                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2154                    vec![],
2155                );
2156            }
2157
2158            // ── required fields: scope + owner ───────────────────────────────
2159            for field in ["scope", "owner"] {
2160                let present = map
2161                    .get(field)
2162                    .and_then(scalar_string)
2163                    .map(|s| !s.trim().is_empty())
2164                    .unwrap_or(false);
2165                if !present {
2166                    push(
2167                        issues,
2168                        Severity::Error,
2169                        codes::DB_MD_MISSING_FIELD,
2170                        rel,
2171                        // A present-but-empty field anchors to its line; a fully
2172                        // absent one to the block top.
2173                        fm_key_line_or_top(&fm_yaml, field),
2174                        Some(field.into()),
2175                        format!("DB.md frontmatter is missing required field `{field}`"),
2176                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2177                        vec![],
2178                    );
2179                }
2180            }
2181        }
2182        None => {
2183            // Unparseable frontmatter: the identity contract is unprovable. Emit
2184            // the type error and both field errors, anchored to the block top.
2185            push(
2186                issues,
2187                Severity::Error,
2188                codes::DB_MD_BAD_TYPE,
2189                rel,
2190                Some(1),
2191                Some("type".into()),
2192                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2193                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2194                vec![],
2195            );
2196            for field in ["scope", "owner"] {
2197                push(
2198                    issues,
2199                    Severity::Error,
2200                    codes::DB_MD_MISSING_FIELD,
2201                    rel,
2202                    Some(1),
2203                    Some(field.into()),
2204                    format!("DB.md frontmatter is missing required field `{field}`"),
2205                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2206                    vec![],
2207                );
2208            }
2209        }
2210    }
2211
2212    // ── recognized `##` section headers only ─────────────────────────────────
2213    // The body's H2 headings must be one of the three the toolkit reads; any
2214    // other is a likely typo / misplacement (warning — the parser ignores it,
2215    // so the config is not corrupted, but the operator wrote a section that will
2216    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2217    // schema blocks) live under their H2 and are not flagged here.
2218    for section in crate::parser::extract_sections(&body) {
2219        if section.level != 2 {
2220            continue;
2221        }
2222        let name = section.heading.trim().to_ascii_lowercase();
2223        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2224            continue;
2225        }
2226        // `Section::line` is 1-based within the body; the body begins at file
2227        // line `fm_end_line + 1`.
2228        let file_line = fm_end_line + section.line;
2229        push(
2230            issues,
2231            Severity::Warning,
2232            codes::DB_MD_UNKNOWN_SECTION,
2233            rel,
2234            Some(file_line),
2235            None,
2236            format!(
2237                "DB.md has an unrecognized `## {}` section",
2238                section.heading.trim()
2239            ),
2240            Some(
2241                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2242                 remove or rename this heading"
2243                    .into(),
2244            ),
2245            vec![],
2246        );
2247    }
2248
2249    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2250    // Without this, every schema misparse is silent: the operator/agent gets no
2251    // signal that DB.md is interpreting their schema differently from what they
2252    // wrote, and downstream records are validated against the degraded schema.
2253    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2254}
2255
2256/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2257/// duplicate field name within a type, or an unrecognized modifier all parse
2258/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2259/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2260/// already separated out); this pass reports the suspicious *field* shapes,
2261/// anchored to the `### <type>` heading line so the agent can find the block.
2262fn check_db_md_schemas(
2263    store: &Store,
2264    rel: &Path,
2265    body: &str,
2266    fm_end_line: u32,
2267    issues: &mut Vec<Issue>,
2268) {
2269    if store.config.schemas.is_empty() {
2270        return;
2271    }
2272
2273    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2274    // per-type issue can anchor to the declaration block. `extract_sections`
2275    // returns a flat list with 1-based body lines; the body starts at file line
2276    // `fm_end_line + 1`.
2277    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2278    let mut current_h2: Option<String> = None;
2279    for section in crate::parser::extract_sections(body) {
2280        match section.level {
2281            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2282            3 if current_h2.as_deref() == Some("schemas") => {
2283                // The H3 heading text (as written) is the type name — the same
2284                // key `parse_db_md` inserts into `config.schemas`.
2285                type_line
2286                    .entry(section.heading.trim().to_string())
2287                    .or_insert(fm_end_line + section.line);
2288            }
2289            _ => {}
2290        }
2291    }
2292
2293    for (type_name, schema) in &store.config.schemas {
2294        let line = type_line.get(type_name).copied();
2295        let mut seen: BTreeSet<String> = BTreeSet::new();
2296        for field in &schema.fields {
2297            let name = field.name.trim();
2298
2299            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2300            // nameless field that can never match a frontmatter key, so its
2301            // required/shape/enum constraints silently never apply.
2302            if name.is_empty() {
2303                push(
2304                    issues,
2305                    Severity::Warning,
2306                    codes::DB_MD_SCHEMA_FIELD,
2307                    rel,
2308                    line,
2309                    None,
2310                    format!("`### {type_name}` has a schema field bullet with no field name"),
2311                    Some(
2312                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2313                            .into(),
2314                    ),
2315                    vec![],
2316                );
2317                continue;
2318            }
2319
2320            // Duplicate field name within a type: the second declaration's
2321            // constraints are interpreted independently of the first, so the
2322            // author's intent is ambiguous and likely wrong.
2323            if !seen.insert(name.to_string()) {
2324                push(
2325                    issues,
2326                    Severity::Warning,
2327                    codes::DB_MD_SCHEMA_FIELD,
2328                    rel,
2329                    line,
2330                    Some(name.to_string()),
2331                    format!("`### {type_name}` declares field `{name}` more than once"),
2332                    Some(
2333                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2334                    ),
2335                    vec![],
2336                );
2337            }
2338
2339            // Unrecognized modifiers: the parser stashes anything outside the
2340            // known vocabulary (`required` / a shape / `link to …` / `default …`
2341            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2342            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2343            for modifier in &field.unknown_modifiers {
2344                let modifier = modifier.trim();
2345                if modifier.is_empty() {
2346                    continue;
2347                }
2348                push(
2349                    issues,
2350                    Severity::Info,
2351                    codes::DB_MD_SCHEMA_FIELD,
2352                    rel,
2353                    line,
2354                    Some(name.to_string()),
2355                    format!(
2356                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2357                    ),
2358                    Some(
2359                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2360                            .into(),
2361                    ),
2362                    vec![],
2363                );
2364            }
2365        }
2366    }
2367}
2368
2369/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2370fn not_a_store_issue(store: &Store) -> Issue {
2371    Issue {
2372        severity: Severity::Error,
2373        code: codes::NOT_A_STORE,
2374        file: store.root.clone(),
2375        line: None,
2376        key: None,
2377        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2378        suggestion: Some("create a `DB.md` at the store root".into()),
2379        related: vec![],
2380    }
2381}
2382
/// True if a store-relative path is a content file: a `.md` file whose first
/// path segment is one of the three layers (`sources/`, `records/`, `wiki/`)
/// and whose file name is not a derived catalog twin (`index.md` /
/// `index.jsonl`).
///
/// Only the catalog twins are excluded inside a layer. A file named `log.md`
/// or `DB.md` *inside* a layer (e.g. `records/docs/log.md`) counts as content;
/// the root-level `DB.md` / `log.md` are already rejected because their first
/// segment is not a layer.
fn is_content_file(rel: &Path) -> bool {
    let layer = rel.iter().next().and_then(|segment| segment.to_str());
    if !matches!(layer, Some("sources" | "records" | "wiki")) {
        return false;
    }
    match rel.file_name().and_then(|file| file.to_str()) {
        // Derived catalog files are meta, never content.
        Some("index.md" | "index.jsonl") => false,
        Some(file_name) => file_name.ends_with(".md"),
        // No (UTF-8) file name at all: nothing to classify as content.
        None => false,
    }
}
2403
/// True for the store's ROOT meta files: a store-relative path made of exactly
/// one normal component named `DB.md` or `log.md`.
///
/// A same-named file deeper in the tree (e.g. `records/docs/log.md`) has more
/// than one component and is therefore not a root meta file. Per the original
/// note on this helper, such root files reach `check_content_file` only via
/// the working-set incoming-linker scan, and their bodies are deliberately not
/// link-checked there because `validate --all` doesn't link-check them either.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    // Look at the first two components at once: the first must be a plain
    // name, and there must be no second one (otherwise the file has a parent
    // directory and is not at the root).
    match (parts.next(), parts.next()) {
        (Some(Component::Normal(name)), None) => {
            matches!(name.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2420
/// Split a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open with `---` on the very first line and runs
/// to the next line that is `---` (trailing whitespace on either fence is
/// ignored). Returns `None` when the file does not start with a fence or the
/// fence is never closed.
///
/// - `frontmatter_yaml`: the lines between the fences, each terminated by `\n`.
/// - `body`: every line after the closing fence, joined by `\n` (no trailing
///   newline).
/// - `closing_fence_line`: the 1-based file line of the closing `---`.
///
/// A single leading UTF-8 BOM is tolerated, matching parser/store/index (which
/// already strip it); otherwise a BOM-prefixed file would look frontmatter-less
/// here while the rest of the toolkit still indexes it.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let all_lines: Vec<&str> = text.lines().collect();
    let (opening, after_opening) = all_lines.split_first()?;
    if opening.trim_end() != "---" {
        return None;
    }

    // Index (within `after_opening`) of the closing fence; bail out if the
    // block is never closed.
    let close_idx = after_opening
        .iter()
        .position(|line| line.trim_end() == "---")?;

    let mut yaml = String::new();
    for line in &after_opening[..close_idx] {
        yaml.push_str(line);
        yaml.push('\n');
    }

    let body = after_opening[close_idx + 1..].join("\n");

    // `after_opening[0]` is file line 2 (line 1 is the opening fence), so the
    // closing fence sits at `close_idx + 2`.
    let close_line = (close_idx + 2) as u32;
    Some((yaml, body, close_line))
}
2457
2458/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2459fn read_summary(abs: &Path) -> Option<String> {
2460    let text = std::fs::read_to_string(abs).ok()?;
2461    let (yaml, _, _) = split_frontmatter(&text)?;
2462    let value: Value = serde_norway::from_str(&yaml).ok()?;
2463    if let Value::Mapping(m) = value {
2464        m.get(Value::String("summary".into()))
2465            .and_then(scalar_string)
2466    } else {
2467        None
2468    }
2469}
2470
2471/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2472/// non-string keys (frontmatter keys are always strings).
2473fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2474    let mut out = BTreeMap::new();
2475    for (k, v) in map {
2476        if let Value::String(s) = k {
2477            out.insert(s.clone(), v.clone());
2478        }
2479    }
2480    out
2481}
2482
2483/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2484/// sequences/mappings/null.
2485fn scalar_string(v: &Value) -> Option<String> {
2486    match v {
2487        Value::String(s) => Some(s.clone()),
2488        Value::Number(n) => Some(n.to_string()),
2489        Value::Bool(b) => Some(b.to_string()),
2490        _ => None,
2491    }
2492}
2493
2494/// True if a frontmatter value carries no content for a *required*-field check:
2495/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2496/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2497/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2498/// a shape/enum field is caught by the later non-scalar shape check, not by the
2499/// required-presence check.
2500fn is_empty_value(v: &Value) -> bool {
2501    match v {
2502        Value::Null => true,
2503        Value::Sequence(items) => items.is_empty(),
2504        Value::Mapping(map) => map.is_empty(),
2505        other => scalar_string(other)
2506            .map(|s| s.trim().is_empty())
2507            .unwrap_or(true),
2508    }
2509}
2510
2511/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2512/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2513fn is_flat_scalar_list(v: &Value) -> bool {
2514    match v {
2515        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2516        _ => false,
2517    }
2518}
2519
2520/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2521/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2522/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2523/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2524/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2525/// both forms uniformly, the way the link textually appears — the doctrine view.
2526///
2527/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2528/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2529fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2530    let mut out = Vec::new();
2531    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2532        for link in links {
2533            out.push((key.clone(), link));
2534        }
2535    }
2536    out
2537}
2538
2539/// The wiki-link targets declared under a single top-level frontmatter key
2540/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2541/// carries no `[[...]]`.
2542fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2543    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2544        if k == key {
2545            return links;
2546        }
2547    }
2548    Vec::new()
2549}
2550
2551/// The raw value text under a single top-level frontmatter key (the remainder of
2552/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2553/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2554fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2555    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2556        if k == key {
2557            return Some(value_text);
2558        }
2559    }
2560    None
2561}
2562
2563/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2564/// each top-level key. A top-level key is a line with no leading indentation in
2565/// `name:` form; its value spans the rest of that line plus any deeper-indented
2566/// continuation lines (block scalars, block sequences) until the next top-level
2567/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2568/// absolute file line.
2569fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2570    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2571    let mut current: Option<(String, String, Vec<Link>)> = None;
2572
2573    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2574        let file_line = fm_start_line + idx as u32;
2575        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2576        let trimmed = raw_line.trim();
2577
2578        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2579        // comment. (Indented or dash lines belong to the current key's value.)
2580        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2581            top_level_key(raw_line)
2582        } else {
2583            None
2584        };
2585
2586        if let Some((key, after)) = new_key {
2587            if let Some(done) = current.take() {
2588                blocks.push(done);
2589            }
2590            let mut links = Vec::new();
2591            collect_line_links(after, file_line, &mut links);
2592            current = Some((key, after.trim().to_string(), links));
2593        } else if let Some((_k, value_text, links)) = current.as_mut() {
2594            // Continuation of the current key's value (indented or dash line).
2595            if !value_text.is_empty() {
2596                value_text.push('\n');
2597            }
2598            value_text.push_str(trimmed);
2599            collect_line_links(raw_line, file_line, links);
2600        }
2601    }
2602    if let Some(done) = current.take() {
2603        blocks.push(done);
2604    }
2605    blocks
2606}
2607
/// Parse a frontmatter key line into `(key, value_after_colon)`.
///
/// The key is everything before the first `:`, trimmed; it must be non-empty
/// and consist only of alphanumeric characters, `_` and `-`. The returned value
/// is the untrimmed remainder after that first colon. Returns `None` if the
/// line has no colon or the key part is not such a plain name.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();

    let is_key_char = |c: char| c.is_alphanumeric() || c == '_' || c == '-';
    if name.is_empty() || name.chars().any(|c| !is_key_char(c)) {
        return None;
    }

    // `:` is a single byte, so `colon + 1` is always a valid boundary.
    Some((name.to_owned(), &line[colon + 1..]))
}
2622
2623/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2624/// each tagged with `file_line`.
2625fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2626    let bytes = s.as_bytes();
2627    let mut i = 0;
2628    while i + 1 < bytes.len() {
2629        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2630            if let Some(close) = s[i + 2..].find("]]") {
2631                let inner = &s[i + 2..i + 2 + close];
2632                // Guard against `[[[` (nested) double-counting: the inner must
2633                // not itself open another `[[`.
2634                let target = inner
2635                    .trim_start_matches('[')
2636                    .split('|')
2637                    .next()
2638                    .unwrap_or(inner)
2639                    .trim()
2640                    .to_string();
2641                if !target.is_empty() {
2642                    links.push(Link {
2643                        target,
2644                        line: file_line,
2645                    });
2646                }
2647                i = i + 2 + close + 2;
2648                continue;
2649            }
2650        }
2651        i += 1;
2652    }
2653}
2654
2655/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2656/// Skips fenced code blocks, so example links in docs don't trip the validator.
2657///
2658/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2659/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2660/// only on a line that is the **same** fence character with a run **at least as
2661/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2662/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2663/// document a backtick fence) — the inner backtick line would flip `in_fence`
2664/// off and the demo `[[…]]` inside the code block would be checked as a live
2665/// link, falsely flagging a legal store.
2666fn extract_wiki_links(body: &str) -> Vec<Link> {
2667    let mut out = Vec::new();
2668    let mut fence: Option<(u8, usize)> = None;
2669    for (idx, line) in body.lines().enumerate() {
2670        let content = line.trim_end_matches('\r');
2671        if let Some(f) = fence {
2672            // Inside a fence: the only thing that matters is whether THIS line
2673            // closes it (matching char, run ≥ the opening run). Everything else
2674            // is opaque code — no link extraction.
2675            if fence_closes(content, f) {
2676                fence = None;
2677            }
2678            continue;
2679        }
2680        if let Some(opened) = fence_opens(content) {
2681            fence = Some(opened);
2682            continue;
2683        }
2684        let line_no = (idx + 1) as u32;
2685        let bytes = line.as_bytes();
2686        let mut i = 0;
2687        while i + 1 < bytes.len() {
2688            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2689                if let Some(close) = line[i + 2..].find("]]") {
2690                    let inner = &line[i + 2..i + 2 + close];
2691                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2692                    // Skip a triple-bracket `[[[…` opening: the inner content
2693                    // starts with `[`, so this is the rejected flow-form list
2694                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2695                    // legitimate target never starts with `[`. The frontmatter
2696                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2697                    // extracting a bogus body link here would double-report it as
2698                    // a spurious `WIKI_LINK_SHORT_FORM`.
2699                    if !target.is_empty() && !target.starts_with('[') {
2700                        out.push(Link {
2701                            target,
2702                            line: line_no,
2703                        });
2704                    }
2705                    i = i + 2 + close + 2;
2706                    continue;
2707                }
2708            }
2709            i += 1;
2710        }
2711    }
2712    out
2713}
2714
/// If `line` opens a fenced code block, return `(fence byte, run length)`.
///
/// A fence is a run of at least three backticks or three tildes, preceded by at
/// most three spaces of indentation. For a backtick fence the text after the
/// run (the info string) may not itself contain a backtick. This is a local
/// mirror of the parser's `opening_fence`, so the validator's fence tracking
/// matches the rest of the toolkit.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return None;
    }

    let marker = match unindented.as_bytes().first() {
        Some(&byte) if byte == b'`' || byte == b'~' => byte,
        _ => return None,
    };

    // Whatever follows the run of fence characters is the info string.
    let info = unindented.trim_start_matches(char::from(marker));
    let run = unindented.len() - info.len();
    if run < 3 {
        return None;
    }
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
2740
/// True if `line` closes the currently open `fence` (`(fence byte, opening run
/// length)`).
///
/// The line must be indented by at most three spaces, start with a run of the
/// same fence character at least as long as the opening run, and contain
/// nothing but whitespace after that run. Local mirror of the parser's
/// `is_closing_fence` — so a fence line of the *other* character inside the
/// block does NOT close it.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;

    let unindented = line.trim_start_matches(' ');
    let indent = line.len() - unindented.len();

    let tail = unindented.trim_start_matches(char::from(marker));
    let run = unindented.len() - tail.len();

    indent <= 3 && run >= open_len && tail.trim().is_empty()
}
2758
2759/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2760/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2761///
2762/// **Scoped to the inline value on the key line.** The SPEC's canonical
2763/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2764/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2765/// flagged — even though, parsed whole, it nests the same way the rejected
2766/// inline flow form does. So this check looks only at the value written *inline*
2767/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2768/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2769/// mis-encoding), it is flagged. A key with no inline value (the block form,
2770/// whose items live on continuation lines) is never inspected here.
2771///
2772/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2773/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2774/// encodes the identical nested sequence but evaded the old prefix match.
2775fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2776    let mut out = Vec::new();
2777    for line in fm_yaml.lines() {
2778        // Top-level key lines only (no indentation, not a comment or list dash).
2779        if line.starts_with(' ') || line.starts_with('\t') {
2780            continue;
2781        }
2782        let Some((key, rest)) = line.split_once(':') else {
2783            continue;
2784        };
2785        let key = key.trim();
2786        if key.is_empty()
2787            || key.starts_with('#')
2788            || key.starts_with('-')
2789            || !key
2790                .chars()
2791                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2792        {
2793            continue;
2794        }
2795        let rest = rest.trim();
2796        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2797        // the unquoted block form has an empty inline value and is never flagged.
2798        if !rest.starts_with('[') {
2799            continue;
2800        }
2801        // Parse just the inline value and test its shape: a list whose items are
2802        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2803        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2804        // `Seq[Seq[String]]` and is NOT flagged).
2805        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2806            let nested = items.iter().any(|item| match item {
2807                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2808                _ => false,
2809            });
2810            if nested {
2811                out.push(key.to_string());
2812            }
2813        }
2814    }
2815    out
2816}
2817
/// True if a bare target (no `.md`) is a full store-relative path: its first
/// `/`-separated segment is a known layer (`sources`, `records`, `wiki`) and
/// something non-empty follows that first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, rest)) => {
            matches!(layer, "sources" | "records" | "wiki") && !rest.is_empty()
        }
        // No `/` at all: a short-form target, not a full path.
        None => false,
    }
}
2826
/// True if a path stays inside the store: it has at least one normal component
/// and no `..`, root, or platform-prefix component (`.` components are
/// tolerated and ignored).
///
/// Validator inputs come from user-authored markdown/JSON sidecars; never let
/// absolute paths, platform prefixes, or `..` turn a validation probe into a
/// filesystem escape.
fn is_safe_store_relative_path(path: &Path) -> bool {
    // Fold over the components, tracking whether a normal one has been seen;
    // any escaping component aborts the fold with `None`.
    path.components()
        .try_fold(false, |seen_normal, component| match component {
            Component::Normal(_) => Some(true),
            Component::CurDir => Some(seen_normal),
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
        })
        .unwrap_or(false)
}
2841
2842fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2843    let path = Path::new(bare);
2844    if !is_safe_store_relative_path(path) {
2845        return None;
2846    }
2847    Some(PathBuf::from(format!("{bare}.md")))
2848}
2849
/// How a wiki-link / index-entry target resolves on disk.
///
/// Produced by [`resolve_wiki_target`]; the three outcomes are mutually
/// exclusive.
enum TargetResolution {
    /// The target exists as a file (either as the literal path or with a `.md`
    /// suffix appended).
    Exists,
    /// The target is a safe store-relative path but no file exists for it.
    Missing,
    /// The target escapes the store (absolute, `..`, prefix) — never probe it.
    Unsafe,
}
2859
2860/// Resolve a bare wiki-link / index-entry target the way the graph engine does
2861/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
2862/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
2863/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
2864/// real file), then the `.md`-appended path (the common case for content
2865/// pages). Without trying the literal path first, a legal link to a raw source
2866/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
2867/// resolves it.
2868fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
2869    // The literal path and the `.md`-appended path share the same safety check
2870    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
2871    // target is unsafe in both forms.
2872    if !is_safe_store_relative_path(Path::new(bare)) {
2873        return TargetResolution::Unsafe;
2874    }
2875    match resolved_target_abs(store, bare) {
2876        Some(_) => TargetResolution::Exists,
2877        None => TargetResolution::Missing,
2878    }
2879}
2880
2881/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
2882/// trying the literal path first, then `.md`-appended — mirroring the graph
2883/// engine. `None` when neither exists, or when the bare target escapes the store
2884/// (callers that need to distinguish unsafe from merely-missing use
2885/// [`resolve_wiki_target`]).
2886fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
2887    if !is_safe_store_relative_path(Path::new(bare)) {
2888        return None;
2889    }
2890    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
2891    // verbatim under `sources/`).
2892    let literal = store.root.join(bare);
2893    if literal.is_file() {
2894        return Some(literal);
2895    }
2896    // The `.md`-appended path (a content page referenced without its extension).
2897    let with_md = store.root.join(format!("{bare}.md"));
2898    if with_md.is_file() {
2899        return Some(with_md);
2900    }
2901    None
2902}
2903
/// Report whether the bare target `bare` is `prefix` itself or lies beneath it
/// (both given without a `.md` suffix). Trailing `/` characters on `prefix` are
/// ignored, and the match is on whole path segments: `records/contacts-old/a`
/// is not under `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let folder = prefix.trim_end_matches('/');
    // After removing the folder, what remains must be nothing (exact match) or
    // begin at a segment boundary.
    bare.strip_prefix(folder)
        .is_some_and(|remainder| remainder.is_empty() || remainder.starts_with('/'))
}
2909
/// Compute the type-folder of a store-relative content path, i.e.
/// `<layer>/<type-folder>`: the folder directly beneath the layer. Deeper
/// date-shard folders roll up to that same folder.
///
/// Returns `None` when the path has fewer than three (UTF-8) components — a
/// file sitting directly in a layer folder — or when its first component is not
/// one of `sources`, `records`, `wiki`.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    let layer = parts.next()?;
    let folder = parts.next()?;
    // A third component (the file, or a shard folder) must exist below the
    // type-folder; its value is irrelevant.
    parts.next()?;
    match layer {
        "sources" | "records" | "wiki" => Some(Path::new(layer).join(folder)),
        _ => None,
    }
}
2923
2924/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`/`wiki/`,
2925/// returning store-relative paths to be parsed in full. Skips hidden dirs and
2926/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
2927/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
2928/// (a single presence-only pass), so the loop default never walks-and-*parses*
2929/// the whole content tree.
2930///
2931/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
2932/// is reserved (`Store::is_in_log_dir` checks only the first path component);
2933/// the walk roots are the three layers, so the root archive is already out of
2934/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
2935/// decision log) is real content (see `is_content_file`), so pruning every
2936/// `name == "log"` made `--all` silently skip those files — reporting fewer
2937/// errors than the default working-set scope on the same store.
2938fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2939    let mut out = Vec::new();
2940    for layer in ["sources", "records", "wiki"] {
2941        let base = root.join(layer);
2942        if !base.is_dir() {
2943            continue;
2944        }
2945        for entry in walkdir::WalkDir::new(&base)
2946            .into_iter()
2947            .filter_entry(|e| {
2948                let name = e.file_name().to_str().unwrap_or("");
2949                !name.starts_with('.')
2950            })
2951            .flatten()
2952        {
2953            if !entry.file_type().is_file() {
2954                continue;
2955            }
2956            let name = entry.file_name().to_str().unwrap_or("");
2957            if name.ends_with(".md") && name != "index.md" {
2958                if let Ok(rel) = entry.path().strip_prefix(root) {
2959                    out.push(rel.to_path_buf());
2960                }
2961            }
2962        }
2963    }
2964    out.sort();
2965    out
2966}
2967
2968/// Every `index.md` under the store (root + layers + type-folders), as
2969/// store-relative paths. Used to detect orphan indexes. Like
2970/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
2971/// and its `index.md` is not pruned (only the root-level `log/` archive is
2972/// reserved, and the walk roots are the three layers, so it is already
2973/// out of scope).
2974fn walk_index_files(root: &Path) -> Vec<PathBuf> {
2975    let mut out = Vec::new();
2976    if root.join("index.md").is_file() {
2977        out.push(PathBuf::from("index.md"));
2978    }
2979    for layer in ["sources", "records", "wiki"] {
2980        let base = root.join(layer);
2981        if !base.is_dir() {
2982            continue;
2983        }
2984        for entry in walkdir::WalkDir::new(&base)
2985            .into_iter()
2986            .filter_entry(|e| {
2987                let name = e.file_name().to_str().unwrap_or("");
2988                !name.starts_with('.')
2989            })
2990            .flatten()
2991        {
2992            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
2993                if let Ok(rel) = entry.path().strip_prefix(root) {
2994                    out.push(rel.to_path_buf());
2995                }
2996            }
2997        }
2998    }
2999    out.sort();
3000    out
3001}
3002
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number.
struct IndexEntry {
    /// The wiki-link target: the text inside the first `[[...]]` on the line,
    /// up to any `|display` segment, trimmed.
    target: String,
    /// The summary text following the link, or `None` when the line carries no
    /// separator-introduced summary.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md` text.
    line: u32,
}
3010
3011/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3012/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3013/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3014/// path, the summary text is whatever follows the em dash.
3015fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3016    let mut out = Vec::new();
3017    let mut in_more = false;
3018    for (idx, line) in text.lines().enumerate() {
3019        let trimmed = line.trim_start();
3020        if trimmed.starts_with("## More") {
3021            in_more = true;
3022            continue;
3023        }
3024        if in_more {
3025            continue;
3026        }
3027        if !trimmed.starts_with("- ") {
3028            continue;
3029        }
3030        // Find the first `[[...]]`.
3031        let Some(open) = trimmed.find("[[") else {
3032            continue;
3033        };
3034        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3035            continue;
3036        };
3037        let inner = &trimmed[open + 2..open + 2 + close_rel];
3038        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3039
3040        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3041        let after = &trimmed[open + 2 + close_rel + 2..];
3042        let summary_text = extract_index_entry_summary(after);
3043
3044        out.push(IndexEntry {
3045            target,
3046            summary_text,
3047            line: (idx + 1) as u32,
3048        });
3049    }
3050    out
3051}
3052
3053/// Pull the summary portion out of the text trailing an index entry's
3054/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3055/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3056/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3057/// renderer's tag separator).
3058fn extract_index_entry_summary(after: &str) -> Option<String> {
3059    let mut s = after.trim();
3060    // Drop a leading "(N ...)" count segment, if present.
3061    if s.starts_with('(') {
3062        if let Some(close) = s.find(')') {
3063            s = s[close + 1..].trim_start();
3064        }
3065    }
3066    // Require an em dash or hyphen separator before the summary.
3067    let s = if let Some(rest) = s.strip_prefix('—') {
3068        rest.trim()
3069    } else if let Some(rest) = s.strip_prefix('-') {
3070        rest.trim()
3071    } else {
3072        return None;
3073    };
3074    if s.is_empty() {
3075        return None;
3076    }
3077    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3078    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3079    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3080    // the file has no tags. The previous code also accepted a *single*-spaced
3081    // ` · ` separator, which collided with a legal summary whose own text ends
3082    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3083    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3084    // summary verbatim (no tag block, since there are no tags), but the loose
3085    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3086    // `"Standup notes"` against the file's full summary, and emitted a spurious
3087    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3088    // (rebuild regenerates the identical line). Matching the renderer's exact
3089    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3090    // matches from the right so only the real trailing tag block is considered.
3091    let s = match s.rsplit_once("  ·  ") {
3092        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3093        _ => s,
3094    };
3095    Some(s.to_string())
3096}
3097
/// Recognise a tag block: at least one whitespace-separated token, every token
/// being `#` followed by at least one more byte. This is the shape the index
/// renderer appends after its `·` separator
/// (`crate::index::format_md_entry`), and it is what tells the renderer's
/// `  ·  #tag` suffix apart from a literal `·` inside summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // An empty / all-whitespace string has no tokens and is not a tag block.
    tokens.peek().is_some() && tokens.all(|tok| tok.starts_with('#') && tok.len() >= 2)
}
3112
3113/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3114/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3115/// or the header isn't well-formed.
3116fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3117    let rest = line.strip_prefix("## [")?;
3118    let close = rest.find(']')?;
3119    let ts_str = &rest[..close];
3120    let tail = rest[close + 1..].trim();
3121
3122    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3123    // attach a zero offset — the log header carries minute precision, no zone.
3124    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3125    let offset = FixedOffset::east_opt(0)?;
3126    let ts = naive.and_local_timezone(offset).single()?;
3127
3128    // kind | object
3129    let (kind, object) = match tail.split_once('|') {
3130        Some((k, o)) => {
3131            let o = o.trim();
3132            (
3133                k.trim().to_string(),
3134                if o.is_empty() {
3135                    None
3136                } else {
3137                    Some(o.to_string())
3138                },
3139            )
3140        }
3141        None => (tail.to_string(), None),
3142    };
3143    if kind.is_empty() {
3144        return None;
3145    }
3146    Some((ts, kind, object))
3147}
3148
3149/// Every log file that holds entries for the working-set scan: the active
3150/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3151/// strictly-prior-month entries into the archives, so the active file alone is
3152/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3153/// unvalidated object can live in an archive after a month rollover. Reading the
3154/// archives here keeps the working-set readers in sync with the rest of the log
3155/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3156/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3157/// only log headers, never the content store, so the loop budget is preserved.
3158fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3159    let mut files = vec![store.root.join("log.md")];
3160    let archive_dir = store.root.join("log");
3161    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3162        let mut archives: Vec<PathBuf> = entries
3163            .flatten()
3164            .map(|e| e.path())
3165            .filter(|p| {
3166                p.is_file()
3167                    && p.file_name()
3168                        .and_then(|s| s.to_str())
3169                        .and_then(|n| n.strip_suffix(".md"))
3170                        .is_some_and(is_year_month_archive)
3171            })
3172            .collect();
3173        // Deterministic order (oldest month first); the callers fold across all
3174        // files so order doesn't affect the result, but a stable order keeps the
3175        // scan reproducible.
3176        archives.sort();
3177        files.extend(archives);
3178    }
3179    files
3180}
3181
/// Check that `s` has the shape of a `YYYY-MM` archive stem: exactly four ASCII
/// digits, a `-`, and two ASCII digits — the `log/<YYYY-MM>.md` naming the
/// rotation in [`crate::log`] emits. Only the shape is checked, not that the
/// month is a real calendar month.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [y1, y2, y3, y4, b'-', m1, m2] => {
            [y1, y2, y3, y4, m1, m2].iter().all(|b| b.is_ascii_digit())
        }
        _ => false,
    }
}
3191
3192/// The timestamp of the most recent `validate` entry across the active `log.md`
3193/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3194/// Reads only headers; never the whole store. Archive-aware so a `validate`
3195/// entry that rotated into an archive after a month rollover still anchors the
3196/// cutoff (without this, the cutoff silently resets to `None`).
3197fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3198    let mut latest: Option<DateTime<FixedOffset>> = None;
3199    for file in log_files_for_working_set(store) {
3200        let Ok(text) = std::fs::read_to_string(&file) else {
3201            continue;
3202        };
3203        for line in text.lines() {
3204            if !line.starts_with("## [") {
3205                continue;
3206            }
3207            if let Some((ts, kind, _)) = parse_log_header(line) {
3208                if kind == "validate" {
3209                    latest = Some(match latest {
3210                        Some(p) if p >= ts => p,
3211                        _ => ts,
3212                    });
3213                }
3214            }
3215        }
3216    }
3217    latest
3218}
3219
3220/// The set of content objects changed since `cutoff`, read from log entries
3221/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3222/// counts (no prior validate window). Returns store-relative `.md` paths.
3223///
3224/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3225/// month rollover [`Log::append`] rotates prior-month entries out of the active
3226/// file, so an object changed-but-never-validated in a prior month lives only in
3227/// an archive. Reading the archives here is what keeps `dbmd validate` from
3228/// silently skipping those files. Reads only log headers, never the content
3229/// store.
3230fn changed_objects_since(
3231    store: &Store,
3232    cutoff: Option<DateTime<FixedOffset>>,
3233) -> BTreeSet<PathBuf> {
3234    let mut out = BTreeSet::new();
3235    for file in log_files_for_working_set(store) {
3236        let Ok(text) = std::fs::read_to_string(&file) else {
3237            continue;
3238        };
3239        for line in text.lines() {
3240            if !line.starts_with("## [") {
3241                continue;
3242            }
3243            let Some((ts, kind, object)) = parse_log_header(line) else {
3244                continue;
3245            };
3246            if let Some(c) = cutoff {
3247                if ts < c {
3248                    continue;
3249                }
3250            }
3251            if !matches!(
3252                kind.as_str(),
3253                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3254            ) {
3255                continue;
3256            }
3257            if let Some(obj) = object {
3258                // The object slot is a store-relative path (or a wiki-link target).
3259                let bare = obj
3260                    .trim()
3261                    .trim_start_matches("[[")
3262                    .trim_end_matches("]]")
3263                    .split('|')
3264                    .next()
3265                    .unwrap_or("")
3266                    .trim()
3267                    .trim_end_matches(".md")
3268                    .to_string();
3269                if bare.is_empty() {
3270                    continue;
3271                }
3272                out.insert(PathBuf::from(format!("{bare}.md")));
3273            }
3274        }
3275    }
3276    out
3277}
3278
/// The result of the [`derived_from_ignored_type`] policy check: the
/// `derived_from` target that resolves to an ignored-type record, plus that
/// record's type. Carries exactly what both the validate finding and the
/// write-time warning need to render their message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target as written (bare store-relative path,
    /// no `.md`). Copied verbatim from the target string that was checked.
    pub target: String,
    /// The resolved `type` of that target, which is present in
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3292
3293/// **The single authoritative `### Ignored types` derivation check.** Decides
3294/// whether a `wiki-page` derives from an ignored-type record: the type must be
3295/// `wiki-page`, `### Ignored types` must be non-empty, and some `derived_from`
3296/// target must resolve to a record whose `type` is in `ignored_types`. Returns
3297/// the first such target (and its type), or `None`.
3298///
3299/// Both surfaces call this so the policy lives in exactly one place:
3300/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3301/// `derived_from` targets it scanned from the raw frontmatter, and the write
3302/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3303/// The link *extraction* differs per surface (text-scan with line numbers vs.
3304/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3305/// resolution, and `ignored_types` membership — does not.
3306pub fn derived_from_ignored_type<I, S>(
3307    store: &Store,
3308    type_: &str,
3309    derived_from_targets: I,
3310) -> Option<DerivedFromIgnored>
3311where
3312    I: IntoIterator<Item = S>,
3313    S: AsRef<str>,
3314{
3315    if type_ != "wiki-page" || store.config.ignored_types.is_empty() {
3316        return None;
3317    }
3318    for target in derived_from_targets {
3319        let target = target.as_ref();
3320        if let Some(target_type) = link_target_type(store, target) {
3321            if store.config.ignored_types.contains(&target_type) {
3322                return Some(DerivedFromIgnored {
3323                    target: target.to_string(),
3324                    target_type,
3325                });
3326            }
3327        }
3328    }
3329    None
3330}
3331
3332/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3333fn link_target_type(store: &Store, target: &str) -> Option<String> {
3334    let bare = target.trim_end_matches(".md");
3335    let abs = store.root.join(safe_md_target_rel(bare)?);
3336    let text = std::fs::read_to_string(&abs).ok()?;
3337    let (yaml, _, _) = split_frontmatter(&text)?;
3338    let value: Value = serde_norway::from_str(&yaml).ok()?;
3339    if let Value::Mapping(m) = value {
3340        m.get(Value::String("type".into())).and_then(scalar_string)
3341    } else {
3342        None
3343    }
3344}
3345
3346// ── Shape validators ─────────────────────────────────────────────────────────
3347
3348/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3349/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3350fn is_iso8601(s: &str) -> bool {
3351    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3352}
3353
3354/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3355/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3356/// accept the date-only form per the SPEC's worked example.
3357fn is_iso8601_date_or_datetime(s: &str) -> bool {
3358    let s = s.trim();
3359    if DateTime::parse_from_rfc3339(s).is_ok() {
3360        return true;
3361    }
3362    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3363}
3364
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Rules, applied to the trimmed input:
/// - There must be exactly one `@`: a domain that still contains an `@` after
///   the split (the common double-`@` typo `sarah@@acme.com`, or `a@b@c.com`)
///   is rejected — otherwise the domain `@acme.com` would pass every other
///   check.
/// - Neither part may contain whitespace of any kind (space, tab, newline, …),
///   not just the ASCII space.
/// - The domain must have at least two dot-separated labels and none may be
///   empty, which rules out a leading dot, a trailing dot, and a doubled dot
///   (`acme..com`).
///
/// This is a shape check, not full address validation.
fn is_email(s: &str) -> bool {
    let Some((local, domain)) = s.trim().split_once('@') else {
        return false;
    };
    if local.is_empty() || local.contains(char::is_whitespace) {
        return false;
    }
    if domain.contains('@') || domain.contains(char::is_whitespace) {
        return false;
    }
    // "Dotted" means two or more labels; every label must be non-empty.
    domain.contains('.') && domain.split('.').all(|label| !label.is_empty())
}
3382
3383/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
3384/// plain decimal number with optional thousands separators and ≤ 2 decimals.
3385///
3386/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
3387/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
3388/// rejected, and the ≤ 2-decimal rule is actually enforced.
3389fn is_currency(s: &str) -> bool {
3390    let mut t = s.trim();
3391    // Strip a leading currency symbol …
3392    for sym in ["$", "€", "£", "¥"] {
3393        if let Some(rest) = t.strip_prefix(sym) {
3394            t = rest.trim_start();
3395            break;
3396        }
3397    }
3398    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
3399    // code must be exactly three ASCII letters and separated from the number by
3400    // whitespace, so a bare `USD` with no amount still fails.
3401    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
3402        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
3403            t = rest.trim_start();
3404        }
3405    }
3406
3407    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
3408    is_plain_amount(cleaned.trim())
3409}
3410
/// Check for a bare decimal amount: an optional single `+` / `-` sign, one or
/// more ASCII digits, and optionally a `.` followed by one or two ASCII digits.
/// Exponents, `inf` / `NaN`, a missing integer part (`.5`), a dangling point
/// (`5.`), and the empty string are all rejected.
fn is_plain_amount(s: &str) -> bool {
    // Drop at most one leading sign; both signs are single-byte ASCII, so the
    // byte slice lands on a char boundary.
    let unsigned = match s.as_bytes().first() {
        Some(b'+' | b'-') => &s[1..],
        _ => s,
    };
    let all_digits = |part: &str| !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit());
    match unsigned.split_once('.') {
        None => all_digits(unsigned),
        Some((whole, fraction)) => {
            all_digits(whole) && fraction.len() <= 2 && all_digits(fraction)
        }
    }
}
3427
/// Check for an http(s) URL: after trimming, the string must begin with
/// `http://` or `https://` (matched case-sensitively) and have at least one
/// character after the scheme. Because the remainder is measured from the
/// scheme that actually matched, a one-character host on the shorter scheme
/// (`http://x` — e.g. an intranet/container hostname) is accepted, while a bare
/// `http://` or `https://` is rejected.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    candidate
        .strip_prefix("http://")
        .or_else(|| candidate.strip_prefix("https://"))
        .is_some_and(|after_scheme| !after_scheme.is_empty())
}
3442
3443/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3444fn shape_suggestion(shape: Shape) -> String {
3445    match shape {
3446        Shape::String => "use a scalar string".into(),
3447        Shape::Int => "use an integer".into(),
3448        Shape::Bool => "use `true` or `false`".into(),
3449        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3450        Shape::Email => "use a `<local>@<domain>` address".into(),
3451        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3452        Shape::Url => "use an http(s) URL".into(),
3453    }
3454}
3455
3456/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3457/// can't know the folder, so the suggestion is generic but actionable.
3458fn short_form_suggestion(bare: &str) -> Option<String> {
3459    Some(format!(
3460        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3461        slugish(bare)
3462    ))
3463}
3464
/// Reduce a plain string to a filesystem-ish leaf: trim it, lowercase it, turn
/// each whitespace character into `-`, and drop every character that is not
/// alphanumeric, `-`, `/`, or `_`.
fn slugish(s: &str) -> String {
    let mut slug = String::new();
    for ch in s.trim().to_lowercase().chars() {
        if ch.is_whitespace() {
            slug.push('-');
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3474
3475/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3476/// hashes a byte or reads an asset file's contents — byte presence and hash
3477/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3478/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3479/// content file's `asset`/`assets` declarations.
3480fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3481    use crate::assets;
3482
3483    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3484    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3485
3486    // Lenient manifest read: a malformed line is reported, not fatal.
3487    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3488    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3489        for (i, line) in text.lines().enumerate() {
3490            if line.trim().is_empty() {
3491                continue;
3492            }
3493            match serde_json::from_str::<assets::AssetRecord>(line) {
3494                Ok(rec) => {
3495                    manifest.insert(rec.path.clone(), rec);
3496                }
3497                Err(e) => push(
3498                    issues,
3499                    Severity::Error,
3500                    codes::ASSET_MANIFEST_MALFORMED,
3501                    manifest_rel,
3502                    Some((i as u32) + 1),
3503                    None,
3504                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3505                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3506                    vec![],
3507                ),
3508            }
3509        }
3510    }
3511
3512    // Per-wrapper declarations: every declared asset must be in the manifest and
3513    // must not point at a markdown content file.
3514    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3515    for (rel, p) in parsed {
3516        let Some(map) = &p.fm else {
3517            continue;
3518        };
3519        for decl in assets::declarations_from_yaml_map(map) {
3520            let norm = match assets::normalize_asset_path(&decl.path) {
3521                Ok(n) => n,
3522                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3523            };
3524            declared.insert(norm.clone());
3525            let is_md = Path::new(&norm)
3526                .extension()
3527                .and_then(|e| e.to_str())
3528                .map(|e| e.eq_ignore_ascii_case("md"))
3529                .unwrap_or(false);
3530            if is_md {
3531                push(
3532                    issues,
3533                    Severity::Warning,
3534                    codes::ASSET_PATH_IS_CONTENT,
3535                    rel,
3536                    None,
3537                    Some("asset".to_string()),
3538                    format!("asset path `{norm}` points at a markdown content file"),
3539                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3540                    vec![PathBuf::from(&norm)],
3541                );
3542            }
3543            if !manifest.contains_key(&norm) {
3544                push(
3545                    issues,
3546                    Severity::Error,
3547                    codes::ASSET_UNDECLARED,
3548                    rel,
3549                    None,
3550                    Some("asset".to_string()),
3551                    format!(
3552                        "references asset `{norm}` with no record in {}",
3553                        assets::MANIFEST_FILE
3554                    ),
3555                    Some("run `dbmd assets scan` to catalog it".to_string()),
3556                    vec![PathBuf::from(&norm)],
3557                );
3558            }
3559        }
3560    }
3561
3562    // Per-record: wrapper existence + orphan detection.
3563    for (path, rec) in &manifest {
3564        for w in &rec.wrappers {
3565            if !store.root.join(w).is_file() {
3566                push(
3567                    issues,
3568                    Severity::Error,
3569                    codes::ASSET_WRAPPER_BROKEN,
3570                    Path::new(path),
3571                    None,
3572                    None,
3573                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3574                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3575                    vec![PathBuf::from(w)],
3576                );
3577            }
3578        }
3579        if !declared.contains(path) {
3580            push(
3581                issues,
3582                Severity::Warning,
3583                codes::ASSET_MANIFEST_ORPHAN,
3584                Path::new(path),
3585                None,
3586                None,
3587                format!(
3588                    "`{path}` is in {} but no wrapper references it",
3589                    assets::MANIFEST_FILE
3590                ),
3591                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3592                vec![],
3593            );
3594        }
3595    }
3596}
3597
3598/// Push a fully-formed [`Issue`].
3599#[allow(clippy::too_many_arguments)]
3600fn push(
3601    issues: &mut Vec<Issue>,
3602    severity: Severity,
3603    code: &'static str,
3604    file: &Path,
3605    line: Option<u32>,
3606    key: Option<String>,
3607    message: String,
3608    suggestion: Option<String>,
3609    related: Vec<PathBuf>,
3610) {
3611    issues.push(Issue {
3612        severity,
3613        code,
3614        file: file.to_path_buf(),
3615        line,
3616        key,
3617        message,
3618        suggestion,
3619        related,
3620    });
3621}
3622
/// 1-based *file* line of a top-level frontmatter key.
///
/// `fm_yaml` is the YAML text between the frontmatter fences. A line matches
/// only when it starts at column 0 with `key` immediately followed by `:`, so
/// indented (nested) keys and `- ` list items never match, and a key that is
/// merely a prefix of a longer key (`type` vs `types:`) does not match either.
/// The first matching line wins.
///
/// Returns `None` when no line matches, or when the resulting line number
/// would not fit in a `u32`.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let index = fm_yaml.lines().position(|line| {
        // Matching against the untrimmed line is what enforces "top-level".
        line.strip_prefix(key)
            .is_some_and(|rest| rest.starts_with(':'))
    })?;
    // +2: file line 1 is the opening `---`, YAML line 0 → file line 2.
    // Checked conversion/addition instead of a silently truncating `as` cast.
    u32::try_from(index).ok()?.checked_add(2)
}
3638
3639/// The line a *field-absence* issue (a required key that is missing entirely)
3640/// anchors to: the key's line when present, else line `1` — the frontmatter
3641/// block's opening `---`. A missing key has no line of its own; anchoring it to
3642/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3643/// line to point at instead of an unhelpful `null`.
3644fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3645    fm_key_line(fm_yaml, key).or(Some(1))
3646}
3647
3648/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3649/// output deterministic across runs.
3650fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3651    a.file
3652        .cmp(&b.file)
3653        .then(a.line.cmp(&b.line))
3654        .then(a.code.cmp(b.code))
3655        .then(a.key.cmp(&b.key))
3656}
3657
3658// ═════════════════════════════════════════════════════════════════════════════
3659//  Tests
3660// ═════════════════════════════════════════════════════════════════════════════
3661
3662#[cfg(test)]
3663mod tests {
3664    use super::*;
3665    use crate::parser::{Config, FieldSpec};
3666    use std::fs;
3667    use tempfile::TempDir;
3668
3669    #[test]
3670    fn split_frontmatter_tolerates_leading_bom() {
3671        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3672        // fence must not make validate treat the file as frontmatter-less while
3673        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3674        // for `\u{feff}---` and the function returned None.
3675        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3676        let parsed = split_frontmatter(text);
3677        assert!(
3678            parsed.is_some(),
3679            "a leading BOM must not hide frontmatter from validate"
3680        );
3681        let (yaml, body, close_line) = parsed.unwrap();
3682        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3683        assert_eq!(body, "body");
3684        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3685    }
3686
3687    /// A test store builder over a real tempdir. Every helper writes real files
3688    /// so the assertions exercise real behavior, not mocks.
3689    struct Fixture {
3690        dir: TempDir,
3691        config: Config,
3692    }
3693
3694    impl Fixture {
3695        /// A fresh store with a **valid** `DB.md` (the identity contract:
3696        /// `type: db-md` + `scope` + `owner`) and the three layer dirs. A valid
3697        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3698        /// clean; tests that want a broken DB.md write their own via `write`.
3699        fn new() -> Self {
3700            let dir = TempDir::new().unwrap();
3701            fs::write(
3702                dir.path().join("DB.md"),
3703                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3704            )
3705            .unwrap();
3706            for layer in ["sources", "records", "wiki"] {
3707                fs::create_dir_all(dir.path().join(layer)).unwrap();
3708            }
3709            Fixture {
3710                dir,
3711                config: Config::default(),
3712            }
3713        }
3714
3715        /// A store with no `DB.md` marker.
3716        fn bare() -> Self {
3717            let dir = TempDir::new().unwrap();
3718            Fixture {
3719                dir,
3720                config: Config::default(),
3721            }
3722        }
3723
3724        /// Write a file at a store-relative path, creating parent dirs.
3725        fn write(&self, rel: &str, contents: &str) {
3726            let abs = self.dir.path().join(rel);
3727            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3728            fs::write(abs, contents).unwrap();
3729        }
3730
3731        fn store(&self) -> Store {
3732            Store {
3733                root: self.dir.path().to_path_buf(),
3734                config: self.config.clone(),
3735            }
3736        }
3737
3738        fn store_all(&self) -> Vec<Issue> {
3739            validate_all(&self.store()).unwrap()
3740        }
3741
3742        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3743        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3744        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3745        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3746        /// the sidecar carries the COMPLETE per-field projection and the fixture
3747        /// can't silently drift from what the index writer emits.
3748        fn rebuild_indexes(&self) {
3749            crate::index::Index::rebuild_all(&self.store()).unwrap();
3750        }
3751    }
3752
3753    /// True if any issue has this code.
3754    fn has(issues: &[Issue], code: &str) -> bool {
3755        issues.iter().any(|i| i.code == code)
3756    }
3757
3758    /// Count issues with a code.
3759    fn count(issues: &[Issue], code: &str) -> usize {
3760        issues.iter().filter(|i| i.code == code).count()
3761    }
3762
3763    /// The first issue with a code, or panic.
3764    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3765        issues
3766            .iter()
3767            .find(|i| i.code == code)
3768            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3769    }
3770
3771    /// A minimal valid `contact` body for reuse.
3772    fn valid_contact(summary: &str) -> String {
3773        format!(
3774            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3775        )
3776    }
3777
3778    // ── store marker ──────────────────────────────────────────────────────────
3779
3780    #[test]
3781    fn not_a_store_when_db_md_absent() {
3782        let fx = Fixture::bare();
3783        let issues = fx.store_all();
3784        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3785        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3786        assert!(issues[0].is_error());
3787    }
3788
3789    #[test]
3790    fn working_set_also_reports_not_a_store() {
3791        let fx = Fixture::bare();
3792        let issues = validate_working_set(&fx.store(), None).unwrap();
3793        assert!(has(&issues, codes::NOT_A_STORE));
3794    }
3795
3796    #[test]
3797    fn clean_store_has_no_issues() {
3798        let fx = Fixture::new();
3799        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3800        // Build the canonical indexes (complete per-field jsonl included) the
3801        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3802        // proven clean across every projected field, not just summary/type.
3803        fx.rebuild_indexes();
3804        let issues = fx.store_all();
3805        assert!(
3806            issues.is_empty(),
3807            "expected a clean store, got: {issues:#?}"
3808        );
3809    }
3810
3811    // ── DB.md structure ───────────────────────────────────────────────────────
3812
3813    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3814    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3815    /// would fail here).
3816    #[test]
3817    fn valid_db_md_emits_no_structure_issue() {
3818        let fx = Fixture::new();
3819        let issues = fx.store_all();
3820        assert!(
3821            !has(&issues, codes::DB_MD_BAD_TYPE)
3822                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3823                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3824            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3825        );
3826    }
3827
3828    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3829    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3830    /// accepting a non-`db-md` type, breaks this.
3831    #[test]
3832    fn db_md_wrong_type_is_error() {
3833        let fx = Fixture::new();
3834        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3835        let issues = fx.store_all();
3836        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3837        assert!(i.is_error());
3838        assert_eq!(i.file, PathBuf::from("DB.md"));
3839        assert_eq!(i.key.as_deref(), Some("type"));
3840        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3841    }
3842
3843    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3844    /// absent field, each keyed on its field name, anchored to the block top.
3845    #[test]
3846    fn db_md_missing_scope_and_owner_each_report() {
3847        let fx = Fixture::new();
3848        fx.write("DB.md", "---\ntype: db-md\n---\n");
3849        let issues = fx.store_all();
3850        assert_eq!(
3851            count(&issues, codes::DB_MD_MISSING_FIELD),
3852            2,
3853            "both scope and owner absent → two issues: {issues:#?}"
3854        );
3855        let keys: BTreeSet<Option<String>> = issues
3856            .iter()
3857            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3858            .map(|i| i.key.clone())
3859            .collect();
3860        assert_eq!(
3861            keys,
3862            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3863            "one issue keyed on each missing field"
3864        );
3865        for i in issues
3866            .iter()
3867            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3868        {
3869            assert!(i.is_error());
3870            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3871        }
3872    }
3873
3874    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3875    /// anchored to its own line — guarding against an "is the key textually
3876    /// present?" shortcut that would miss `owner:` with an empty value.
3877    #[test]
3878    fn db_md_blank_required_field_is_missing() {
3879        let fx = Fixture::new();
3880        fx.write(
3881            "DB.md",
3882            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3883        );
3884        let issues = fx.store_all();
3885        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3886        assert_eq!(i.key.as_deref(), Some("owner"));
3887        assert_eq!(
3888            i.line,
3889            Some(4),
3890            "a present-but-empty field anchors to its line"
3891        );
3892        assert!(
3893            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3894            "scope is present and non-empty → only owner reported"
3895        );
3896    }
3897
3898    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
3899    /// to the heading's file line; the three recognized sections stay silent.
3900    #[test]
3901    fn db_md_unknown_section_is_warning() {
3902        let fx = Fixture::new();
3903        fx.write(
3904            "DB.md",
3905            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
3906            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
3907            // 11 `## Glossary`.
3908            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
3909        );
3910        let issues = fx.store_all();
3911        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
3912        assert!(!i.is_error(), "unknown section is a warning, not an error");
3913        assert_eq!(i.severity, Severity::Warning);
3914        assert_eq!(
3915            i.line,
3916            Some(11),
3917            "anchors to the `## Glossary` heading line"
3918        );
3919        assert!(
3920            i.message.contains("Glossary"),
3921            "the message names the offending section: {}",
3922            i.message
3923        );
3924        // The recognized `## Agent instructions` section did NOT fire.
3925        assert_eq!(
3926            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
3927            1,
3928            "only the unrecognized section is flagged: {issues:#?}"
3929        );
3930    }
3931
3932    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
3933    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
3934    #[test]
3935    fn db_md_no_frontmatter_reports_type_and_both_fields() {
3936        let fx = Fixture::new();
3937        fx.write("DB.md", "# just a heading, no frontmatter\n");
3938        let issues = fx.store_all();
3939        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
3940        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
3941    }
3942
3943    // ── frontmatter ─────────────────────────────────────────────────────────
3944
3945    #[test]
3946    fn missing_type_is_error() {
3947        let fx = Fixture::new();
3948        fx.write(
3949            "records/contacts/a.md",
3950            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
3951        );
3952        let issues = fx.store_all();
3953        assert!(has(&issues, codes::FM_MISSING_TYPE));
3954        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
3955    }
3956
3957    #[test]
3958    fn missing_universal_timestamps_are_errors_on_content_files() {
3959        let fx = Fixture::new();
3960        fx.write(
3961            "records/contacts/a.md",
3962            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
3963        );
3964        let issues = fx.store_all();
3965
3966        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
3967        assert_eq!(missing_created.key.as_deref(), Some("created"));
3968        assert!(missing_created.is_error());
3969
3970        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
3971        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
3972        assert!(missing_updated.is_error());
3973    }
3974
3975    #[test]
3976    fn meta_files_do_not_require_universal_timestamps() {
3977        let fx = Fixture::new();
3978        let issues = fx.store_all();
3979
3980        assert!(
3981            !has(&issues, codes::FM_MISSING_CREATED),
3982            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3983        );
3984        assert!(
3985            !has(&issues, codes::FM_MISSING_UPDATED),
3986            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3987        );
3988    }
3989
3990    #[test]
3991    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
3992        let fx = Fixture::new();
3993        fx.write(
3994            "wiki/people/a.md",
3995            "# Just a heading\n\nNo frontmatter here.\n",
3996        );
3997        let issues = fx.store_all();
3998        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3999        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4000    }
4001
4002    #[test]
4003    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4004        let fx = Fixture::new();
4005        fx.write("wiki/people/a.md", "---\n---\n\nbody\n");
4006        let issues = fx.store_all();
4007        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4008        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4009    }
4010
4011    #[test]
4012    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4013        let fx = Fixture::new();
4014        // A tab inside a mapping value is invalid YAML.
4015        fx.write(
4016            "records/contacts/a.md",
4017            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4018        );
4019        let issues = fx.store_all();
4020        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4021        assert!(issue.is_error());
4022        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4023        // When YAML doesn't parse we don't *also* claim the summary is missing;
4024        // the agent fixes the YAML first.
4025        assert!(
4026            !has(&issues, codes::SUMMARY_MISSING),
4027            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4028        );
4029    }
4030
4031    #[test]
4032    fn bad_created_timestamp_is_error() {
4033        let fx = Fixture::new();
4034        fx.write(
4035            "records/contacts/a.md",
4036            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4037        );
4038        let issues = fx.store_all();
4039        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4040        assert_eq!(issue.key.as_deref(), Some("created"));
4041        assert!(issue.is_error());
4042    }
4043
4044    #[test]
4045    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4046        let fx = Fixture::new();
4047        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4048        // `last_touch` is a type-specific date field → date-only is fine.
4049        fx.write(
4050            "records/contacts/a.md",
4051            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4052        );
4053        let issues = fx.store_all();
4054        let created_issues: Vec<_> = issues
4055            .iter()
4056            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4057            .collect();
4058        assert_eq!(
4059            created_issues.len(),
4060            1,
4061            "date-only `created` must fail: {issues:#?}"
4062        );
4063        assert!(
4064            !issues.iter().any(
4065                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4066            ),
4067            "date-only `last_touch` is valid: {issues:#?}"
4068        );
4069    }
4070
4071    // ── summary ─────────────────────────────────────────────────────────────
4072
4073    #[test]
4074    fn summary_missing_empty_multiline_toolong() {
4075        let fx = Fixture::new();
4076        fx.write(
4077            "wiki/people/missing.md",
4078            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4079        );
4080        fx.write(
4081            "wiki/people/empty.md",
4082            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4083        );
4084        let long = "x".repeat(201);
4085        fx.write(
4086            "wiki/people/long.md",
4087            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4088        );
4089        let issues = fx.store_all();
4090        assert!(has(&issues, codes::SUMMARY_MISSING));
4091        assert_eq!(
4092            find(&issues, codes::SUMMARY_MISSING).file,
4093            PathBuf::from("wiki/people/missing.md")
4094        );
4095        assert!(has(&issues, codes::SUMMARY_EMPTY));
4096        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4097        assert_eq!(
4098            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4099            Severity::Warning
4100        );
4101    }
4102
4103    #[test]
4104    fn summary_multiline_via_yaml_block_scalar() {
4105        let fx = Fixture::new();
4106        // A literal block scalar produces a value with a newline.
4107        fx.write(
4108            "wiki/people/a.md",
4109            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4110        );
4111        let issues = fx.store_all();
4112        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4113    }
4114
4115    #[test]
4116    fn summary_exactly_200_chars_is_ok() {
4117        let fx = Fixture::new();
4118        let s = "y".repeat(200);
4119        fx.write(
4120            "wiki/people/a.md",
4121            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4122        );
4123        let issues = fx.store_all();
4124        assert!(
4125            !has(&issues, codes::SUMMARY_TOO_LONG),
4126            "200 is the bound, inclusive: {issues:#?}"
4127        );
4128    }
4129
4130    #[test]
4131    fn meta_files_need_no_summary() {
4132        let fx = Fixture::new();
4133        // The root/layer/type indexes + log carry no summary and must not be
4134        // flagged. (A lone DB.md store with one contact and full indexes.)
4135        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4136        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4137        fx.write(
4138            "records/index.md",
4139            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4140        );
4141        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4142        fx.write(
4143            "records/contacts/index.jsonl",
4144            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4145        );
4146        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4147        let issues = fx.store_all();
4148        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4149    }
4150
4151    // ── tags ────────────────────────────────────────────────────────────────
4152
4153    #[test]
4154    fn nested_tags_warns_flat_tags_ok() {
4155        let fx = Fixture::new();
4156        fx.write(
4157            "records/contacts/nested.md",
4158            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4159        );
4160        fx.write(
4161            "records/contacts/flat.md",
4162            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4163        );
4164        let issues = fx.store_all();
4165        let tag_issues: Vec<_> = issues
4166            .iter()
4167            .filter(|i| i.code == codes::TAGS_MALFORMED)
4168            .collect();
4169        assert_eq!(
4170            tag_issues.len(),
4171            1,
4172            "only the nested-tags file should warn: {issues:#?}"
4173        );
4174        assert_eq!(
4175            tag_issues[0].file,
4176            PathBuf::from("records/contacts/nested.md")
4177        );
4178        assert_eq!(tag_issues[0].severity, Severity::Warning);
4179    }
4180
4181    // ── wiki-links ────────────────────────────────────────────────────────────
4182
4183    #[test]
4184    fn short_form_wiki_link_is_error() {
4185        let fx = Fixture::new();
4186        let mut body = valid_contact("links to a short form");
4187        body.push_str("\nSee [[sarah-chen]] for details.\n");
4188        fx.write("wiki/people/a.md", &body);
4189        let issues = fx.store_all();
4190        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4191        assert!(issue.is_error());
4192        assert!(issue.message.contains("sarah-chen"));
4193        // A short-form link must NOT also be reported broken — fix the form first.
4194        assert!(
4195            !issues
4196                .iter()
4197                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4198            "short-form should suppress broken: {issues:#?}"
4199        );
4200    }
4201
4202    #[test]
4203    fn broken_full_path_wiki_link_is_error() {
4204        let fx = Fixture::new();
4205        let mut body = valid_contact("links to a missing file");
4206        body.push_str("\nSee [[records/contacts/ghost]].\n");
4207        fx.write("wiki/people/a.md", &body);
4208        let issues = fx.store_all();
4209        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4210        assert!(issue.is_error());
4211        assert!(issue.message.contains("records/contacts/ghost"));
4212        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4213    }
4214
4215    #[test]
4216    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4217        let fx = Fixture::new();
4218        let mut body = valid_contact("links with traversal");
4219        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4220        fx.write("wiki/people/a.md", &body);
4221        let issues = fx.store_all();
4222        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4223        assert!(issue.message.contains("not a safe store-relative path"));
4224        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4225    }
4226
4227    #[test]
4228    fn valid_full_path_wiki_link_passes() {
4229        let fx = Fixture::new();
4230        fx.write("records/contacts/target.md", &valid_contact("target"));
4231        let mut body = valid_contact("links to target");
4232        body.push_str("\nSee [[records/contacts/target]].\n");
4233        fx.write("wiki/people/a.md", &body);
4234        let issues = fx.store_all();
4235        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4236        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4237    }
4238
4239    #[test]
4240    fn md_extension_wiki_link_warns_and_resolves() {
4241        let fx = Fixture::new();
4242        fx.write("records/contacts/target.md", &valid_contact("target"));
4243        let mut body = valid_contact("links with extension");
4244        body.push_str("\nSee [[records/contacts/target.md]].\n");
4245        fx.write("wiki/people/a.md", &body);
4246        let issues = fx.store_all();
4247        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4248        assert_eq!(issue.severity, Severity::Warning);
4249        assert_eq!(
4250            issue.suggestion.as_deref(),
4251            Some("drop the extension: [[records/contacts/target]]")
4252        );
4253        // The target exists once `.md` is stripped → not broken.
4254        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4255    }
4256
4257    #[test]
4258    fn wiki_links_in_code_fences_are_ignored() {
4259        let fx = Fixture::new();
4260        let mut body = valid_contact("has a fenced example");
4261        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4262        fx.write("wiki/people/a.md", &body);
4263        let issues = fx.store_all();
4264        assert!(
4265            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4266            "fenced wiki-links must be ignored: {issues:#?}"
4267        );
4268    }
4269
4270    #[test]
4271    fn flow_form_link_list_in_frontmatter_is_error() {
4272        let fx = Fixture::new();
4273        fx.write(
4274            "records/meetings/m.md",
4275            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4276        );
4277        let issues = fx.store_all();
4278        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4279        assert!(issue.is_error());
4280        assert_eq!(issue.key.as_deref(), Some("attendees"));
4281    }
4282
4283    #[test]
4284    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4285        let fx = Fixture::new();
4286        fx.write("records/contacts/a.md", &valid_contact("a"));
4287        fx.write("records/contacts/b.md", &valid_contact("b"));
4288        fx.write(
4289            "records/meetings/m.md",
4290            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4291        );
4292        let issues = fx.store_all();
4293        assert!(
4294            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4295            "{issues:#?}"
4296        );
4297        // Block-form link targets are still integrity-checked (both exist here).
4298        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4299    }
4300
4301    #[test]
4302    fn frontmatter_short_form_link_field_is_error() {
4303        let fx = Fixture::new();
4304        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4305        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4306        fx.write(
4307            "wiki/people/a.md",
4308            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4309        );
4310        let issues = fx.store_all();
4311        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4312        assert!(issue.is_error());
4313        assert_eq!(issue.key.as_deref(), Some("related"));
4314    }
4315
4316    #[test]
4317    fn unquoted_frontmatter_link_is_recognized() {
4318        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4319        // string. The validator must still see it as a wiki-link (text-based
4320        // extraction). A short-form custom field must report SHORT_FORM, and a
4321        // full-path one with a missing target must report BROKEN.
4322        let fx = Fixture::new();
4323        fx.write(
4324            "wiki/people/short.md",
4325            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4326        );
4327        fx.write(
4328            "wiki/people/broken.md",
4329            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4330        );
4331        let issues = fx.store_all();
4332        assert!(
4333            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4334                && i.file == Path::new("wiki/people/short.md")
4335                && i.key.as_deref() == Some("related")),
4336            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4337        );
4338        assert!(
4339            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4340                && i.file == Path::new("wiki/people/broken.md")),
4341            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4342        );
4343    }
4344
4345    #[test]
4346    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4347        // A short-form value in a *declared* link field (a `### contact` schema
4348        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4349        // (the target isn't under the prefix), and must NOT also be reported as a
4350        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4351        let mut fx = Fixture::new();
4352        fx.config.schemas.insert(
4353            "contact".into(),
4354            Schema {
4355                fields: vec![FieldSpec {
4356                    name: "company".into(),
4357                    link_prefix: Some(PathBuf::from("records/companies")),
4358                    ..Default::default()
4359                }],
4360                ..Default::default()
4361            },
4362        );
4363        fx.write(
4364            "records/contacts/a.md",
4365            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4366        );
4367        let issues = fx.store_all();
4368        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4369        assert_eq!(issue.key.as_deref(), Some("company"));
4370        // The same link must NOT also be double-reported via the generic path.
4371        assert!(
4372            !issues
4373                .iter()
4374                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4375                    && i.key.as_deref() == Some("company")),
4376            "schema link fields are checked once, by the schema path: {issues:#?}"
4377        );
4378    }
4379
4380    #[test]
4381    fn schema_link_field_with_md_extension_still_warns() {
4382        let mut fx = Fixture::new();
4383        fx.config.schemas.insert(
4384            "contact".into(),
4385            Schema {
4386                fields: vec![FieldSpec {
4387                    name: "company".into(),
4388                    link_prefix: Some(PathBuf::from("records/companies")),
4389                    ..Default::default()
4390                }],
4391                ..Default::default()
4392            },
4393        );
4394        fx.write(
4395            "records/companies/acme.md",
4396            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4397        );
4398        fx.write(
4399            "records/contacts/a.md",
4400            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4401        );
4402        let issues = fx.store_all();
4403        let issue = issues
4404            .iter()
4405            .find(|i| {
4406                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4407            })
4408            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4409        assert_eq!(issue.severity, Severity::Warning);
4410        assert!(
4411            !issues
4412                .iter()
4413                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4414            "extensionless existence check should still find acme.md: {issues:#?}"
4415        );
4416    }
4417
4418    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4419
4420    #[test]
4421    fn explicit_schema_required_shape_enum() {
4422        let fx = {
4423            let mut fx = Fixture::new();
4424            // contact schema: name required, email required+email shape,
4425            // status enum: active|inactive
4426            let schema = Schema {
4427                fields: vec![
4428                    FieldSpec {
4429                        name: "name".into(),
4430                        required: true,
4431                        ..Default::default()
4432                    },
4433                    FieldSpec {
4434                        name: "email".into(),
4435                        required: true,
4436                        shape: Some(Shape::Email),
4437                        ..Default::default()
4438                    },
4439                    FieldSpec {
4440                        name: "status".into(),
4441                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4442                        ..Default::default()
4443                    },
4444                ],
4445                ..Default::default()
4446            };
4447            fx.config.schemas.insert("contact".into(), schema);
4448            fx
4449        };
4450        fx.write(
4451            "records/contacts/a.md",
4452            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4453        );
4454        let issues = fx.store_all();
4455        // name absent → MISSING_REQUIRED
4456        assert!(
4457            issues
4458                .iter()
4459                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4460                    && i.key.as_deref() == Some("name")),
4461            "{issues:#?}"
4462        );
4463        // email malformed → SHAPE_MISMATCH
4464        assert!(
4465            issues.iter().any(
4466                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4467            ),
4468            "{issues:#?}"
4469        );
4470        // status archived not in enum → ENUM_VIOLATION
4471        assert!(
4472            issues
4473                .iter()
4474                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4475                    && i.key.as_deref() == Some("status")),
4476            "{issues:#?}"
4477        );
4478    }
4479
4480    #[test]
4481    fn schema_without_link_field_allows_plain_value() {
4482        // A `contact` schema with no `company` link field means a plain `company`
4483        // string is fine — schema enforcement is exactly what the store declares,
4484        // nothing implicit.
4485        let mut fx = Fixture::new();
4486        fx.config.schemas.insert(
4487            "contact".into(),
4488            Schema {
4489                fields: vec![FieldSpec {
4490                    name: "name".into(),
4491                    required: true,
4492                    ..Default::default()
4493                }],
4494                ..Default::default()
4495            },
4496        );
4497        fx.write(
4498            "records/contacts/a.md",
4499            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4500        );
4501        let issues = fx.store_all();
4502        assert!(
4503            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4504            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4505        );
4506    }
4507
4508    #[test]
4509    fn schema_link_field_plain_value_is_prefix_mismatch() {
4510        // The surviving link-enforcement path: a declared `link to <prefix>/`
4511        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4512        let mut fx = Fixture::new();
4513        fx.config.schemas.insert(
4514            "contact".into(),
4515            Schema {
4516                fields: vec![FieldSpec {
4517                    name: "company".into(),
4518                    link_prefix: Some(PathBuf::from("records/companies")),
4519                    ..Default::default()
4520                }],
4521                ..Default::default()
4522            },
4523        );
4524        fx.write(
4525            "records/contacts/a.md",
4526            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4527        );
4528        let issues = fx.store_all();
4529        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4530        assert_eq!(issue.key.as_deref(), Some("company"));
4531        assert!(issue
4532            .suggestion
4533            .as_deref()
4534            .unwrap()
4535            .contains("records/companies/"));
4536    }
4537
4538    #[test]
4539    fn schema_shape_int_and_url_and_currency() {
4540        let mut fx = Fixture::new();
4541        fx.config.schemas.insert(
4542            "widget".into(),
4543            Schema {
4544                fields: vec![
4545                    FieldSpec {
4546                        name: "qty".into(),
4547                        shape: Some(Shape::Int),
4548                        ..Default::default()
4549                    },
4550                    FieldSpec {
4551                        name: "site".into(),
4552                        shape: Some(Shape::Url),
4553                        ..Default::default()
4554                    },
4555                    FieldSpec {
4556                        name: "price".into(),
4557                        shape: Some(Shape::Currency),
4558                        ..Default::default()
4559                    },
4560                ],
4561                ..Default::default()
4562            },
4563        );
4564        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4565        // ISO code + amount). It must pass — it used to spuriously fail.
4566        fx.write(
4567            "records/widgets/ok.md",
4568            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4569        );
4570        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4571        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4572        // the under-rejection half of the finding.
4573        fx.write(
4574            "records/widgets/bad.md",
4575            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4576        );
4577        let issues = fx.store_all();
4578        let bad_shape: Vec<_> = issues
4579            .iter()
4580            .filter(|i| {
4581                i.code == codes::SCHEMA_SHAPE_MISMATCH
4582                    && i.file == Path::new("records/widgets/bad.md")
4583            })
4584            .map(|i| i.key.clone().unwrap_or_default())
4585            .collect();
4586        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4587        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4588        assert!(
4589            bad_shape.contains(&"price".to_string()),
4590            "inf must be rejected as currency: {issues:#?}"
4591        );
4592        assert!(
4593            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4594                && i.file == Path::new("records/widgets/ok.md")),
4595            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4596        );
4597    }
4598
4599    #[test]
4600    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4601        let mut fx = Fixture::new();
4602        fx.config.schemas.insert(
4603            "contact".into(),
4604            Schema {
4605                fields: vec![
4606                    FieldSpec {
4607                        name: "email".into(),
4608                        required: true,
4609                        shape: Some(Shape::Email),
4610                        ..Default::default()
4611                    },
4612                    FieldSpec {
4613                        name: "status".into(),
4614                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4615                        ..Default::default()
4616                    },
4617                ],
4618                ..Default::default()
4619            },
4620        );
4621        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4622        // used to slip through entirely (`scalar_string` → None → the shape and
4623        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4624        fx.write(
4625            "records/contacts/bad.md",
4626            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4627        );
4628        let issues = fx.store_all();
4629        let mismatched: Vec<_> = issues
4630            .iter()
4631            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4632            .map(|i| i.key.clone().unwrap_or_default())
4633            .collect();
4634        assert!(
4635            mismatched.contains(&"email".to_string()),
4636            "list-valued required email must flag: {issues:#?}"
4637        );
4638        assert!(
4639            mismatched.contains(&"status".to_string()),
4640            "list-valued enum must flag: {issues:#?}"
4641        );
4642    }
4643
4644    #[test]
4645    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4646        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4647        for ok in [
4648            "100",
4649            "1234.56",
4650            "$1,234.50",
4651            "USD 100", // the finding's headline probe — used to be false
4652            "usd 100", // case-insensitive code
4653            "EUR 9.50",
4654            "£12",
4655            "¥1000",
4656            "-5.00", // signed amounts are real (refunds)
4657            "+5",
4658            "1,000,000",
4659        ] {
4660            assert!(is_currency(ok), "expected currency: {ok:?}");
4661        }
4662        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4663        // bare-code / exponent cases the docstring forbids.
4664        for bad in [
4665            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4666            "12.999", // 3 decimals
4667            "1.2345", // 4 decimals
4668            "USD",    // bare code, no amount
4669            "$",      // bare symbol
4670            "free", "", " ", "1e3",      // exponent form
4671            "1.",       // trailing dot, no fractional digits
4672            ".5",       // leading dot, no integer digits
4673            "1 000",    // space as separator is not a thousands separator
4674            "USDD 100", // 4-letter "code" must not strip
4675        ] {
4676            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4677        }
4678    }
4679
4680    // ── policies ───────────────────────────────────────────────────────────
4681
4682    #[test]
4683    fn ignored_type_present_is_info() {
4684        let mut fx = Fixture::new();
4685        fx.config.ignored_types.push("temp".into());
4686        fx.write(
4687            "records/temps/x.md",
4688            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4689        );
4690        let issues = fx.store_all();
4691        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4692        assert_eq!(issue.severity, Severity::Info);
4693        assert!(!issue.is_error());
4694        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4695    }
4696
4697    #[test]
4698    fn wiki_page_derived_from_ignored_type_warns() {
4699        let mut fx = Fixture::new();
4700        fx.config.ignored_types.push("temp".into());
4701        fx.write(
4702            "records/temps/x.md",
4703            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4704        );
4705        fx.write(
4706            "wiki/themes/t.md",
4707            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4708        );
4709        let issues = fx.store_all();
4710        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4711        assert_eq!(issue.severity, Severity::Warning);
4712        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4713        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4714    }
4715
4716    /// The shared `derived_from_ignored_type` entry point — the single
4717    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
4718    /// warning) now route through, so they cannot diverge. This pins its
4719    /// contract directly: the type gate, the empty-ignored-types gate, a
4720    /// positive match carrying the resolved target type, and a non-ignored
4721    /// target rejected.
4722    #[test]
4723    fn derived_from_ignored_type_is_the_shared_policy_decision() {
4724        let mut fx = Fixture::new();
4725        fx.config.ignored_types.push("secret".into());
4726        // An ignored-type record …
4727        fx.write(
4728            "records/secrets/s.md",
4729            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4730        );
4731        // … and a non-ignored record.
4732        fx.write(
4733            "records/contacts/c.md",
4734            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4735        );
4736        let store = fx.store();
4737
4738        // Positive: a wiki-page deriving from the ignored-type record matches,
4739        // and the hit carries both the target (as written) and its resolved type.
4740        let hit =
4741            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s"))
4742                .expect("wiki-page → ignored-type record must match");
4743        assert_eq!(hit.target, "records/secrets/s");
4744        assert_eq!(hit.target_type, "secret");
4745
4746        // Type gate: a non-`wiki-page` type never triggers, even with the same
4747        // ignored-type target.
4748        assert_eq!(
4749            derived_from_ignored_type(&store, "contact", std::iter::once("records/secrets/s")),
4750            None,
4751            "only wiki-page derivation is policed"
4752        );
4753
4754        // Target gate: a wiki-page deriving from a non-ignored record is fine.
4755        assert_eq!(
4756            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/contacts/c")),
4757            None,
4758            "deriving from a non-ignored type is allowed"
4759        );
4760
4761        // First match wins across multiple targets (here the second is the hit).
4762        let hit = derived_from_ignored_type(
4763            &store,
4764            "wiki-page",
4765            ["records/contacts/c", "records/secrets/s"],
4766        )
4767        .expect("a later ignored-type target must still be found");
4768        assert_eq!(hit.target, "records/secrets/s");
4769
4770        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
4771        fx.config.ignored_types.clear();
4772        let store = fx.store();
4773        assert_eq!(
4774            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s")),
4775            None,
4776            "an empty ignored-types policy short-circuits"
4777        );
4778    }
4779
4780    // ── duplicates ───────────────────────────────────────────────────────────
4781
4782    #[test]
4783    fn dup_id_is_hard_error_with_related() {
4784        let fx = Fixture::new();
4785        fx.write(
4786            "records/contacts/a.md",
4787            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4788        );
4789        fx.write(
4790            "records/contacts/b.md",
4791            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4792        );
4793        let issues = fx.store_all();
4794        // Reporting rule #1: ONE issue per collision group, keyed on the
4795        // lexicographically smallest path (`a.md`), partner in `related`.
4796        assert_eq!(
4797            count(&issues, codes::DUP_ID),
4798            1,
4799            "one issue per group: {issues:#?}"
4800        );
4801        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
4802        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
4803        assert!(a.is_error());
4804        assert_eq!(a.key.as_deref(), Some("id"));
4805        assert_eq!(
4806            a.line,
4807            Some(3),
4808            "anchors to the `id` line on the reported file"
4809        );
4810        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
4811    }
4812
4813    #[test]
4814    fn dup_id_not_fired_in_working_set() {
4815        // DUP_* is an --all-only cross-file check; the working set must not run it.
4816        let fx = Fixture::new();
4817        fx.write(
4818            "records/contacts/a.md",
4819            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4820        );
4821        fx.write(
4822            "records/contacts/b.md",
4823            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4824        );
4825        // Log says both changed since epoch, so they're in the working set.
4826        fx.write(
4827            "log.md",
4828            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
4829        );
4830        let issues = validate_working_set(&fx.store(), None).unwrap();
4831        assert!(
4832            !has(&issues, codes::DUP_ID),
4833            "DUP_ID is --all only: {issues:#?}"
4834        );
4835    }
4836
4837    #[test]
4838    fn dup_unique_key_single_field_is_warning() {
4839        let mut fx = Fixture::new();
4840        // contact declares `- unique: email`.
4841        fx.config.schemas.insert(
4842            "contact".into(),
4843            Schema {
4844                unique_keys: vec![vec!["email".into()]],
4845                ..Default::default()
4846            },
4847        );
4848        for (f, name) in [("a", "A"), ("b", "B")] {
4849            fx.write(
4850                &format!("records/contacts/{f}.md"),
4851                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
4852            );
4853        }
4854        let issues = fx.store_all();
4855        // One issue per group (rule #1), keyed on the smallest path, anchored to
4856        // the single `email` field.
4857        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
4858        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4859        assert_eq!(dup.severity, Severity::Warning);
4860        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
4861        assert_eq!(dup.key.as_deref(), Some("email"));
4862        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
4863    }
4864
4865    #[test]
4866    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
4867        let mut fx = Fixture::new();
4868        // expense declares `- unique: date, amount, vendor` (a compound key).
4869        fx.config.schemas.insert(
4870            "expense".into(),
4871            Schema {
4872                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
4873                ..Default::default()
4874            },
4875        );
4876        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
4877        let exp = |f: &str, amount: &str| {
4878            format!(
4879            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
4880        )
4881        };
4882        fx.write("records/expenses/e1.md", &exp("e1", "100"));
4883        fx.write("records/expenses/e2.md", &exp("e2", "100"));
4884        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
4885        let issues = fx.store_all();
4886        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
4887        // (e1) with e2 in `related`; e3 differs on amount and never appears.
4888        assert_eq!(
4889            count(&issues, codes::DUP_UNIQUE_KEY),
4890            1,
4891            "only e1+e2 collide, one issue: {issues:#?}"
4892        );
4893        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4894        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
4895        assert_eq!(
4896            dup.line,
4897            Some(1),
4898            "compound-key collision anchors to line 1"
4899        );
4900        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
4901        assert!(
4902            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
4903                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
4904            "e3 differs on amount and must not collide: {issues:#?}"
4905        );
4906    }
4907
4908    #[test]
4909    fn dup_unique_key_list_field_is_order_independent() {
4910        let mut fx = Fixture::new();
4911        // meeting declares `- unique: date, attendees`; the list field is a set.
4912        fx.config.schemas.insert(
4913            "meeting".into(),
4914            Schema {
4915                unique_keys: vec![vec!["date".into(), "attendees".into()]],
4916                ..Default::default()
4917            },
4918        );
4919        fx.write("records/contacts/a.md", &valid_contact("a"));
4920        fx.write("records/contacts/b.md", &valid_contact("b"));
4921        let m = |f: &str, order: &str| {
4922            let attendees = if order == "ab" {
4923                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
4924            } else {
4925                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
4926            };
4927            format!(
4928                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
4929            )
4930        };
4931        fx.write("records/meetings/m1.md", &m("m1", "ab"));
4932        fx.write("records/meetings/m2.md", &m("m2", "ba"));
4933        let issues = fx.store_all();
4934        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
4935        // → a single issue on the smaller path.
4936        assert_eq!(
4937            count(&issues, codes::DUP_UNIQUE_KEY),
4938            1,
4939            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
4940        );
4941        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4942        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
4943        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
4944    }
4945
4946    // ── indexes ───────────────────────────────────────────────────────────────
4947
4948    #[test]
4949    fn missing_indexes_at_all_three_levels() {
4950        let fx = Fixture::new();
4951        fx.write("records/contacts/a.md", &valid_contact("a"));
4952        let issues = fx.store_all();
4953        // root, layer (records), and type-folder (records/contacts) all missing.
4954        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
4955        // would-be index.md), per the field convention `EXPECTED` pins.
4956        let missing_files: BTreeSet<PathBuf> = issues
4957            .iter()
4958            .filter(|i| i.code == codes::INDEX_MISSING)
4959            .map(|i| i.file.clone())
4960            .collect();
4961        assert!(
4962            missing_files.contains(&PathBuf::from("index.md")),
4963            "{issues:#?}"
4964        );
4965        assert!(
4966            missing_files.contains(&PathBuf::from("records/index.md")),
4967            "{issues:#?}"
4968        );
4969        assert!(
4970            missing_files.contains(&PathBuf::from("records/contacts")),
4971            "{issues:#?}"
4972        );
4973        // When the index.md is entirely absent we do NOT additionally fire
4974        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
4975        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
4976    }
4977
4978    #[test]
4979    fn index_stale_entry_and_missing_entry() {
4980        let fx = Fixture::new();
4981        fx.write(
4982            "records/contacts/present.md",
4983            &valid_contact("present contact"),
4984        );
4985        // Indexes for the parents (root/layer) present so we isolate type-folder.
4986        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4987        fx.write(
4988            "records/index.md",
4989            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4990        );
4991        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
4992        fx.write(
4993            "records/contacts/index.md",
4994            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
4995        );
4996        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
4997        let issues = fx.store_all();
4998        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
4999        assert!(stale.message.contains("ghost"));
5000        assert!(stale.is_error());
5001        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5002        assert!(
5003            missing.message.contains("present.md"),
5004            "{}",
5005            missing.message
5006        );
5007    }
5008
5009    #[test]
5010    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5011        let fx = Fixture::new();
5012        fx.write("records/contacts/a.md", &valid_contact("a"));
5013        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5014        fx.write(
5015            "records/index.md",
5016            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5017        );
5018        fx.write(
5019            "records/contacts/index.md",
5020            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5021        );
5022        fx.write(
5023            "records/contacts/index.jsonl",
5024            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5025        );
5026        let issues = fx.store_all();
5027        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5028        assert!(stale.message.contains("not a safe store-relative path"));
5029    }
5030
5031    #[test]
5032    fn index_summary_mismatch() {
5033        let fx = Fixture::new();
5034        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5035        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5036        fx.write(
5037            "records/index.md",
5038            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5039        );
5040        fx.write(
5041            "records/contacts/index.md",
5042            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5043        );
5044        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5045        let issues = fx.store_all();
5046        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5047        assert!(issue.is_error());
5048        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5049    }
5050
5051    #[test]
5052    fn index_summary_match_passes() {
5053        let fx = Fixture::new();
5054        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5055        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5056        fx.write(
5057            "records/index.md",
5058            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5059        );
5060        fx.write(
5061            "records/contacts/index.md",
5062            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5063        );
5064        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5065        let issues = fx.store_all();
5066        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5067    }
5068
5069    #[test]
5070    fn index_entry_with_tag_suffix_matches_summary() {
5071        let fx = Fixture::new();
5072        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5073        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5074        fx.write(
5075            "records/index.md",
5076            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5077        );
5078        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5079        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5080        // which must be stripped before comparing against the file's summary.
5081        fx.write(
5082            "records/contacts/index.md",
5083            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5084        );
5085        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5086        let issues = fx.store_all();
5087        assert!(
5088            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5089            "tag suffix should be stripped: {issues:#?}"
5090        );
5091    }
5092
5093    #[test]
5094    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5095        // Regression (the finding): a tagless file whose `summary` legitimately
5096        // ends in a single-spaced ` · #word` tail round-trips through `index
5097        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5098        // file has no tags). The validator must NOT mistake that single-spaced
5099        // tail for the renderer's tag suffix, or it reports a spurious — and
5100        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5101        let fx = Fixture::new();
5102        fx.write(
5103            "records/contacts/a.md",
5104            &valid_contact("Standup notes · #standup"),
5105        );
5106        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5107        fx.write(
5108            "records/index.md",
5109            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5110        );
5111        fx.write(
5112            "records/contacts/index.md",
5113            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5114        );
5115        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5116        let issues = fx.store_all();
5117        assert!(
5118            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5119            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5120        );
5121    }
5122
5123    #[test]
5124    fn index_jsonl_desync_missing_file_in_jsonl() {
5125        let fx = Fixture::new();
5126        fx.write("records/contacts/a.md", &valid_contact("a"));
5127        fx.write("records/contacts/b.md", &valid_contact("b"));
5128        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5129        fx.write(
5130            "records/index.md",
5131            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5132        );
5133        fx.write(
5134            "records/contacts/index.md",
5135            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5136        );
5137        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5138        fx.write(
5139            "records/contacts/index.jsonl",
5140            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5141        );
5142        let issues = fx.store_all();
5143        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5144        assert!(desync.message.contains("b.md"), "{}", desync.message);
5145    }
5146
5147    #[test]
5148    fn index_jsonl_desync_record_points_at_missing_file() {
5149        let fx = Fixture::new();
5150        fx.write("records/contacts/a.md", &valid_contact("a"));
5151        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5152        fx.write(
5153            "records/index.md",
5154            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5155        );
5156        fx.write(
5157            "records/contacts/index.md",
5158            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5159        );
5160        fx.write(
5161            "records/contacts/index.jsonl",
5162            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5163        );
5164        let issues = fx.store_all();
5165        assert!(
5166            issues
5167                .iter()
5168                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5169            "{issues:#?}"
5170        );
5171    }
5172
5173    #[test]
5174    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5175        let fx = Fixture::new();
5176        fx.write("records/contacts/a.md", &valid_contact("a"));
5177        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5178        fx.write(
5179            "records/index.md",
5180            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5181        );
5182        fx.write(
5183            "records/contacts/index.md",
5184            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5185        );
5186        fx.write(
5187            "records/contacts/index.jsonl",
5188            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5189        );
5190        let issues = fx.store_all();
5191        assert!(
5192            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5193                && i.message.contains("not a safe store-relative path")),
5194            "{issues:#?}"
5195        );
5196    }
5197
5198    #[test]
5199    fn index_jsonl_stale_summary() {
5200        let fx = Fixture::new();
5201        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5202        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5203        fx.write(
5204            "records/index.md",
5205            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5206        );
5207        fx.write(
5208            "records/contacts/index.md",
5209            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5210        );
5211        // jsonl summary disagrees with the file frontmatter.
5212        fx.write(
5213            "records/contacts/index.jsonl",
5214            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5215        );
5216        let issues = fx.store_all();
5217        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5218        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5219        assert!(stale.key.as_deref().unwrap().contains("summary"));
5220    }
5221
5222    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5223    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5224    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5225    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5226    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5227    /// `email` validated clean and answered `--where email=…` with a phantom
5228    /// value present in no file. This is the direct regression guard.
5229    #[test]
5230    fn index_jsonl_stale_queryable_field_email() {
5231        let fx = Fixture::new();
5232        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5233        fx.write("records/contacts/a.md", contact);
5234        // Start from the canonical, fully-correct sidecar set …
5235        fx.rebuild_indexes();
5236        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5237        let good = fs::read_to_string(&jsonl_path).unwrap();
5238        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5239        assert!(
5240            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5241            "freshly-rebuilt sidecar must not be stale"
5242        );
5243        // … then desync ONLY the email so it's the single differing field.
5244        assert!(
5245            good.contains("real@correct.com"),
5246            "sidecar projects email: {good}"
5247        );
5248        fx.write(
5249            "records/contacts/index.jsonl",
5250            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5251        );
5252
5253        let issues = fx.store_all();
5254        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5255        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5256        // The mismatch is reported precisely on `email`, and summary/type — which
5257        // still match — are NOT named.
5258        let key = stale.key.as_deref().unwrap();
5259        assert!(
5260            key.contains("email"),
5261            "expected `email` in stale key, got {key:?}"
5262        );
5263        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5264        assert!(!key.contains("type"), "type still matches: {key:?}");
5265    }
5266
5267    /// Broaden the guard across the typed/list/timestamp projections at once:
5268    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5269    /// caught, with all three named in one issue.
5270    #[test]
5271    fn index_jsonl_stale_typed_and_list_fields() {
5272        let fx = Fixture::new();
5273        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5274        fx.write("records/expenses/e.md", expense);
5275        fx.rebuild_indexes();
5276        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5277        let good = fs::read_to_string(&jsonl_path).unwrap();
5278        assert!(
5279            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5280            "freshly-rebuilt sidecar must not be stale"
5281        );
5282        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5283        let stale_line = good
5284            .replace("\"q2\"", "\"WRONG-TAG\"")
5285            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5286            .replace("1299", "9999");
5287        fx.write("records/expenses/index.jsonl", &stale_line);
5288
5289        let issues = fx.store_all();
5290        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5291        let key = stale.key.as_deref().unwrap();
5292        for expected in ["amount", "tags", "updated"] {
5293            assert!(
5294                key.contains(expected),
5295                "expected `{expected}` in stale key, got {key:?}"
5296            );
5297        }
5298    }
5299
5300    #[test]
5301    fn index_orphan_in_noncanonical_folder() {
5302        let fx = Fixture::new();
5303        fx.write("records/contacts/a.md", &valid_contact("a"));
5304        // Build the canonical indexes so they aren't reported as orphans.
5305        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5306        fx.write(
5307            "records/index.md",
5308            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5309        );
5310        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5311        fx.write(
5312            "records/contacts/index.jsonl",
5313            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5314        );
5315        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5316        fx.write(
5317            "records/contacts/subfolder/index.md",
5318            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5319        );
5320        let issues = fx.store_all();
5321        let orphan = find(&issues, codes::INDEX_ORPHAN);
5322        assert_eq!(orphan.severity, Severity::Warning);
5323        assert_eq!(
5324            orphan.file,
5325            PathBuf::from("records/contacts/subfolder/index.md")
5326        );
5327    }
5328
5329    #[test]
5330    fn index_wrong_scope() {
5331        let fx = Fixture::new();
5332        fx.write("records/contacts/a.md", &valid_contact("a"));
5333        // Root index declares the wrong scope.
5334        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5335        fx.write(
5336            "records/index.md",
5337            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5338        );
5339        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5340        fx.write(
5341            "records/contacts/index.jsonl",
5342            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5343        );
5344        let issues = fx.store_all();
5345        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5346        assert_eq!(issue.severity, Severity::Warning);
5347        assert_eq!(issue.file, PathBuf::from("index.md"));
5348    }
5349
5350    #[test]
5351    fn capped_type_folder_index_does_not_flag_missing_entries() {
5352        // Over the 500-entry cap, omitted entries are expected, not an error.
5353        let fx = Fixture::new();
5354        for i in 0..501 {
5355            fx.write(
5356                &format!("records/contacts/c{i:04}.md"),
5357                &valid_contact(&format!("contact {i}")),
5358            );
5359        }
5360        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5361        fx.write(
5362            "records/index.md",
5363            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5364        );
5365        // Type-folder index lists only ONE entry + a More footer.
5366        fx.write(
5367            "records/contacts/index.md",
5368            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5369        );
5370        // jsonl must still be complete — write all 501 lines.
5371        let mut jsonl = String::new();
5372        for i in 0..501 {
5373            jsonl.push_str(&format!(
5374                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5375            ));
5376        }
5377        fx.write("records/contacts/index.jsonl", &jsonl);
5378        let issues = fx.store_all();
5379        assert!(
5380            !has(&issues, codes::INDEX_MISSING_ENTRY),
5381            "over the cap, missing browse entries are expected: {issues:#?}"
5382        );
5383        // But the jsonl is complete → no desync.
5384        assert!(
5385            !has(&issues, codes::INDEX_JSONL_DESYNC),
5386            "{:#?}",
5387            issues
5388                .iter()
5389                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5390                .collect::<Vec<_>>()
5391        );
5392    }
5393
5394    // ── log ────────────────────────────────────────────────────────────────
5395
5396    #[test]
5397    fn log_bad_timestamp_unknown_kind_out_of_order() {
5398        let fx = Fixture::new();
5399        fx.write(
5400            "log.md",
5401            concat!(
5402                "---\ntype: log\n---\n\n# Log\n\n",
5403                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5404                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5405                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5406                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5407            ),
5408        );
5409        let issues = fx.store_all();
5410        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5411        assert_eq!(
5412            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5413            Severity::Warning
5414        );
5415        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5416        assert_eq!(unknown.severity, Severity::Warning);
5417        assert!(unknown.message.contains("frobnicate"));
5418        assert!(unknown
5419            .suggestion
5420            .as_deref()
5421            .is_some_and(|s| s.contains("create")));
5422        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5423        assert!(bad.is_error());
5424    }
5425
5426    #[test]
5427    fn log_validate_entry_without_object_is_well_formed() {
5428        let fx = Fixture::new();
5429        fx.write(
5430            "log.md",
5431            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5432        );
5433        let issues = fx.store_all();
5434        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5435        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5436    }
5437
5438    #[test]
5439    fn log_in_order_is_clean() {
5440        let fx = Fixture::new();
5441        fx.write(
5442            "log.md",
5443            concat!(
5444                "---\ntype: log\n---\n\n",
5445                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5446                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5447            ),
5448        );
5449        let issues = fx.store_all();
5450        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5451    }
5452
5453    #[test]
5454    fn log_not_checked_in_working_set() {
5455        // log.md ordering is an --all-only check.
5456        let fx = Fixture::new();
5457        fx.write(
5458            "log.md",
5459            concat!(
5460                "---\ntype: log\n---\n\n",
5461                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5462                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5463            ),
5464        );
5465        let issues = validate_working_set(&fx.store(), None).unwrap();
5466        assert!(
5467            !has(&issues, codes::LOG_OUT_OF_ORDER),
5468            "log ordering is --all only: {issues:#?}"
5469        );
5470    }
5471
5472    // ── working-set scoping ───────────────────────────────────────────────────
5473
5474    #[test]
5475    fn working_set_validates_only_changed_files() {
5476        let fx = Fixture::new();
5477        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5478        // in the log → working set must skip it.
5479        fx.write(
5480            "records/contacts/dirty.md",
5481            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5482        );
5483        fx.write(
5484            "records/contacts/unlogged.md",
5485            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5486        );
5487        fx.write(
5488            "log.md",
5489            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5490        );
5491        let issues = validate_working_set(&fx.store(), None).unwrap();
5492        assert!(
5493            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5494                && i.file == Path::new("records/contacts/dirty.md")),
5495            "{issues:#?}"
5496        );
5497        assert!(
5498            !issues
5499                .iter()
5500                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5501            "unlogged file must not be in the working set: {issues:#?}"
5502        );
5503    }
5504
5505    #[test]
5506    fn working_set_includes_incoming_linkers_to_changed_path() {
5507        let fx = Fixture::new();
5508        // `changed` was renamed/removed (logged). `linker` points at it with a
5509        // now-broken link and was NOT itself logged — but must be pulled in.
5510        fx.write(
5511            "wiki/people/linker.md",
5512            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5513        );
5514        // `changed.md` does NOT exist on disk (removed).
5515        fx.write(
5516            "log.md",
5517            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5518        );
5519        let issues = validate_working_set(&fx.store(), None).unwrap();
5520        assert!(
5521            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5522                && i.file == Path::new("wiki/people/linker.md")),
5523            "incoming linker to a removed path must be validated: {issues:#?}"
5524        );
5525    }
5526
5527    #[test]
5528    fn working_set_respects_explicit_since_cutoff() {
5529        let fx = Fixture::new();
5530        fx.write(
5531            "records/contacts/old.md",
5532            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5533        );
5534        fx.write(
5535            "records/contacts/new.md",
5536            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5537        );
5538        fx.write(
5539            "log.md",
5540            concat!(
5541                "---\ntype: log\n---\n\n",
5542                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5543                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5544            ),
5545        );
5546        // Cutoff after `old` but before `new`.
5547        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5548        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5549        assert!(
5550            issues
5551                .iter()
5552                .any(|i| i.file == Path::new("records/contacts/new.md")),
5553            "{issues:#?}"
5554        );
5555        assert!(
5556            !issues
5557                .iter()
5558                .any(|i| i.file == Path::new("records/contacts/old.md")),
5559            "old change is before the cutoff: {issues:#?}"
5560        );
5561    }
5562
5563    #[test]
5564    fn working_set_default_since_is_last_validate_entry() {
5565        let fx = Fixture::new();
5566        // `before` changed before the last validate; `after` changed after.
5567        fx.write(
5568            "records/contacts/before.md",
5569            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5570        );
5571        fx.write(
5572            "records/contacts/after.md",
5573            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5574        );
5575        fx.write(
5576            "log.md",
5577            concat!(
5578                "---\ntype: log\n---\n\n",
5579                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5580                "## [2026-05-21 10:00] validate\nPASS\n\n",
5581                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5582            ),
5583        );
5584        let issues = validate_working_set(&fx.store(), None).unwrap();
5585        assert!(
5586            issues
5587                .iter()
5588                .any(|i| i.file == Path::new("records/contacts/after.md")),
5589            "{issues:#?}"
5590        );
5591        assert!(
5592            !issues
5593                .iter()
5594                .any(|i| i.file == Path::new("records/contacts/before.md")),
5595            "change before the last validate entry is outside the default window: {issues:#?}"
5596        );
5597    }
5598
5599    // ── ordering / determinism ────────────────────────────────────────────────
5600
5601    #[test]
5602    fn issues_are_sorted_by_file_then_line() {
5603        let fx = Fixture::new();
5604        fx.write("wiki/people/z.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5605        fx.write("wiki/people/a.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5606        let issues = fx.store_all();
5607        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5608        let mut sorted = files.clone();
5609        sorted.sort();
5610        assert_eq!(
5611            files, sorted,
5612            "issues must be emitted in a stable file order"
5613        );
5614    }
5615
5616    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5617
5618    #[test]
5619    fn frozen_page_is_not_a_validate_error() {
5620        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5621        // A clean file listed in `### Frozen pages` must validate clean.
5622        let mut fx = Fixture::new();
5623        fx.config
5624            .frozen_pages
5625            .push(PathBuf::from("records/decisions/d.md"));
5626        fx.write(
5627            "records/decisions/d.md",
5628            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5629        );
5630        let issues = fx.store_all();
5631        assert!(
5632            !has(&issues, codes::POLICY_FROZEN_PAGE),
5633            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5634        );
5635    }
5636
5637    #[test]
5638    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5639        // The full-path doctrine makes ambiguity impossible; the defensive code
5640        // must never fire on a normal store.
5641        let fx = Fixture::new();
5642        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5643        let mut body = valid_contact("links to sarah");
5644        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5645        fx.write("wiki/people/p.md", &body);
5646        let issues = fx.store_all();
5647        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5648    }
5649
5650    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5651
5652    #[test]
5653    fn unknown_type_passes_through() {
5654        // A custom type is ambient context: it has a `type`, so no
5655        // FM_MISSING_TYPE, and with no matching schema there are no schema
5656        // errors. Only the universal contract (summary, timestamps) applies.
5657        let fx = Fixture::new();
5658        fx.write(
5659            "records/proposals/x.md",
5660            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5661        );
5662        let issues = fx.store_all();
5663        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5664        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5665        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5666        // The unknown fields don't trip anything.
5667        assert!(
5668            !issues
5669                .iter()
5670                .any(|i| i.key.as_deref() == Some("custom_field")
5671                    || i.key.as_deref() == Some("budget")),
5672            "unknown fields are ambient context: {issues:#?}"
5673        );
5674    }
5675
5676    // ── find_links_to prefix-collision safety (working set) ───────────────────
5677
5678    #[test]
5679    fn incoming_linker_scan_does_not_prefix_match() {
5680        // A changed `records/contacts/sarah` must NOT pull in a file that only
5681        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5682        let fx = Fixture::new();
5683        fx.write(
5684            "wiki/people/only-sarah-chen.md",
5685            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5686        );
5687        // The log says `records/contacts/sarah` (the shorter path) changed.
5688        fx.write(
5689            "log.md",
5690            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5691        );
5692        let issues = validate_working_set(&fx.store(), None).unwrap();
5693        assert!(
5694            !issues
5695                .iter()
5696                .any(|i| i.file == Path::new("wiki/people/only-sarah-chen.md")),
5697            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5698        );
5699    }
5700
5701    #[test]
5702    fn incoming_linker_scan_pulls_in_catalog_index_md() {
5703        // CONTRACT: the working-set incoming-linker scan rides the embedded-
5704        // ripgrep `Store::find_links_to`, which scans EVERY `.md` (including
5705        // `index.md` catalogs) — NOT the walk-and-read over `walk_content_files`,
5706        // which excludes `index.md`. A type-folder `index.md` that lists a now-
5707        // deleted target must be pulled into the working set so its dangling
5708        // catalog entry is flagged `WIKI_LINK_BROKEN`. The old walk-and-read
5709        // implementation skipped `index.md` and let this broken link survive the
5710        // loop silently; this test fails if anyone reverts to that path.
5711        let fx = Fixture::new();
5712        // A catalog that still lists the deleted contact (a real, common stale
5713        // state after a `delete`). No other file references the target, so the
5714        // catalog is the ONLY incoming linker — if it isn't scanned, nothing is.
5715        fx.write(
5716            "records/contacts/index.md",
5717            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5718        );
5719        // The log says `records/contacts/sarah-chen` was deleted.
5720        fx.write(
5721            "log.md",
5722            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5723        );
5724        let issues = validate_working_set(&fx.store(), None).unwrap();
5725        assert!(
5726            issues
5727                .iter()
5728                .any(|i| i.file == Path::new("records/contacts/index.md")
5729                    && i.code == codes::WIKI_LINK_BROKEN),
5730            "the catalog `index.md` linking to the deleted target must be pulled \
5731             into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
5732             uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
5733             walk-and-read): {issues:#?}"
5734        );
5735    }
5736
5737    #[test]
5738    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
5739        // CONTRACT (the O(changed × store) fix): the working-set scan finds
5740        // incoming linkers for EVERY changed object, and does so via the single
5741        // batch pass `Store::find_links_to_any` — not one full store read per
5742        // changed object. This test pins the behavior that makes the single-pass
5743        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
5744        // into the working set and flagged. A regression that scanned for only the
5745        // first/last changed object, or that dropped the batch union, would leave
5746        // one of the two broken links unreported and fail here.
5747        let fx = Fixture::new();
5748        // Linker A → deleted target #1 (in the body).
5749        fx.write(
5750            "wiki/people/refers-sarah.md",
5751            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5752        );
5753        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
5754        // sidecar `links` projection would miss, which is why this must be a
5755        // content scan, not a sidecar read).
5756        fx.write(
5757            "records/meetings/2026/05/kickoff.md",
5758            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
5759        );
5760        // The log says BOTH targets were deleted in this window.
5761        fx.write(
5762            "log.md",
5763            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
5764        );
5765
5766        let issues = validate_working_set(&fx.store(), None).unwrap();
5767        assert!(
5768            issues
5769                .iter()
5770                .any(|i| i.file == Path::new("wiki/people/refers-sarah.md")
5771                    && i.code == codes::WIKI_LINK_BROKEN),
5772            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
5773        );
5774        assert!(
5775            issues.iter().any(
5776                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
5777                    && i.code == codes::WIKI_LINK_BROKEN
5778            ),
5779            "linker to the SECOND deleted target (typed-field edge) must also be \
5780             pulled in and flagged — proves the scan covers the whole changed set, \
5781             not just one object: {issues:#?}"
5782        );
5783    }
5784
5785    #[test]
5786    fn frontmatter_block_sequence_links_each_get_their_own_line() {
5787        // Each block-sequence wiki-link reports on its own source line.
5788        let fx = Fixture::new();
5789        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
5790        fx.write(
5791            "records/meetings/m.md",
5792            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
5793        );
5794        let issues = fx.store_all();
5795        let broken_lines: BTreeSet<Option<u32>> = issues
5796            .iter()
5797            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
5798            .map(|i| i.line)
5799            .collect();
5800        assert_eq!(
5801            broken_lines.len(),
5802            2,
5803            "two distinct broken-link lines: {issues:#?}"
5804        );
5805    }
5806
5807    // ── Regression: null / non-scalar created/updated ────────────────────────
5808
5809    #[test]
5810    fn null_created_is_missing_not_silently_passed() {
5811        // Regression: a present-but-`null` `created:` previously slipped past
5812        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
5813        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
5814        let fx = Fixture::new();
5815        fx.write(
5816            "records/contacts/a.md",
5817            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5818        );
5819        let issues = fx.store_all();
5820        assert!(
5821            has(&issues, codes::FM_MISSING_CREATED),
5822            "null `created:` must read as missing: {issues:#?}"
5823        );
5824    }
5825
5826    #[test]
5827    fn sequence_created_is_bad_timestamp() {
5828        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
5829        let fx = Fixture::new();
5830        fx.write(
5831            "records/contacts/a.md",
5832            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5833        );
5834        let issues = fx.store_all();
5835        assert!(
5836            issues
5837                .iter()
5838                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
5839            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
5840        );
5841    }
5842
5843    // ── Regression: schema required null / empty-collection ──────────────────
5844
5845    #[test]
5846    fn required_field_null_or_empty_collection_is_missing() {
5847        // Regression: a plain required field (no shape/enum) holding YAML null
5848        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
5849        // previously validated with 0 issues — `scalar_string` returned None and
5850        // `.unwrap_or(false)` treated the value as non-empty.
5851        for value in ["", " []", " {}"] {
5852            let mut fx = Fixture::new();
5853            fx.config.schemas.insert(
5854                "contact".into(),
5855                Schema {
5856                    fields: vec![FieldSpec {
5857                        name: "name".into(),
5858                        required: true,
5859                        ..Default::default()
5860                    }],
5861                    ..Default::default()
5862                },
5863            );
5864            fx.write(
5865                "records/contacts/a.md",
5866                &format!(
5867                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
5868                ),
5869            );
5870            let issues = fx.store_all();
5871            assert!(
5872                issues
5873                    .iter()
5874                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
5875                        && i.key.as_deref() == Some("name")),
5876                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
5877            );
5878        }
5879    }
5880
5881    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
5882
5883    #[test]
5884    fn wiki_link_to_raw_source_file_resolves() {
5885        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
5886        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
5887        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
5888        let fx = Fixture::new();
5889        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
5890        fx.write(
5891            "records/contacts/a.md",
5892            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
5893        );
5894        let issues = fx.store_all();
5895        assert!(
5896            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
5897            "a link to an existing raw source file must not be broken: {issues:#?}"
5898        );
5899    }
5900
5901    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
5902
5903    #[test]
5904    fn non_utf8_content_file_is_reported() {
5905        // Regression: a content file with invalid UTF-8 bytes made
5906        // check_content_file return None silently, so the store passed with exit
5907        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
5908        let fx = Fixture::new();
5909        let abs = fx.dir.path().join("records/notes/corrupt.md");
5910        fs::create_dir_all(abs.parent().unwrap()).unwrap();
5911        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
5912        let issues = validate_working_set(&fx.store(), None).unwrap();
5913        assert!(
5914            has(&issues, codes::FM_UNREADABLE),
5915            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
5916        );
5917    }
5918
5919    // ── Regression: code-fence char/run tracking ─────────────────────────────
5920
5921    #[test]
5922    fn tilde_fence_containing_backtick_fence_does_not_invert() {
5923        // Regression: a `~~~` block legally contains ``` lines (documenting a
5924        // backtick fence); a naive toggle inverted `in_fence` and checked the
5925        // demo `[[fake]]` inside the code block as a live link. The link inside
5926        // BOTH fences must be skipped.
5927        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
5928        let links = extract_wiki_links(body);
5929        assert!(
5930            links.is_empty(),
5931            "wiki-link inside a nested code fence must be skipped: {links:?}"
5932        );
5933    }
5934
5935    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
5936
5937    #[test]
5938    fn all_sweep_visits_in_layer_log_folder() {
5939        // Regression: `validate --all` pruned every dir named `log`, so a real
5940        // content folder like `records/log/` was invisible to the full sweep —
5941        // reporting FEWER errors than the default scope. A frontmatter-less file
5942        // there must still surface FM_MISSING_TYPE under --all.
5943        let fx = Fixture::new();
5944        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
5945        let issues = fx.store_all();
5946        assert!(
5947            has(&issues, codes::FM_MISSING_TYPE),
5948            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
5949        );
5950    }
5951
5952    // ── Regression: flow-form list with whitespace ───────────────────────────
5953
5954    #[test]
5955    fn flow_form_link_list_with_spaces_is_flagged() {
5956        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
5957        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
5958        // text test. The value-based detector must catch the whitespace variant.
5959        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
5960        assert!(
5961            keys.iter().any(|k| k == "attendees"),
5962            "spaced flow-form list must be detected: {keys:?}"
5963        );
5964    }
5965
5966    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
5967
5968    #[test]
5969    fn middot_hashtag_summary_tail_round_trips() {
5970        // Regression: a tagless summary that legitimately ends in a single-spaced
5971        // ` · #word` tail round-trips through the renderer verbatim, but the loose
5972        // ` · ` strip mistook it for the tag block and reported a spurious,
5973        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
5974        // exact double-spaced `  ·  ` delimiter.
5975        assert_eq!(
5976            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
5977            Some("Standup notes · #standup"),
5978            "a single-spaced middot tail is part of the summary, not a tag block"
5979        );
5980        // The renderer's real double-spaced tag suffix IS still stripped.
5981        assert_eq!(
5982            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
5983            Some("Renewal champion"),
5984            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
5985        );
5986    }
5987
5988    // ── Regression: shape Url / Email edge cases ─────────────────────────────
5989
5990    #[test]
5991    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
5992        assert!(is_url("http://x"), "an 8-char http URL is valid");
5993        assert!(is_url("https://x"), "a 9-char https URL is valid");
5994        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
5995        assert!(!is_url("https://"), "a bare https scheme is rejected");
5996    }
5997
5998    #[test]
5999    fn email_shape_rejects_double_at() {
6000        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6001        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6002        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6003    }
6004
6005    // ── Regression: working-set vs --all agree on log.md links ───────────────
6006
6007    #[test]
6008    fn working_set_does_not_flag_log_md_body_links() {
6009        // Regression: the working-set incoming-linker scan runs root `log.md`
6010        // through the body wiki-link check, flagging a historical `[[deleted]]`
6011        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6012        // the append-only log can't have "fixed". The root meta files must be
6013        // excluded from the body link check, matching --all.
6014        let fx = Fixture::new();
6015        fx.write("records/contacts/a.md", &valid_contact("A"));
6016        fx.write(
6017            "log.md",
6018            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6019        );
6020        let issues = validate_working_set(&fx.store(), None).unwrap();
6021        assert!(
6022            !issues
6023                .iter()
6024                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6025                    && i.file == std::path::Path::new("log.md")),
6026            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6027        );
6028    }
6029
6030    // ── Regression: DB.md schema field lint ──────────────────────────────────
6031
6032    #[test]
6033    fn schema_duplicate_field_name_is_flagged() {
6034        let mut fx = Fixture::new();
6035        fx.config.schemas.insert(
6036            "contact".into(),
6037            Schema {
6038                fields: vec![
6039                    FieldSpec {
6040                        name: "name".into(),
6041                        required: true,
6042                        ..Default::default()
6043                    },
6044                    FieldSpec {
6045                        name: "name".into(),
6046                        ..Default::default()
6047                    },
6048                ],
6049                ..Default::default()
6050            },
6051        );
6052        let issues = fx.store_all();
6053        assert!(
6054            issues
6055                .iter()
6056                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6057            "a duplicate schema field name must be flagged: {issues:#?}"
6058        );
6059    }
6060
6061    #[test]
6062    fn schema_unknown_modifier_is_info() {
6063        let mut fx = Fixture::new();
6064        fx.config.schemas.insert(
6065            "contact".into(),
6066            Schema {
6067                fields: vec![FieldSpec {
6068                    name: "name".into(),
6069                    unknown_modifiers: vec!["requierd".into()],
6070                    ..Default::default()
6071                }],
6072                ..Default::default()
6073            },
6074        );
6075        let issues = fx.store_all();
6076        assert!(
6077            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6078                && i.severity == Severity::Info
6079                && i.key.as_deref() == Some("name")),
6080            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6081        );
6082    }
6083
6084    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6085    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6086    /// and the module doc-comment promises this code implements "exactly those
6087    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6088    /// new validation code is added to the engine but never documented.
6089    #[test]
6090    fn every_code_constant_is_documented_in_spec() {
6091        // Parse the canonical constant *values* straight out of this module's
6092        // source, so a future `pub const X: &str = "X";` is covered with no test
6093        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6094        let this_src = include_str!("validate.rs");
6095        let mut codes_in_module: Vec<String> = Vec::new();
6096        let mut in_codes_mod = false;
6097        for line in this_src.lines() {
6098            let t = line.trim();
6099            if t.starts_with("pub mod codes") {
6100                in_codes_mod = true;
6101                continue;
6102            }
6103            // The `mod codes` block ends at its closing brace at column 0.
6104            if in_codes_mod && line == "}" {
6105                break;
6106            }
6107            if in_codes_mod {
6108                if let Some(rest) = t.strip_prefix("pub const ") {
6109                    // rest = `NAME: &str = "VALUE";`
6110                    let value = rest
6111                        .split_once('=')
6112                        .map(|(_, v)| v.trim())
6113                        .and_then(|v| v.strip_prefix('"'))
6114                        .and_then(|v| v.strip_suffix("\";"))
6115                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6116                    codes_in_module.push(value.to_string());
6117                }
6118            }
6119        }
6120        assert!(
6121            codes_in_module.len() >= 36,
6122            "parsed only {} code constants from `mod codes`; the parser likely \
6123             broke against a source-format change",
6124            codes_in_module.len()
6125        );
6126
6127        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6128        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6129        let spec = fs::read_to_string(&spec_path)
6130            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6131
6132        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6133        let missing: Vec<&String> = codes_in_module
6134            .iter()
6135            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6136            .collect();
6137        assert!(
6138            missing.is_empty(),
6139            "validation codes emitted by the engine but absent from SPEC.md \
6140             § Validation (the declared complete vocabulary): {missing:?}"
6141        );
6142    }
6143}