Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
31//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
32//! log-header parse, and file walk here, reading only the two public,
33//! caller-populated fields of a [`Store`]: [`Store::root`] and
34//! [`Store::config`] — rather than routing through the sibling modules
35//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
36//! Keeping the checks local lets the validator report precise, per-issue
37//! diagnostics (exact codes, file, and context) without coupling its output to
38//! incidental behavior of the shared readers; the public surface and the
39//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is. Validation fails (non-zero exit)
/// as soon as one issue is a [`Severity::Error`]; the other two levels never
/// fail it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point left to the agent's discretion.
    Warning,
    /// Purely informational; has no effect on the exit status.
    Info,
}

/// One structured validation finding. It is agent-primary and machine-parseable
/// through `--json`; when present, `suggestion` carries a deterministic
/// remediation hint so the agent does not have to guess the fix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Issue {
    /// Severity of the finding; only [`Severity::Error`] fails validation.
    pub severity: Severity,
    /// Structured code (one of the [`codes`] constants), e.g.
    /// `"WIKI_LINK_SHORT_FORM"`.
    pub code: &'static str,
    /// File the finding is about.
    pub file: PathBuf,
    /// 1-based line number, when one applies.
    pub line: Option<u32>,
    /// Frontmatter key, when the finding concerns a specific field.
    pub key: Option<String>,
    /// Human-readable description of the finding.
    pub message: String,
    /// Deterministic remediation hint, when one exists.
    pub suggestion: Option<String>,
    /// Other files involved (e.g. the duplicate partner in a collision).
    pub related: Vec<PathBuf>,
}

impl Issue {
    /// Reports whether this issue fails validation, i.e. whether its severity
    /// is [`Severity::Error`]. Warnings and info issues return `false`.
    pub fn is_error(&self) -> bool {
        self.severity == Severity::Error
    }
}
92
/// The canonical validation issue codes: one constant for each row of the
/// SPEC.md § Validation table. Call sites use these constants rather than bare
/// string literals so the code and the SPEC table cannot silently drift apart.
pub mod codes {
    /// Declares every listed identifier as a `pub const … : &str` whose value
    /// is the identifier's own spelling, so a constant's name and its emitted
    /// code string cannot diverge.
    macro_rules! issue_codes {
        ($($(#[$doc:meta])* $name:ident;)+) => {
            $(
                $(#[$doc])*
                pub const $name: &str = stringify!($name);
            )+
        };
    }

    issue_codes! {
        /// The path has no `DB.md`, so it is not a db.md store.
        NOT_A_STORE;
        /// The store's `DB.md` is not `type: db-md`.
        DB_MD_BAD_TYPE;
        /// The store's `DB.md` frontmatter is missing `scope` or `owner`.
        DB_MD_MISSING_FIELD;
        /// `DB.md` contains an `##` section other than the three recognized
        /// ones.
        DB_MD_UNKNOWN_SECTION;
        /// A `DB.md ## Schemas` field declaration is malformed (empty or
        /// duplicate field name) or carries an unrecognized modifier.
        DB_MD_SCHEMA_FIELD;
        /// A content file has no `type:`.
        FM_MISSING_TYPE;
        /// A content file has no `created:`.
        FM_MISSING_CREATED;
        /// A content file has no `updated:`.
        FM_MISSING_UPDATED;
        /// A content file cannot be read (invalid UTF-8, or an I/O error).
        FM_UNREADABLE;
        /// The frontmatter block is not valid YAML.
        FM_MALFORMED_YAML;
        /// `created` or `updated` is not ISO-8601.
        FM_BAD_TIMESTAMP;
        /// `meta-type` is present but is not one of fact / operational /
        /// conclusion.
        FM_BAD_META_TYPE;
        /// A content file has no `summary`.
        SUMMARY_MISSING;
        /// `summary` is present but empty.
        SUMMARY_EMPTY;
        /// `summary` contains newlines.
        SUMMARY_MULTILINE;
        /// `summary` is longer than 200 chars.
        SUMMARY_TOO_LONG;
        /// A wiki-link target is not a full store-relative path.
        WIKI_LINK_SHORT_FORM;
        /// A wiki-link target file does not exist.
        WIKI_LINK_BROKEN;
        /// A wiki-link target matches multiple files (defensive).
        WIKI_LINK_AMBIGUOUS;
        /// A wiki-link target carries a `.md` extension; it should be dropped.
        WIKI_LINK_HAS_EXTENSION;
        /// A frontmatter list uses inline `[[[a]], [[b]]]`; block form is
        /// required.
        WIKI_LINK_FLOW_FORM_LIST;
        /// Two files declare the same explicit `id`.
        DUP_ID;
        /// Two records of a type collide on a `DB.md ## Schemas` `unique:` key.
        DUP_UNIQUE_KEY;
        /// A `DB.md` schema requires a field that is absent.
        SCHEMA_MISSING_REQUIRED;
        /// A value does not match the schema's shape modifier.
        SCHEMA_SHAPE_MISMATCH;
        /// A `link to <prefix>/` field holds a plain or wrong-prefix value.
        SCHEMA_LINK_PREFIX_MISMATCH;
        /// A value is not in the schema's `enum`.
        SCHEMA_ENUM_VIOLATION;
        /// A write was attempted on a `### Frozen pages` path (write-time).
        POLICY_FROZEN_PAGE;
        /// A file whose type is listed under `### Ignored types` exists.
        POLICY_IGNORED_TYPE_PRESENT;
        /// A `meta-type: conclusion` record derives from an ignored-type
        /// record.
        POLICY_IGNORED_TYPE_DERIVED;
        /// A `log.md` entry header timestamp is unparseable.
        LOG_BAD_TIMESTAMP;
        /// A `log.md` entry kind is not recognized.
        LOG_UNKNOWN_KIND;
        /// `log.md` entries are not in non-decreasing time order (possible
        /// rewrite).
        LOG_OUT_OF_ORDER;
        /// A non-empty canonical folder lacks `index.md`.
        INDEX_MISSING;
        /// An `index.md` lists a file that no longer exists.
        INDEX_STALE_ENTRY;
        /// A file is not listed in its folder's `index.md`.
        INDEX_MISSING_ENTRY;
        /// An `index.md` sits in an empty / non-canonical folder.
        INDEX_ORPHAN;
        /// An index's `scope:` does not match its filesystem location.
        INDEX_WRONG_SCOPE;
        /// An index entry's text does not match the target file's `summary`.
        INDEX_SUMMARY_MISMATCH;
        /// A type-folder's `index.jsonl` twin is missing.
        INDEX_JSONL_MISSING;
        /// A file is absent from the `index.jsonl`, or a jsonl record points
        /// at a missing file.
        INDEX_JSONL_DESYNC;
        /// An `index.jsonl` record's fields do not match the file's
        /// frontmatter.
        INDEX_JSONL_STALE;
        /// `tags` is not a flat YAML list of short scalar labels.
        TAGS_MALFORMED;
        /// A line in `assets.jsonl` is not a valid asset record.
        ASSET_MANIFEST_MALFORMED;
        /// A content file references an `asset`/`assets` path that has no
        /// record in `assets.jsonl` (run `dbmd assets scan`).
        ASSET_UNDECLARED;
        /// An `assets.jsonl` record names a wrapper file that does not exist.
        ASSET_WRAPPER_BROKEN;
        /// An `assets.jsonl` record's path is referenced by no wrapper.
        ASSET_MANIFEST_ORPHAN;
        /// An `asset`/`assets` path points at a tracked markdown content file.
        ASSET_PATH_IS_CONTENT;
    }
}
197
/// Upper bound on a `summary`'s length, in chars, as set by the SPEC. A
/// summary longer than this is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;

/// The `log.md` entry kinds the validator recognizes (SPEC § `log.md`). An
/// entry whose kind is not in this list is reported as `LOG_UNKNOWN_KIND`,
/// which is a warning rather than an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
214
215// ─────────────────────────────────────────────────────────────────────────────
216//  Public entrypoints
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// **Loop default.** Validate the working set: content files changed since
220/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
221/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
222/// none of the cross-file global passes (entity-dedup, every-index sync,
223/// `log.md` ordering) that `--all` adds. If the default call finds no logged
224/// changed objects, it falls back to a per-file content sweep so an externally
225/// edited or freshly copied store cannot pass vacuously.
226///
227/// **Cost.** The changed set is read from `log.md` — O(changed): every
228/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
229/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
230/// over that set plus its incoming linkers — also O(changed). The one part that
231/// is *not* O(changed) is discovering those incoming linkers: a link to a
232/// changed path can live in the body or a typed frontmatter field of any file,
233/// so it is found by a **single** embedded-ripgrep pass over the store
234/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
235/// scan, flat in the changed-set size. (It was previously a full store read
236/// *per* changed object — `O(changed × store)`; that is the blow-up this path
237/// no longer pays.) The unavoidable single content scan is the same shape as
238/// free-text `dbmd search`; the sidecar `links` projection can't replace it
239/// because it omits body/typed-field edges.
240pub fn validate_working_set(
241    store: &Store,
242    since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244    if !store_marker_present(store) {
245        return Ok(vec![not_a_store_issue(store)]);
246    }
247
248    let cutoff = match since {
249        Some(ts) => Some(ts),
250        None => last_validate_at(store),
251    };
252
253    // 1. Changed objects, straight from the log (O(changed) — never a walk).
254    let changed = changed_objects_since(store, cutoff);
255    if changed.is_empty() && since.is_none() {
256        return validate_content_sweep(store);
257    }
258
259    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
260    //    path (the linker may now be stale even though it didn't change). The
261    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
262    //    pass over the store for the WHOLE changed set (one `.md` walk, one
263    //    presence-only/early-exit scan per file), not one walk per object. This
264    //    is the fix for the `O(changed × store)` blow-up that calling
265    //    `find_links_to` in a loop produced (a full store read per changed
266    //    object); the cost is now a single store scan regardless of how many
267    //    objects changed. A returned self-link is harmlessly deduped by the set
268    //    (the object is already inserted below).
269    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270    let mut working: BTreeSet<PathBuf> = changed;
271    for linker in store.find_links_to_any(&changed_targets)? {
272        working.insert(linker);
273    }
274
275    let mut issues = Vec::new();
276    for rel in &working {
277        let abs = store.root.join(rel);
278        // A changed path can be a *deletion* — skip files that no longer exist;
279        // the incoming-linker scan above already flagged links into them.
280        if !abs.is_file() {
281            continue;
282        }
283        // `None` basename index: the working-set pass does not build the
284        // store-wide basename map (that is a `--all`-only structure), so a bare
285        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
286        // `--all` sweep does the ambiguity upgrade.
287        check_content_file(store, rel, &abs, None, &mut issues);
288    }
289    issues.sort_by(issue_order);
290    Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294    let mut issues = Vec::new();
295    for rel in store.walk()? {
296        let abs = store.root.join(&rel);
297        check_content_file(store, &rel, &abs, None, &mut issues);
298    }
299    issues.sort_by(issue_order);
300    Ok(issues)
301}
302
303/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
304/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
305/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
306/// loop.
307pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308    if !store_marker_present(store) {
309        return Ok(vec![not_a_store_issue(store)]);
310    }
311
312    let mut issues = Vec::new();
313
314    // Store-identity file: `DB.md` shape (type / required fields / section
315    // headers). A single root file, checked once in the sweep — not a content
316    // file (it carries no `summary`), so it is not part of `walk_content_files`.
317    check_db_md(store, &mut issues);
318
319    let files = walk_content_files(&store.root);
320
321    // The basename index makes the short-form wiki-link check able to upgrade a
322    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
323    // Built once from the already-gathered sweep list (no extra walk); only the
324    // `--all` path has it (the working-set path stays O(changed)).
325    let basenames = build_basename_index(&files);
326
327    // Per-file checks over the whole store.
328    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329    for rel in &files {
330        let abs = store.root.join(rel);
331        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332            parsed.push((rel.clone(), p));
333        }
334    }
335
336    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
337    check_duplicates(store, &parsed, &mut issues);
338
339    // Cross-file: hierarchical index.md + index.jsonl sync.
340    check_indexes(store, &files, &mut issues);
341
342    // Cross-file: log.md well-formedness + ordering.
343    check_log(store, &mut issues);
344
345    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
346    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
347    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
348    // a fresh clone with no restored bytes still passes here.
349    check_assets(store, &parsed, &mut issues);
350
351    issues.sort_by(issue_order);
352    Ok(issues)
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356//  Per-file content checks (shared by both scopes)
357// ─────────────────────────────────────────────────────────────────────────────
358
/// The slice of a per-file parse that `validate_all`'s cross-file passes
/// reuse: the parsed YAML mapping (the source of dedup keys) and the raw
/// frontmatter text (the source for text-based wiki-link extraction). The
/// body and the fence line are consumed inline during the per-file pass and
/// are deliberately not carried here.
struct Parsed {
    /// The top-level YAML mapping, keyed by string. `None` means the YAML was
    /// malformed, in which case a `FM_MALFORMED_YAML` issue has already been
    /// emitted for the file.
    fm: Option<BTreeMap<String, Value>>,
    /// The raw frontmatter YAML text (the text between the fences), used for
    /// text-based wiki-link extraction in dedup.
    fm_yaml: String,
}
371
372/// Run every per-file check on one content file, pushing issues. Returns the
373/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
374/// `None` only when the file is unreadable or has no frontmatter block at all
375/// (which for a content file is itself reported).
376fn check_content_file(
377    store: &Store,
378    rel: &Path,
379    abs: &Path,
380    basenames: Option<&BasenameIndex>,
381    issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383    let text = match std::fs::read_to_string(abs) {
384        Ok(t) => t,
385        Err(e) => {
386            // The file exists in the walk but can't be read as UTF-8 text
387            // (invalid bytes) or hit an I/O error. Returning `None` silently
388            // here let a store whose only content file was binary garbage pass
389            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
390            // sweep exists to prevent. Report it so the agent gets an actionable
391            // diagnostic naming the unreadable file (and `index rebuild`, which
392            // hard-fails on the same file, isn't the only signal).
393            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394                "file is not valid UTF-8 text".to_string()
395            } else {
396                format!("file could not be read: {e}")
397            };
398            push(
399                issues,
400                Severity::Error,
401                codes::FM_UNREADABLE,
402                rel,
403                None,
404                None,
405                format!("content file is unreadable: {detail}"),
406                Some(
407                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408                        .into(),
409                ),
410                vec![],
411            );
412            return None;
413        }
414    };
415
416    let is_content = is_content_file(rel);
417
418    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419        Some(split) => split,
420        None => {
421            // No frontmatter at all. For a content file that means there's no
422            // `type:` and no `summary:` — report both the way a parsed-but-empty
423            // file would, so the agent gets the same actionable codes.
424            if is_content {
425                push(
426                    issues,
427                    Severity::Error,
428                    codes::FM_MISSING_TYPE,
429                    rel,
430                    None,
431                    Some("type".into()),
432                    "content file has no frontmatter `type:`".into(),
433                    Some("add a YAML frontmatter block with `type:`".into()),
434                    vec![],
435                );
436                push(
437                    issues,
438                    Severity::Error,
439                    codes::SUMMARY_MISSING,
440                    rel,
441                    None,
442                    Some("summary".into()),
443                    "content file has no `summary`".into(),
444                    Some("run `dbmd fm init`".into()),
445                    vec![],
446                );
447            }
448            return None;
449        }
450    };
451
452    // Parse the YAML block.
453    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455        // An empty frontmatter block parses as Null; treat as an empty mapping.
456        Ok(Value::Null) => Some(BTreeMap::new()),
457        Ok(_) => {
458            // A scalar / sequence at the top level isn't a frontmatter mapping.
459            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
460            // block is opaque, so there is no single offending field line.
461            push(
462                issues,
463                Severity::Error,
464                codes::FM_MALFORMED_YAML,
465                rel,
466                Some(1),
467                None,
468                "frontmatter is not a YAML mapping".into(),
469                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470                vec![],
471            );
472            None
473        }
474        Err(e) => {
475            // Anchor to line 1 (the opening `---`): an unparseable block has no
476            // single offending field line; the agent re-reads the whole block.
477            push(
478                issues,
479                Severity::Error,
480                codes::FM_MALFORMED_YAML,
481                rel,
482                Some(1),
483                None,
484                format!("frontmatter block isn't valid YAML: {e}"),
485                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486                vec![],
487            );
488            None
489        }
490    };
491
492    if let Some(map) = &fm {
493        // The detailed frontmatter checks only run when the YAML parsed.
494        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495    }
496
497    // Wiki-link doctrine checks run on the body of content files (and on
498    // `index.md` files, whose entries are wiki-links too). They are NOT run on
499    // the root append-only meta files `log.md`/`DB.md`: those reach this
500    // function only via the working-set incoming-linker scan (`walk_all_md`
501    // includes them), and `validate --all` never link-checks their bodies
502    // (`walk_content_files` skips them; `check_log`/`check_db_md` do no body
503    // link checks). Without this guard the two scopes disagree — a historical
504    // `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]` in DB.md's
505    // `## Agent instructions`, is flagged `WIKI_LINK_BROKEN` by the default
506    // working set but is clean under `--all`. The log is append-only by spec, so
507    // the suggested "fix the link" remedy can't even be applied.
508    if !is_root_meta_file(rel) {
509        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
510    }
511
512    Some(Parsed { fm, fm_yaml })
513}
514
515/// All frontmatter-level checks for a content file with valid YAML.
516fn check_frontmatter(
517    store: &Store,
518    rel: &Path,
519    fm: &BTreeMap<String, Value>,
520    fm_yaml: &str,
521    basenames: Option<&BasenameIndex>,
522    issues: &mut Vec<Issue>,
523    is_content: bool,
524) {
525    let type_ = fm.get("type").and_then(scalar_string);
526
527    // ── type ────────────────────────────────────────────────────────────────
528    if is_content && type_.is_none() {
529        push(
530            issues,
531            Severity::Error,
532            codes::FM_MISSING_TYPE,
533            rel,
534            fm_key_line_or_top(fm_yaml, "type"),
535            Some("type".into()),
536            "content file has no `type:`".into(),
537            Some("add a `type:` field (e.g. `type: contact`)".into()),
538            vec![],
539        );
540    }
541
542    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
543    // Present-but-out-of-enum is an error; absent is fine (effective default
544    // `fact`). Sources don't normally carry one, but validating the value when
545    // present is layer-agnostic and harmless.
546    if is_content {
547        if let Some(mt) = fm.get("meta-type").and_then(scalar_string) {
548            if !matches!(mt.as_str(), "fact" | "operational" | "conclusion") {
549                push(
550                    issues,
551                    Severity::Error,
552                    codes::FM_BAD_META_TYPE,
553                    rel,
554                    fm_key_line_or_top(fm_yaml, "meta-type"),
555                    Some("meta-type".into()),
556                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
557                    Some(
558                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
559                            .into(),
560                    ),
561                    vec![],
562                );
563            }
564        }
565    }
566
567    // ── summary (universal on content files) ──────────────────────────────────
568    if is_content {
569        check_summary(rel, fm, fm_yaml, issues);
570    }
571
572    // ── timestamps: created / updated ─────────────────────────────────────────
573    // The `created`/`updated` contract is content-file-only; meta files
574    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
575    if is_content {
576        for (key, missing_code) in [
577            ("created", codes::FM_MISSING_CREATED),
578            ("updated", codes::FM_MISSING_UPDATED),
579        ] {
580            // A key that is absent, or present-but-`null`, has *no* timestamp →
581            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
582            // "no timestamp", so a null `created:` must read as missing, not
583            // silently pass.
584            let value = fm.get(key);
585            let missing = value.is_none() || value.is_some_and(Value::is_null);
586            if missing {
587                push(
588                    issues,
589                    Severity::Error,
590                    missing_code,
591                    rel,
592                    fm_key_line_or_top(fm_yaml, key),
593                    Some(key.into()),
594                    format!("content file has no `{key}:` timestamp"),
595                    Some(format!(
596                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
597                    )),
598                    vec![],
599                );
600            } else if let Some(v) = value {
601                // Present and non-null. A scalar is checked for ISO-8601; a
602                // sequence/mapping is not a timestamp string at all and so
603                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
604                // through the way it did when `scalar_string` returned `None`
605                // and the branch silently no-oped).
606                match scalar_string(v) {
607                    Some(s) if is_iso8601(&s) => {}
608                    Some(s) => push(
609                        issues,
610                        Severity::Error,
611                        codes::FM_BAD_TIMESTAMP,
612                        rel,
613                        fm_key_line(fm_yaml, key),
614                        Some(key.into()),
615                        format!("`{key}` is not ISO-8601: {s:?}"),
616                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
617                        vec![],
618                    ),
619                    None => push(
620                        issues,
621                        Severity::Error,
622                        codes::FM_BAD_TIMESTAMP,
623                        rel,
624                        fm_key_line(fm_yaml, key),
625                        Some(key.into()),
626                        format!(
627                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
628                        ),
629                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
630                        vec![],
631                    ),
632                }
633            }
634        }
635    }
636    // ── tags shape ────────────────────────────────────────────────────────────
637    if let Some(tags) = fm.get("tags") {
638        if !is_flat_scalar_list(tags) {
639            push(
640                issues,
641                Severity::Warning,
642                codes::TAGS_MALFORMED,
643                rel,
644                fm_key_line(fm_yaml, "tags"),
645                Some("tags".into()),
646                "`tags` must be a flat YAML list of short scalar labels".into(),
647                Some("use block form: one `- <tag>` per line".into()),
648                vec![],
649            );
650        }
651    }
652
653    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
654    for key in detect_flow_form_link_lists(fm_yaml) {
655        push(
656            issues,
657            Severity::Error,
658            codes::WIKI_LINK_FLOW_FORM_LIST,
659            rel,
660            fm_key_line(fm_yaml, &key),
661            Some(key.clone()),
662            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
663            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
664            vec![],
665        );
666    }
667
668    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
669    // Skip keys that have an explicit `link to` schema spec — those are checked
670    // (with prefix enforcement) in `check_schema`, and double-reporting the same
671    // link via two paths would be noise.
672    let schema_link_keys: BTreeSet<String> =
673        effective_schema(store, type_.as_deref().unwrap_or(""))
674            .map(|s| {
675                s.fields
676                    .iter()
677                    .filter(|f| f.link_prefix.is_some())
678                    .map(|f| f.name.clone())
679                    .collect()
680            })
681            .unwrap_or_default();
682    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
683        if schema_link_keys.contains(&key) {
684            continue;
685        }
686        check_wiki_link(
687            store,
688            rel,
689            &link,
690            Some(link.line),
691            Some(&key),
692            basenames,
693            issues,
694        );
695    }
696
697    // ── policies: ignored types ──────────────────────────────────────────────
698    if let Some(t) = &type_ {
699        if store.config.ignored_types.iter().any(|it| it == t) {
700            push(
701                issues,
702                Severity::Info,
703                codes::POLICY_IGNORED_TYPE_PRESENT,
704                rel,
705                fm_key_line(fm_yaml, "type"),
706                Some("type".into()),
707                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
708                Some(
709                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
710                        .into(),
711                ),
712                // The policy source: `DB.md` declares the ignored type.
713                vec![PathBuf::from("DB.md")],
714            );
715        }
716        // A wiki-page deriving from an ignored-type record → warning. The
717        // decision lives in the shared `derived_from_ignored_type` entry point;
718        // this side only supplies the `derived_from` targets (with their line,
719        // which the issue carries) and renders the finding.
720        let meta_type = fm
721            .get("meta-type")
722            .and_then(scalar_string)
723            .unwrap_or_else(|| "fact".to_string());
724        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
725            if let Some(hit) =
726                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
727            {
728                push(
729                    issues,
730                    Severity::Warning,
731                    codes::POLICY_IGNORED_TYPE_DERIVED,
732                    rel,
733                    Some(link.line),
734                    Some("derived_from".into()),
735                    format!(
736                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
737                        hit.target, hit.target_type
738                    ),
739                    Some(
740                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
741                            .into(),
742                    ),
743                    // The ignored-type source record, plus `DB.md` (the policy
744                    // source that lists the ignored type).
745                    vec![
746                        PathBuf::from(format!("{}.md", hit.target)),
747                        PathBuf::from("DB.md"),
748                    ],
749                );
750            }
751        }
752    }
753
754    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
755    if let Some(t) = &type_ {
756        if let Some(schema) = effective_schema(store, t) {
757            check_schema(store, rel, fm, fm_yaml, &schema, issues);
758        }
759    }
760}
761
762/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
763fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
764    let line = fm_key_line(fm_yaml, "summary");
765    match fm.get("summary") {
766        None => push(
767            issues,
768            Severity::Error,
769            codes::SUMMARY_MISSING,
770            rel,
771            // A missing `summary` key has no line of its own → anchor to the
772            // frontmatter block top (line 1), the EXPECTED field-absence rule.
773            fm_key_line_or_top(fm_yaml, "summary"),
774            Some("summary".into()),
775            "content file has no `summary`".into(),
776            Some("run `dbmd fm init`".into()),
777            vec![],
778        ),
779        Some(v) => {
780            let s = scalar_string(v).unwrap_or_default();
781            if s.trim().is_empty() {
782                push(
783                    issues,
784                    Severity::Error,
785                    codes::SUMMARY_EMPTY,
786                    rel,
787                    line,
788                    Some("summary".into()),
789                    "`summary` is present but empty".into(),
790                    Some("write a one-line summary, or run `dbmd fm init`".into()),
791                    vec![],
792                );
793            } else if s.contains('\n') {
794                push(
795                    issues,
796                    Severity::Error,
797                    codes::SUMMARY_MULTILINE,
798                    rel,
799                    line,
800                    Some("summary".into()),
801                    "`summary` must be one line (contains a newline)".into(),
802                    Some("collapse the summary to a single line".into()),
803                    vec![],
804                );
805            } else if s.chars().count() > MAX_SUMMARY_LEN {
806                push(
807                    issues,
808                    Severity::Warning,
809                    codes::SUMMARY_TOO_LONG,
810                    rel,
811                    line,
812                    Some("summary".into()),
813                    format!(
814                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
815                        s.chars().count()
816                    ),
817                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
818                    vec![],
819                );
820            }
821        }
822    }
823}
824
825/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
826fn check_body_wiki_links(
827    store: &Store,
828    rel: &Path,
829    body: &str,
830    fm_end_line: u32,
831    basenames: Option<&BasenameIndex>,
832    issues: &mut Vec<Issue>,
833) {
834    for link in extract_wiki_links(body) {
835        // Body lines are offset past the frontmatter block. `link.line` is
836        // 1-based within `body`; the body starts at `fm_end_line + 1`.
837        let abs_line = fm_end_line + link.line;
838        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
839    }
840}
841
/// Store-wide lookup from a bare basename (file stem, extension removed) to
/// every store-relative path that carries it.
///
/// It exists so the short-form wiki-link check can tell a merely short-form
/// target (`WIKI_LINK_SHORT_FORM`) from an *ambiguous* one whose basename is
/// shared by two or more files (`WIKI_LINK_AMBIGUOUS`, the defensive code). It
/// is built once per `validate --all` sweep. The working-set path passes `None`
/// instead: that loop is O(changed) and never walks the store, so it reports
/// the plain short-form error without the scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group `files` by file stem to produce the [`BasenameIndex`].
///
/// `files` is the list `validate_all` has already gathered, so no extra walk
/// happens here. Paths with no stem, or whose stem is not valid UTF-8, are left
/// out. Within each bucket the paths keep the order they had in `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files
        .iter()
        .filter_map(|path| {
            let stem = path.file_stem()?.to_str()?;
            Some((stem.to_owned(), path))
        })
        .fold(BasenameIndex::new(), |mut index, (stem, path)| {
            index.entry(stem).or_default().push(path.clone());
            index
        })
}
862
863/// The shared per-wiki-link doctrine + integrity check used by both body links
864/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
865/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
866/// when its bare basename matches ≥2 files.
867fn check_wiki_link(
868    store: &Store,
869    rel: &Path,
870    link: &Link,
871    line: Option<u32>,
872    key: Option<&str>,
873    basenames: Option<&BasenameIndex>,
874    issues: &mut Vec<Issue>,
875) {
876    let bare = link.target.trim_end_matches(".md");
877
878    // Short-form: not a full store-relative path (no `/`, or first segment isn't
879    // a known layer).
880    if !is_full_store_path(bare) {
881        // Ambiguous (defensive) takes precedence over plain short-form when the
882        // target is a bare basename (no `/`) that matches ≥2 files in the store.
883        // Only computable in the sweep (where `basenames` is populated); the
884        // working-set path falls through to the plain short-form error.
885        if !bare.contains('/') {
886            if let Some(idx) = basenames {
887                if let Some(matches) = idx.get(bare) {
888                    if matches.len() >= 2 {
889                        let mut related = matches.clone();
890                        related.sort();
891                        push(
892                            issues,
893                            Severity::Error,
894                            codes::WIKI_LINK_AMBIGUOUS,
895                            rel,
896                            line,
897                            key.map(str::to_string),
898                            format!(
899                                "short-form wiki-link `[[{}]]` matches multiple files",
900                                link.target
901                            ),
902                            Some("use the full store-relative path to disambiguate".into()),
903                            related,
904                        );
905                        return;
906                    }
907                }
908            }
909        }
910        push(
911            issues,
912            Severity::Error,
913            codes::WIKI_LINK_SHORT_FORM,
914            rel,
915            line,
916            key.map(str::to_string),
917            format!(
918                "wiki-link `[[{}]]` is not a full store-relative path",
919                link.target
920            ),
921            short_form_suggestion(bare),
922            vec![],
923        );
924        // Don't also report broken; the agent must fix the form first.
925        return;
926    }
927
928    // `.md` extension → warning, then still check existence.
929    if link.target.ends_with(".md") {
930        push(
931            issues,
932            Severity::Warning,
933            codes::WIKI_LINK_HAS_EXTENSION,
934            rel,
935            line,
936            key.map(str::to_string),
937            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
938            Some(format!("drop the extension: [[{bare}]]")),
939            vec![],
940        );
941    }
942
943    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
944    // same way the graph engine does — the literal path first (so a link to a
945    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
946    // the `.md`-appended path.
947    match resolve_wiki_target(store, bare) {
948        TargetResolution::Exists => {}
949        TargetResolution::Missing => push(
950            issues,
951            Severity::Error,
952            codes::WIKI_LINK_BROKEN,
953            rel,
954            line,
955            key.map(str::to_string),
956            format!("wiki-link target `{bare}` doesn't exist"),
957            Some(format!(
958                "create `{bare}.md`, or point the link at an existing file"
959            )),
960            vec![],
961        ),
962        TargetResolution::Unsafe => push(
963            issues,
964            Severity::Error,
965            codes::WIKI_LINK_BROKEN,
966            rel,
967            line,
968            key.map(str::to_string),
969            format!("wiki-link target `{bare}` is not a safe store-relative path"),
970            Some("use a full store-relative path under sources/ or records/".into()),
971            vec![],
972        ),
973    }
974}
975
976// ─────────────────────────────────────────────────────────────────────────────
977//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
978// ─────────────────────────────────────────────────────────────────────────────
979
980/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
981/// block, or `None`. This is the **only** source of schema enforcement — the
982/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
983/// store that wants its `contact` / `expense` / etc. fields enforced declares
984/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
985/// copy-in starting point.
986fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
987    store.config.schemas.get(type_).cloned()
988}
989
990/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
991fn check_schema(
992    store: &Store,
993    rel: &Path,
994    fm: &BTreeMap<String, Value>,
995    fm_yaml: &str,
996    schema: &Schema,
997    issues: &mut Vec<Issue>,
998) {
999    for spec in &schema.fields {
1000        let present = fm.get(&spec.name);
1001        let line = fm_key_line(fm_yaml, &spec.name);
1002
1003        // Required. "Empty" means: the key is absent, or its value carries no
1004        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1005        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1006        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1007        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1008        // a required field with a null or empty-collection value pass silently;
1009        // route them through `is_empty_value` instead.
1010        let is_empty = match present {
1011            None => true,
1012            Some(v) => is_empty_value(v),
1013        };
1014        if spec.required && is_empty {
1015            push(
1016                issues,
1017                Severity::Error,
1018                codes::SCHEMA_MISSING_REQUIRED,
1019                rel,
1020                // Absent key → anchor to the frontmatter top (line 1); a
1021                // present-but-empty value keeps its own line.
1022                fm_key_line_or_top(fm_yaml, &spec.name),
1023                Some(spec.name.clone()),
1024                format!("required field `{}` is absent or empty", spec.name),
1025                Some(format!("set `{}` to a non-empty value", spec.name)),
1026                vec![],
1027            );
1028            continue;
1029        }
1030        let Some(value) = present else { continue };
1031
1032        // An OPTIONAL field that is `null` or empty is simply unset — there is
1033        // no value to shape/enum/link-check. (The required+empty case already
1034        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1035        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1036        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1037        let value_empty = value.is_null()
1038            || scalar_string(value)
1039                .map(|s| s.trim().is_empty())
1040                .unwrap_or(false);
1041        if !spec.required && value_empty {
1042            continue;
1043        }
1044
1045        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1046        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1047        if let Some(prefix) = &spec.link_prefix {
1048            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1049            continue; // a link field is never also shape/enum-checked
1050        }
1051
1052        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1053        // or mapping satisfies neither, and would otherwise slip through both
1054        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1055        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1056        // than let a structurally-wrong value validate clean. (Link fields,
1057        // which legitimately take block-form sequences, already `continue`d.)
1058        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1059            push(
1060                issues,
1061                Severity::Error,
1062                codes::SCHEMA_SHAPE_MISMATCH,
1063                rel,
1064                line,
1065                Some(spec.name.clone()),
1066                format!(
1067                    "`{}` must be a scalar value, found a list or mapping",
1068                    spec.name
1069                ),
1070                Some(format!("set `{}` to a single scalar value", spec.name)),
1071                vec![],
1072            );
1073            continue;
1074        }
1075
1076        // enum
1077        if let Some(allowed) = &spec.enum_values {
1078            if let Some(s) = scalar_string(value) {
1079                if !allowed.iter().any(|a| a == &s) {
1080                    push(
1081                        issues,
1082                        Severity::Error,
1083                        codes::SCHEMA_ENUM_VIOLATION,
1084                        rel,
1085                        line,
1086                        Some(spec.name.clone()),
1087                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1088                        Some(format!("use one of: {}", allowed.join(", "))),
1089                        vec![],
1090                    );
1091                }
1092            }
1093            continue;
1094        }
1095
1096        // shape
1097        if let Some(shape) = spec.shape {
1098            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1099        }
1100    }
1101}
1102
1103/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1104/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1105/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1106/// recognized exactly like the quoted form.
1107fn check_schema_link(
1108    store: &Store,
1109    rel: &Path,
1110    field: &str,
1111    fm_yaml: &str,
1112    prefix: &Path,
1113    line: Option<u32>,
1114    issues: &mut Vec<Issue>,
1115) {
1116    let prefix_str = prefix.to_string_lossy();
1117    let prefix_str = prefix_str.trim_end_matches('/');
1118    let suggestion = |target_leaf: &str| {
1119        Some(format!(
1120            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1121        ))
1122    };
1123
1124    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1125    if links.is_empty() {
1126        // No wiki-link in the field's value → it's a plain string.
1127        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1128        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1129        let leaf = slugish(raw);
1130        push(
1131            issues,
1132            Severity::Error,
1133            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1134            rel,
1135            line,
1136            Some(field.to_string()),
1137            format!(
1138                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1139            ),
1140            suggestion(&leaf),
1141            vec![],
1142        );
1143        return;
1144    }
1145
1146    for link in links {
1147        if link.target.ends_with(".md") {
1148            let bare = link.target.trim_end_matches(".md");
1149            push(
1150                issues,
1151                Severity::Warning,
1152                codes::WIKI_LINK_HAS_EXTENSION,
1153                rel,
1154                Some(link.line),
1155                Some(field.to_string()),
1156                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1157                Some(format!("drop the extension: [[{bare}]]")),
1158                vec![],
1159            );
1160        }
1161        let bare = link.target.trim_end_matches(".md");
1162        if !path_under_prefix(bare, prefix_str) {
1163            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1164            push(
1165                issues,
1166                Severity::Error,
1167                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1168                rel,
1169                line,
1170                Some(field.to_string()),
1171                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1172                suggestion(leaf),
1173                vec![],
1174            );
1175        } else {
1176            // Correct prefix — still surface a broken target so the agent sees
1177            // one consistent vocabulary. Resolve like the graph engine (literal
1178            // path first, then `.md`) so a `link to sources/` field pointing at a
1179            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1180            match resolve_wiki_target(store, bare) {
1181                TargetResolution::Exists => {}
1182                TargetResolution::Missing => push(
1183                    issues,
1184                    Severity::Error,
1185                    codes::WIKI_LINK_BROKEN,
1186                    rel,
1187                    line,
1188                    Some(field.to_string()),
1189                    format!("wiki-link target `{bare}` doesn't exist"),
1190                    Some(format!(
1191                        "create `{bare}.md`, or point the link at an existing file"
1192                    )),
1193                    vec![],
1194                ),
1195                TargetResolution::Unsafe => push(
1196                    issues,
1197                    Severity::Error,
1198                    codes::WIKI_LINK_BROKEN,
1199                    rel,
1200                    line,
1201                    Some(field.to_string()),
1202                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1203                    Some("use a full store-relative path under sources/ or records/".into()),
1204                    vec![],
1205                ),
1206            }
1207        }
1208    }
1209}
1210
1211/// Shape enforcement for a non-link, non-enum schema field.
1212fn check_schema_shape(
1213    rel: &Path,
1214    field: &str,
1215    value: &Value,
1216    shape: Shape,
1217    line: Option<u32>,
1218    issues: &mut Vec<Issue>,
1219) {
1220    let s = scalar_string(value).unwrap_or_default();
1221    let ok = match shape {
1222        Shape::String => true, // any scalar string
1223        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1224        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1225        Shape::Date => is_iso8601_date_or_datetime(&s),
1226        Shape::Email => is_email(&s),
1227        Shape::Currency => is_currency(&s),
1228        Shape::Url => is_url(&s),
1229    };
1230    if !ok {
1231        push(
1232            issues,
1233            Severity::Error,
1234            codes::SCHEMA_SHAPE_MISMATCH,
1235            rel,
1236            line,
1237            Some(field.to_string()),
1238            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1239            Some(shape_suggestion(shape)),
1240            vec![],
1241        );
1242    }
1243}
1244
1245// ─────────────────────────────────────────────────────────────────────────────
1246//  Cross-file: entity-dedup collisions (validate_all only)
1247// ─────────────────────────────────────────────────────────────────────────────
1248
1249/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1250///
1251/// `DUP_ID` is universal (two files with the same explicit `id`).
1252/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1253/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1254/// uniqueness constraint, and two records of that type whose declared values
1255/// collide warn. No type carries a built-in dedup key — the store opts in.
1256///
1257/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1258/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1259/// lexicographically smallest store-relative path in the group (a total order →
1260/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1261/// that field's line on the reported file and carries it as `key`; a multi-field
1262/// key anchors to line 1 with a null key.
1263fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1264    // Path → frontmatter YAML, for resolving the anchor field's line on the
1265    // reported (smallest-path) member.
1266    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1267        .iter()
1268        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1269        .collect();
1270
1271    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1272    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1273    for (rel, p) in parsed {
1274        if let Some(map) = &p.fm {
1275            if let Some(id) = map.get("id").and_then(scalar_string) {
1276                if !id.trim().is_empty() {
1277                    by_id.entry(id).or_default().push(rel.clone());
1278                }
1279            }
1280        }
1281    }
1282    for (id, files) in &by_id {
1283        if files.len() > 1 {
1284            let (reported, related) = canonical_and_related(files);
1285            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1286            push(
1287                issues,
1288                Severity::Error,
1289                codes::DUP_ID,
1290                &reported,
1291                line,
1292                Some("id".into()),
1293                format!("id {id:?} is declared by more than one file"),
1294                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1295                related,
1296            );
1297        }
1298    }
1299
1300    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1301    // Every constraint comes from the store's `## Schemas`; a type with no
1302    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1303    // key-ordered, so emitted issues are deterministic across runs.
1304    for (type_name, schema) in &store.config.schemas {
1305        for key_fields in &schema.unique_keys {
1306            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1307        }
1308    }
1309}
1310
1311/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1312/// declared `key_fields` render to the same token tuple. Files missing any key
1313/// field are skipped — an incomplete key is never a collision.
1314///
1315/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1316/// store-relative path; `related` is the rest. A single-field key anchors to
1317/// that field's line on the reported file and carries it as `key`; a multi-field
1318/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1319fn soft_dup(
1320    parsed: &[(PathBuf, Parsed)],
1321    issues: &mut Vec<Issue>,
1322    type_: &str,
1323    key_fields: &[String],
1324    fm_yaml_of: &HashMap<&PathBuf, &str>,
1325) {
1326    if key_fields.is_empty() {
1327        return;
1328    }
1329    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1330    for (rel, p) in parsed {
1331        let is_type =
1332            p.fm.as_ref()
1333                .and_then(|m| m.get("type"))
1334                .and_then(scalar_string)
1335                .map(|t| t == type_)
1336                .unwrap_or(false);
1337        if !is_type {
1338            continue;
1339        }
1340        if let Some(key) = dedup_key(p, key_fields) {
1341            groups.entry(key).or_default().push(rel.clone());
1342        }
1343    }
1344    // HashMap iteration is nondeterministic; sort by reported member so the
1345    // emitted issue order is stable across runs.
1346    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1347        .values()
1348        .filter(|files| files.len() > 1)
1349        .map(|files| canonical_and_related(files))
1350        .collect();
1351    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1352
1353    let fields_disp = key_fields.join(", ");
1354    for (reported, related) in collisions {
1355        // Single-field keys anchor to the field's line + carry the key; multi-
1356        // field keys anchor to line 1 with a null key.
1357        let (line, key) = if key_fields.len() == 1 {
1358            (
1359                fm_yaml_of
1360                    .get(&reported)
1361                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1362                Some(key_fields[0].clone()),
1363            )
1364        } else {
1365            (Some(1), None)
1366        };
1367        let n = related.len();
1368        push(
1369            issues,
1370            Severity::Warning,
1371            codes::DUP_UNIQUE_KEY,
1372            &reported,
1373            line,
1374            key,
1375            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1376            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1377            related,
1378        );
1379    }
1380}
1381
1382/// Render a type's `unique:` key for one file: each field's dedup token in
1383/// order, or `None` if any field is absent/empty (an incomplete key never
1384/// collides).
1385fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1386    let mut out = Vec::with_capacity(key_fields.len());
1387    for f in key_fields {
1388        out.push(dedup_token(p, f)?);
1389    }
1390    Some(out)
1391}
1392
1393/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1394/// values (single or block-sequence list) reduce to their lower-cased target
1395/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1396/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1397fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1398    // Wiki-links first — read from the raw frontmatter text so the unquoted
1399    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1400    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1401    if !links.is_empty() {
1402        let set: BTreeSet<String> = links
1403            .into_iter()
1404            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1405            .filter(|t| !t.is_empty())
1406            .collect();
1407        return if set.is_empty() {
1408            None
1409        } else {
1410            Some(set.into_iter().collect::<Vec<_>>().join(","))
1411        };
1412    }
1413    match p.fm.as_ref()?.get(field) {
1414        Some(Value::Sequence(items)) => {
1415            let set: BTreeSet<String> = items
1416                .iter()
1417                .filter_map(scalar_string)
1418                .map(|s| s.trim().to_lowercase())
1419                .filter(|t| !t.is_empty())
1420                .collect();
1421            if set.is_empty() {
1422                None
1423            } else {
1424                Some(set.into_iter().collect::<Vec<_>>().join(","))
1425            }
1426        }
1427        Some(v) => {
1428            let s = scalar_string(v)?.trim().to_lowercase();
1429            if s.is_empty() {
1430                None
1431            } else {
1432                Some(s)
1433            }
1434        }
1435        None => None,
1436    }
1437}
1438
/// Partition a collision group into `(reported, related)`.
///
/// The lexicographically smallest store-relative path is the member the issue
/// is reported against; every other member, in ascending order, is returned as
/// `related`. Because path ordering is total, the split is deterministic
/// regardless of the order `files` arrives in.
///
/// # Panics
///
/// Panics if `files` is empty — callers must pass a non-empty group.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut related = files.to_vec();
    related.sort();
    // Popping the head leaves exactly the remaining members, still sorted.
    let reported = related.remove(0);
    (reported, related)
}
1450
1451// ─────────────────────────────────────────────────────────────────────────────
1452//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1453// ─────────────────────────────────────────────────────────────────────────────
1454
1455/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1456fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1457    // Group content files by their immediate parent folder (the type-folder,
1458    // *across date shards* — a sharded file's "type folder" is the folder right
1459    // under the layer). We key on the type-folder so shards roll up correctly.
1460    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1461    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1462    for rel in files {
1463        // The layer is the first path component — recorded independently of the
1464        // type-folder so a layer containing only loose files still requires an
1465        // `index.md`.
1466        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1467            match layer {
1468                "sources" => layers_present.insert("sources"),
1469                "records" => layers_present.insert("records"),
1470                _ => false,
1471            };
1472        }
1473        if let Some(tf) = type_folder_of(rel) {
1474            type_folders.entry(tf).or_default().push(rel.clone());
1475        }
1476    }
1477
1478    // ── Root index.md ─────────────────────────────────────────────────────────
1479    if !files.is_empty() {
1480        let root_index = store.root.join("index.md");
1481        if !root_index.is_file() {
1482            push(
1483                issues,
1484                Severity::Error,
1485                codes::INDEX_MISSING,
1486                Path::new("index.md"),
1487                None,
1488                None,
1489                "store has files but no root `index.md`".into(),
1490                Some("run `dbmd index rebuild`".into()),
1491                vec![],
1492            );
1493        } else {
1494            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1495        }
1496    }
1497
1498    // ── Layer index.md ────────────────────────────────────────────────────────
1499    for layer in &layers_present {
1500        let layer_index_rel = PathBuf::from(layer).join("index.md");
1501        let abs = store.root.join(&layer_index_rel);
1502        if !abs.is_file() {
1503            push(
1504                issues,
1505                Severity::Error,
1506                codes::INDEX_MISSING,
1507                &layer_index_rel,
1508                None,
1509                None,
1510                format!("layer `{layer}/` has files but no `index.md`"),
1511                Some("run `dbmd index rebuild`".into()),
1512                vec![],
1513            );
1514        } else {
1515            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1516        }
1517    }
1518
1519    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1520    for (tf, members) in &type_folders {
1521        let index_md_rel = tf.join("index.md");
1522        let index_md_abs = store.root.join(&index_md_rel);
1523        let index_md_present = index_md_abs.is_file();
1524        if !index_md_present {
1525            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1526            // on the FOLDER (not the would-be `index.md` path). When the index is
1527            // entirely missing we do NOT additionally evaluate per-entry
1528            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1529            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1530            push(
1531                issues,
1532                Severity::Error,
1533                codes::INDEX_MISSING,
1534                tf,
1535                None,
1536                None,
1537                format!("non-empty folder `{}` has no index.md", tf.display()),
1538                Some(format!(
1539                    "run `dbmd index rebuild --folder {}`",
1540                    tf.display()
1541                )),
1542                vec![],
1543            );
1544            continue;
1545        }
1546
1547        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1548        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1549
1550        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1551        // when the `index.md` is present (above): a folder whose entire index is
1552        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1553        let jsonl_rel = tf.join("index.jsonl");
1554        let jsonl_abs = store.root.join(&jsonl_rel);
1555        if !jsonl_abs.is_file() {
1556            push(
1557                issues,
1558                Severity::Error,
1559                codes::INDEX_JSONL_MISSING,
1560                &jsonl_rel,
1561                None,
1562                None,
1563                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1564                Some("run `dbmd index rebuild`".into()),
1565                vec![],
1566            );
1567        } else {
1568            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1569        }
1570    }
1571
1572    // ── Orphan index.md: an index file in a folder with no content. ──────────
1573    for rel in walk_index_files(&store.root) {
1574        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1575        let parent_str = parent.to_string_lossy().to_string();
1576        let is_canonical = parent_str.is_empty() // root
1577            || matches!(parent_str.as_str(), "sources" | "records")
1578            || type_folders.contains_key(&parent);
1579        if !is_canonical {
1580            push(
1581                issues,
1582                Severity::Warning,
1583                codes::INDEX_ORPHAN,
1584                &rel,
1585                None,
1586                None,
1587                format!(
1588                    "`{}` sits in an empty or non-canonical folder",
1589                    rel.display()
1590                ),
1591                Some("remove it, or run `dbmd index rebuild`".into()),
1592                vec![],
1593            );
1594        }
1595    }
1596}
1597
1598/// Check a type-folder `index.md`'s entries against the folder's actual files:
1599/// stale entries (target gone), missing entries (file not listed), and
1600/// summary mismatches.
1601fn check_type_folder_index_md(
1602    store: &Store,
1603    tf: &Path,
1604    index_rel: &Path,
1605    members: &[PathBuf],
1606    issues: &mut Vec<Issue>,
1607) {
1608    let abs = store.root.join(index_rel);
1609    let Ok(text) = std::fs::read_to_string(&abs) else {
1610        return;
1611    };
1612    let entries = parse_index_entries(&text);
1613
1614    let listed: BTreeSet<PathBuf> = entries
1615        .iter()
1616        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1617        .collect();
1618
1619    // Stale entries + summary mismatch.
1620    for entry in &entries {
1621        let bare = entry.target.trim_end_matches(".md");
1622        // Resolve like the graph engine (literal path first, then `.md`) so an
1623        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1624        let target_abs = match resolved_target_abs(store, bare) {
1625            Some(abs) => abs,
1626            None => {
1627                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1628                    push(
1629                        issues,
1630                        Severity::Error,
1631                        codes::INDEX_STALE_ENTRY,
1632                        index_rel,
1633                        Some(entry.line),
1634                        None,
1635                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1636                        Some("run `dbmd index rebuild`".into()),
1637                        vec![],
1638                    );
1639                } else {
1640                    push(
1641                        issues,
1642                        Severity::Error,
1643                        codes::INDEX_STALE_ENTRY,
1644                        index_rel,
1645                        Some(entry.line),
1646                        None,
1647                        format!("index entry `[[{bare}]]` points at a missing file"),
1648                        Some("run `dbmd index rebuild`".into()),
1649                        // The stale target the entry names (the file that no
1650                        // longer exists) — so the agent can locate the dangling
1651                        // reference.
1652                        vec![PathBuf::from(format!("{bare}.md"))],
1653                    );
1654                }
1655                continue;
1656            }
1657        };
1658        // Summary mismatch: the entry text must equal the file's `summary`. A
1659        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1660        // summary is also a mismatch — the SPEC requires every type-folder index
1661        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1662        // missing quote can't validate clean just because there's nothing to
1663        // compare.
1664        if let Some(expected) = read_summary(&target_abs) {
1665            match &entry.summary_text {
1666                Some(text_part) if text_part.trim() != expected.trim() => {
1667                    push(
1668                        issues,
1669                        Severity::Error,
1670                        codes::INDEX_SUMMARY_MISMATCH,
1671                        index_rel,
1672                        Some(entry.line),
1673                        None,
1674                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1675                        Some("run `dbmd index rebuild`".into()),
1676                        vec![PathBuf::from(format!("{bare}.md"))],
1677                    );
1678                }
1679                None if !expected.trim().is_empty() => {
1680                    push(
1681                        issues,
1682                        Severity::Error,
1683                        codes::INDEX_SUMMARY_MISMATCH,
1684                        index_rel,
1685                        Some(entry.line),
1686                        None,
1687                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1688                        Some("run `dbmd index rebuild`".into()),
1689                        vec![PathBuf::from(format!("{bare}.md"))],
1690                    );
1691                }
1692                _ => {}
1693            }
1694        }
1695    }
1696
1697    // Missing entries: a member file not listed. Skip the index/log meta files.
1698    // The browse view caps at 500; only flag a missing entry when the folder is
1699    // under the cap (a capped folder legitimately omits older files).
1700    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1701    if content_members.len() <= 500 {
1702        for m in content_members {
1703            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1704            if !listed.contains(&bare) {
1705                push(
1706                    issues,
1707                    Severity::Error,
1708                    codes::INDEX_MISSING_ENTRY,
1709                    index_rel,
1710                    None,
1711                    None,
1712                    format!(
1713                        "file `{}` is not listed in its folder's `index.md`",
1714                        m.display()
1715                    ),
1716                    Some("run `dbmd index rebuild`".into()),
1717                    vec![(*m).clone()],
1718                );
1719            }
1720        }
1721    }
1722    let _ = tf;
1723}
1724
1725/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1726/// folder (uncapped), every record must point at a real file, and each record's
1727/// fields must match the file's frontmatter.
1728fn check_type_folder_index_jsonl(
1729    store: &Store,
1730    tf: &Path,
1731    jsonl_rel: &Path,
1732    members: &[PathBuf],
1733    issues: &mut Vec<Issue>,
1734) {
1735    let abs = store.root.join(jsonl_rel);
1736    let Ok(text) = std::fs::read_to_string(&abs) else {
1737        return;
1738    };
1739
1740    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1741    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1742    for (i, line) in text.lines().enumerate() {
1743        let line = line.trim();
1744        if line.is_empty() {
1745            continue;
1746        }
1747        let rec: serde_json::Value = match serde_json::from_str(line) {
1748            Ok(v) => v,
1749            Err(e) => {
1750                push(
1751                    issues,
1752                    Severity::Error,
1753                    codes::INDEX_JSONL_DESYNC,
1754                    jsonl_rel,
1755                    Some((i + 1) as u32),
1756                    None,
1757                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1758                    Some("run `dbmd index rebuild`".into()),
1759                    vec![],
1760                );
1761                continue;
1762            }
1763        };
1764        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1765            if !is_safe_store_relative_path(Path::new(path)) {
1766                push(
1767                    issues,
1768                    Severity::Error,
1769                    codes::INDEX_JSONL_DESYNC,
1770                    jsonl_rel,
1771                    Some((i + 1) as u32),
1772                    None,
1773                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1774                    Some("run `dbmd index rebuild`".into()),
1775                    vec![],
1776                );
1777                continue;
1778            }
1779            records.insert(PathBuf::from(path), rec);
1780        }
1781    }
1782
1783    let member_set: BTreeSet<PathBuf> = members
1784        .iter()
1785        .filter(|m| is_content_file(m))
1786        .cloned()
1787        .collect();
1788
1789    // jsonl record → missing file = desync.
1790    for path in records.keys() {
1791        let target_abs = store.root.join(path);
1792        if !target_abs.is_file() {
1793            push(
1794                issues,
1795                Severity::Error,
1796                codes::INDEX_JSONL_DESYNC,
1797                jsonl_rel,
1798                None,
1799                None,
1800                format!(
1801                    "`index.jsonl` record points at missing file `{}`",
1802                    path.display()
1803                ),
1804                Some("run `dbmd index rebuild`".into()),
1805                vec![],
1806            );
1807        }
1808    }
1809
1810    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1811    for m in &member_set {
1812        if !records.contains_key(m) {
1813            push(
1814                issues,
1815                Severity::Error,
1816                codes::INDEX_JSONL_DESYNC,
1817                jsonl_rel,
1818                None,
1819                None,
1820                format!(
1821                    "file `{}` is missing from the complete `index.jsonl`",
1822                    m.display()
1823                ),
1824                Some("run `dbmd index rebuild`".into()),
1825                vec![m.clone()],
1826            );
1827        }
1828    }
1829
1830    // Record fields stale vs. frontmatter. SPEC § Validation defines
1831    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1832    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1833    // search paths read every field straight from these sidecars (`tags`,
1834    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1835    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1836    // a stale value answer queries with data that exists in no `.md` file.
1837    //
1838    // Rather than re-list (and drift from) every projected key, rebuild the
1839    // record the canonical projection would write for this file
1840    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1841    // and diff the two as flat JSON maps. Every key the projection emits is
1842    // covered automatically; `path` is the join key and is skipped.
1843    for (path, rec) in &records {
1844        let target_abs = store.root.join(path);
1845        if !target_abs.is_file() {
1846            continue;
1847        }
1848        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1849        else {
1850            continue; // unreadable / unparseable frontmatter is reported elsewhere
1851        };
1852        let Ok(expected_json) = serde_json::to_value(&expected) else {
1853            continue;
1854        };
1855        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1856            continue;
1857        };
1858
1859        // Compare the union of keys present on either side; a key the file
1860        // projects but the sidecar omits is just as stale as a wrong value.
1861        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1862        for key in have.keys().chain(want.keys()) {
1863            if key == "path" {
1864                continue;
1865            }
1866            if have.get(key) != want.get(key) {
1867                mismatched_keys.insert(key);
1868            }
1869        }
1870
1871        if !mismatched_keys.is_empty() {
1872            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1873            push(
1874                issues,
1875                Severity::Error,
1876                codes::INDEX_JSONL_STALE,
1877                jsonl_rel,
1878                None,
1879                Some(keys.join(",")),
1880                format!(
1881                    "`index.jsonl` record for `{}` is stale ({})",
1882                    path.display(),
1883                    keys.join(", ")
1884                ),
1885                Some("run `dbmd index rebuild`".into()),
1886                vec![path.clone()],
1887            );
1888        }
1889    }
1890    let _ = tf;
1891}
1892
1893/// Check an index's `scope:` frontmatter against its filesystem location.
1894fn check_index_scope(
1895    store: &Store,
1896    index_rel: &Path,
1897    expected_scope: &str,
1898    expected_folder: Option<&str>,
1899    issues: &mut Vec<Issue>,
1900) {
1901    let abs = store.root.join(index_rel);
1902    let Ok(text) = std::fs::read_to_string(&abs) else {
1903        return;
1904    };
1905    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1906        return;
1907    };
1908    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1909        return;
1910    };
1911    let fm = yaml_map_to_btree(&map);
1912
1913    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1914        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1915        let scope_ok =
1916            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1917        if !scope_ok {
1918            push(
1919                issues,
1920                Severity::Warning,
1921                codes::INDEX_WRONG_SCOPE,
1922                index_rel,
1923                fm_key_line(&yaml, "scope"),
1924                Some("scope".into()),
1925                format!(
1926                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1927                ),
1928                Some(format!("set `scope: {expected_scope}`")),
1929                vec![],
1930            );
1931        }
1932    }
1933    // folder: must match for layer/type-folder indexes.
1934    if let Some(expected) = expected_folder {
1935        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1936            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1937                push(
1938                    issues,
1939                    Severity::Warning,
1940                    codes::INDEX_WRONG_SCOPE,
1941                    index_rel,
1942                    fm_key_line(&yaml, "folder"),
1943                    Some("folder".into()),
1944                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1945                    Some(format!("set `folder: {expected}`")),
1946                    vec![],
1947                );
1948            }
1949        }
1950    }
1951}
1952
1953// ─────────────────────────────────────────────────────────────────────────────
1954//  Cross-file: log.md well-formedness + ordering (validate_all only)
1955// ─────────────────────────────────────────────────────────────────────────────
1956
1957/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
1958/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
1959///
1960/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
1961/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
1962/// LOG_* checks read only the active file, an entry `validate --all` flagged
1963/// while it lived in `log.md` would stop being flagged the moment a newer-month
1964/// append rotated it into an archive — even though the log readers still surface
1965/// that exact entry to the curator. Scanning the archives too keeps validate and
1966/// the readers in agreement after a rotation.
1967///
1968/// Order: archives oldest-month first, then the active `log.md` last — the true
1969/// chronological timeline — so the out-of-order check threads `prev` across the
1970/// rotation boundary the same way it does within a single file.
1971fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1972    let mut prev: Option<DateTime<FixedOffset>> = None;
1973    for rel in log_files_chronological(store) {
1974        check_log_file(store, &rel, &mut prev, issues);
1975    }
1976}
1977
1978/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
1979/// archive oldest-month first, then the active `log.md` last. Missing files are
1980/// simply absent from the list.
1981fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
1982    let mut files: Vec<PathBuf> = Vec::new();
1983    let archive_dir = store.root.join("log");
1984    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
1985        let mut archives: Vec<PathBuf> = entries
1986            .flatten()
1987            .map(|e| e.path())
1988            .filter(|p| {
1989                p.is_file()
1990                    && p.file_name()
1991                        .and_then(|s| s.to_str())
1992                        .and_then(|n| n.strip_suffix(".md"))
1993                        .is_some_and(is_year_month_archive)
1994            })
1995            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
1996            .collect();
1997        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
1998        archives.sort();
1999        files.extend(archives);
2000    }
2001    // The active file holds the current month — newest, so it comes last.
2002    if store.root.join("log.md").is_file() {
2003        files.push(PathBuf::from("log.md"));
2004    }
2005    files
2006}
2007
2008/// Scan one log file's entry headers, threading the running `prev` timestamp so
2009/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2010/// given store-relative path so an archived entry points at its archive file.
2011fn check_log_file(
2012    store: &Store,
2013    log_rel: &Path,
2014    prev: &mut Option<DateTime<FixedOffset>>,
2015    issues: &mut Vec<Issue>,
2016) {
2017    let abs = store.root.join(log_rel);
2018    let Ok(text) = std::fs::read_to_string(&abs) else {
2019        return;
2020    };
2021
2022    for (i, line) in text.lines().enumerate() {
2023        if !line.starts_with("## [") {
2024            continue;
2025        }
2026        let line_no = (i + 1) as u32;
2027        match parse_log_header(line) {
2028            None => push(
2029                issues,
2030                Severity::Error,
2031                codes::LOG_BAD_TIMESTAMP,
2032                log_rel,
2033                Some(line_no),
2034                None,
2035                format!("log entry header has an unparseable timestamp: {line:?}"),
2036                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2037                vec![],
2038            ),
2039            Some((ts, kind, _object)) => {
2040                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2041                    push(
2042                        issues,
2043                        Severity::Warning,
2044                        codes::LOG_UNKNOWN_KIND,
2045                        log_rel,
2046                        Some(line_no),
2047                        None,
2048                        format!("log entry kind `{kind}` is not recognized"),
2049                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2050                        vec![],
2051                    );
2052                }
2053                if let Some(p) = *prev {
2054                    if ts < p {
2055                        push(
2056                            issues,
2057                            Severity::Warning,
2058                            codes::LOG_OUT_OF_ORDER,
2059                            log_rel,
2060                            Some(line_no),
2061                            None,
2062                            "log entry is older than the entry above it (possible rewrite)".into(),
2063                            Some("append corrective entries; never reorder past ones".into()),
2064                            vec![],
2065                        );
2066                    }
2067                }
2068                *prev = Some(ts);
2069            }
2070        }
2071    }
2072}
2073
2074// ─────────────────────────────────────────────────────────────────────────────
2075//  Self-contained primitives (collapse onto sibling modules once they land)
2076// ─────────────────────────────────────────────────────────────────────────────
2077
/// A minimal wiki-link occurrence: the link's target and the 1-based line it
/// was found on. No display text is stored — the struct carries only these two
/// fields.
#[derive(Debug)]
struct Link {
    /// The link target as captured by the scanner.
    /// NOTE(review): `dedup_token` strips a trailing `.md` from a `target`
    /// field before comparing, so this presumably may or may not carry the
    /// extension — confirm against the scanner.
    target: String,
    /// 1-based line number of the occurrence.
    line: u32,
}
2084
2085/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2086/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2087/// exact-cased directory entry to be present.
2088fn store_marker_present(store: &Store) -> bool {
2089    let want = store.root.join("DB.md");
2090    if !want.is_file() {
2091        return false;
2092    }
2093    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2094    match std::fs::read_dir(&store.root) {
2095        Ok(entries) => entries
2096            .flatten()
2097            .any(|e| e.file_name().to_str() == Some("DB.md")),
2098        Err(_) => true, // can't enumerate; trust the is_file() above
2099    }
2100}
2101
2102/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2103/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2104/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2105/// `Schemas`).
2106///
2107/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2108/// than through `check_content_file`. The marker presence is established by the
2109/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2110/// as a store (the marker is the filename), so we report its shape rather than
2111/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2112fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2113    let rel = Path::new("DB.md");
2114    let abs = store.root.join("DB.md");
2115    let Ok(text) = std::fs::read_to_string(&abs) else {
2116        return; // marker present but unreadable: nothing more to say.
2117    };
2118
2119    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2120        // No frontmatter block at all → it cannot declare `type: db-md` and has
2121        // neither required field. Report the type and both missing fields,
2122        // anchored to line 1 (the would-be opening fence).
2123        push(
2124            issues,
2125            Severity::Error,
2126            codes::DB_MD_BAD_TYPE,
2127            rel,
2128            Some(1),
2129            Some("type".into()),
2130            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2131            Some("add a `---` frontmatter block with `type: db-md`".into()),
2132            vec![],
2133        );
2134        for field in ["scope", "owner"] {
2135            push(
2136                issues,
2137                Severity::Error,
2138                codes::DB_MD_MISSING_FIELD,
2139                rel,
2140                Some(1),
2141                Some(field.into()),
2142                format!("DB.md frontmatter is missing required field `{field}`"),
2143                Some(format!("add `{field}:` to the DB.md frontmatter")),
2144                vec![],
2145            );
2146        }
2147        return;
2148    };
2149
2150    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2151    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2152    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2153        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2154        Ok(Value::Null) => Some(BTreeMap::new()),
2155        _ => None,
2156    };
2157
2158    match &fm {
2159        Some(map) => {
2160            // ── type: db-md ──────────────────────────────────────────────────
2161            let type_ = map.get("type").and_then(scalar_string);
2162            if type_.as_deref() != Some("db-md") {
2163                let (line, msg) = match &type_ {
2164                    Some(t) => (
2165                        fm_key_line(&fm_yaml, "type"),
2166                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2167                    ),
2168                    None => (
2169                        Some(1),
2170                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2171                    ),
2172                };
2173                push(
2174                    issues,
2175                    Severity::Error,
2176                    codes::DB_MD_BAD_TYPE,
2177                    rel,
2178                    line,
2179                    Some("type".into()),
2180                    msg,
2181                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2182                    vec![],
2183                );
2184            }
2185
2186            // ── required fields: scope + owner ───────────────────────────────
2187            for field in ["scope", "owner"] {
2188                let present = map
2189                    .get(field)
2190                    .and_then(scalar_string)
2191                    .map(|s| !s.trim().is_empty())
2192                    .unwrap_or(false);
2193                if !present {
2194                    push(
2195                        issues,
2196                        Severity::Error,
2197                        codes::DB_MD_MISSING_FIELD,
2198                        rel,
2199                        // A present-but-empty field anchors to its line; a fully
2200                        // absent one to the block top.
2201                        fm_key_line_or_top(&fm_yaml, field),
2202                        Some(field.into()),
2203                        format!("DB.md frontmatter is missing required field `{field}`"),
2204                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2205                        vec![],
2206                    );
2207                }
2208            }
2209        }
2210        None => {
2211            // Unparseable frontmatter: the identity contract is unprovable. Emit
2212            // the type error and both field errors, anchored to the block top.
2213            push(
2214                issues,
2215                Severity::Error,
2216                codes::DB_MD_BAD_TYPE,
2217                rel,
2218                Some(1),
2219                Some("type".into()),
2220                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2221                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2222                vec![],
2223            );
2224            for field in ["scope", "owner"] {
2225                push(
2226                    issues,
2227                    Severity::Error,
2228                    codes::DB_MD_MISSING_FIELD,
2229                    rel,
2230                    Some(1),
2231                    Some(field.into()),
2232                    format!("DB.md frontmatter is missing required field `{field}`"),
2233                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2234                    vec![],
2235                );
2236            }
2237        }
2238    }
2239
2240    // ── recognized `##` section headers only ─────────────────────────────────
2241    // The body's H2 headings must be one of the three the toolkit reads; any
2242    // other is a likely typo / misplacement (warning — the parser ignores it,
2243    // so the config is not corrupted, but the operator wrote a section that will
2244    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2245    // schema blocks) live under their H2 and are not flagged here.
2246    for section in crate::parser::extract_sections(&body) {
2247        if section.level != 2 {
2248            continue;
2249        }
2250        let name = section.heading.trim().to_ascii_lowercase();
2251        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2252            continue;
2253        }
2254        // `Section::line` is 1-based within the body; the body begins at file
2255        // line `fm_end_line + 1`.
2256        let file_line = fm_end_line + section.line;
2257        push(
2258            issues,
2259            Severity::Warning,
2260            codes::DB_MD_UNKNOWN_SECTION,
2261            rel,
2262            Some(file_line),
2263            None,
2264            format!(
2265                "DB.md has an unrecognized `## {}` section",
2266                section.heading.trim()
2267            ),
2268            Some(
2269                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2270                 remove or rename this heading"
2271                    .into(),
2272            ),
2273            vec![],
2274        );
2275    }
2276
2277    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2278    // Without this, every schema misparse is silent: the operator/agent gets no
2279    // signal that DB.md is interpreting their schema differently from what they
2280    // wrote, and downstream records are validated against the degraded schema.
2281    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2282}
2283
2284/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2285/// duplicate field name within a type, or an unrecognized modifier all parse
2286/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2287/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2288/// already separated out); this pass reports the suspicious *field* shapes,
2289/// anchored to the `### <type>` heading line so the agent can find the block.
2290fn check_db_md_schemas(
2291    store: &Store,
2292    rel: &Path,
2293    body: &str,
2294    fm_end_line: u32,
2295    issues: &mut Vec<Issue>,
2296) {
2297    if store.config.schemas.is_empty() {
2298        return;
2299    }
2300
2301    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2302    // per-type issue can anchor to the declaration block. `extract_sections`
2303    // returns a flat list with 1-based body lines; the body starts at file line
2304    // `fm_end_line + 1`.
2305    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2306    let mut current_h2: Option<String> = None;
2307    for section in crate::parser::extract_sections(body) {
2308        match section.level {
2309            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2310            3 if current_h2.as_deref() == Some("schemas") => {
2311                // The H3 heading text (as written) is the type name — the same
2312                // key `parse_db_md` inserts into `config.schemas`.
2313                type_line
2314                    .entry(section.heading.trim().to_string())
2315                    .or_insert(fm_end_line + section.line);
2316            }
2317            _ => {}
2318        }
2319    }
2320
2321    for (type_name, schema) in &store.config.schemas {
2322        let line = type_line.get(type_name).copied();
2323        let mut seen: BTreeSet<String> = BTreeSet::new();
2324        for field in &schema.fields {
2325            let name = field.name.trim();
2326
2327            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2328            // nameless field that can never match a frontmatter key, so its
2329            // required/shape/enum constraints silently never apply.
2330            if name.is_empty() {
2331                push(
2332                    issues,
2333                    Severity::Warning,
2334                    codes::DB_MD_SCHEMA_FIELD,
2335                    rel,
2336                    line,
2337                    None,
2338                    format!("`### {type_name}` has a schema field bullet with no field name"),
2339                    Some(
2340                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2341                            .into(),
2342                    ),
2343                    vec![],
2344                );
2345                continue;
2346            }
2347
2348            // Duplicate field name within a type: the second declaration's
2349            // constraints are interpreted independently of the first, so the
2350            // author's intent is ambiguous and likely wrong.
2351            if !seen.insert(name.to_string()) {
2352                push(
2353                    issues,
2354                    Severity::Warning,
2355                    codes::DB_MD_SCHEMA_FIELD,
2356                    rel,
2357                    line,
2358                    Some(name.to_string()),
2359                    format!("`### {type_name}` declares field `{name}` more than once"),
2360                    Some(
2361                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2362                    ),
2363                    vec![],
2364                );
2365            }
2366
2367            // Unrecognized modifiers: the parser stashes anything outside the
2368            // known vocabulary (`required` / a shape / `link to …` / `default …`
2369            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2370            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2371            for modifier in &field.unknown_modifiers {
2372                let modifier = modifier.trim();
2373                if modifier.is_empty() {
2374                    continue;
2375                }
2376                push(
2377                    issues,
2378                    Severity::Info,
2379                    codes::DB_MD_SCHEMA_FIELD,
2380                    rel,
2381                    line,
2382                    Some(name.to_string()),
2383                    format!(
2384                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2385                    ),
2386                    Some(
2387                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2388                            .into(),
2389                    ),
2390                    vec![],
2391                );
2392            }
2393        }
2394    }
2395}
2396
2397/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2398fn not_a_store_issue(store: &Store) -> Issue {
2399    Issue {
2400        severity: Severity::Error,
2401        code: codes::NOT_A_STORE,
2402        file: store.root.clone(),
2403        line: None,
2404        key: None,
2405        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2406        suggestion: Some("create a `DB.md` at the store root".into()),
2407        related: vec![],
2408    }
2409}
2410
/// True if a store-relative path is a content file: a `.md` file whose first
/// path segment is the `sources` or `records` layer and whose file name is not
/// the derived catalog twin `index.md` / `index.jsonl`.
///
/// `DB.md` / `log.md` are reserved meta only at the store ROOT, which the layer
/// check already excludes — so a file named `log.md` / `DB.md` *inside* a layer
/// (e.g. `records/docs/log.md`) counts as real content, consistent with
/// `Store::walk`.
fn is_content_file(rel: &Path) -> bool {
    let layer = rel.iter().next().and_then(|segment| segment.to_str());
    if !matches!(layer, Some("sources" | "records")) {
        return false;
    }
    let file_name = rel
        .file_name()
        .and_then(|segment| segment.to_str())
        .unwrap_or("");
    let is_catalog_twin = matches!(file_name, "index.md" | "index.jsonl");
    !is_catalog_twin && file_name.ends_with(".md")
}
2431
/// True for the store's ROOT append-only meta files: a store-relative path made
/// of exactly one normal component named `DB.md` or `log.md`.
///
/// A path with a parent directory (e.g. the in-layer `records/docs/log.md`) is
/// real content, not a root meta file. These reach `check_content_file` only
/// via the working-set incoming-linker scan; their bodies are deliberately not
/// link-checked there because `validate --all` doesn't link-check them either.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain file name.
        (Some(Component::Normal(name)), None) => {
            matches!(name.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2448
/// Split a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open with `---` on the very first line and runs
/// to the next `---` line (trailing whitespace on either fence is ignored).
/// `frontmatter_yaml` is the lines between the fences, each terminated by `\n`;
/// `body` is every line after the closing fence joined with `\n`;
/// `closing_fence_line` is the 1-based file line of the closing fence.
/// Returns `None` if there is no leading fence or it is never closed.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // Tolerate a single leading UTF-8 BOM, matching parser/store/index (which
    // already strip it). Without this, a BOM-prefixed file is read as having no
    // frontmatter here while the catalog still indexes it — so validate would
    // silently skip frontmatter checks on a file the rest of the toolkit sees.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let mut lines = text.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    // Line 1 was the opening fence; keep consuming until the closing fence.
    // Running out of lines first means the block is unterminated → `None`.
    let mut yaml = String::new();
    let mut close_line = 1u32;
    loop {
        let line = lines.next()?;
        close_line += 1;
        if line.trim_end() == "---" {
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }

    // Whatever the iterator has left is the body.
    let body = lines.collect::<Vec<_>>().join("\n");
    Some((yaml, body, close_line))
}
2485
2486/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2487fn read_summary(abs: &Path) -> Option<String> {
2488    let text = std::fs::read_to_string(abs).ok()?;
2489    let (yaml, _, _) = split_frontmatter(&text)?;
2490    let value: Value = serde_norway::from_str(&yaml).ok()?;
2491    if let Value::Mapping(m) = value {
2492        m.get(Value::String("summary".into()))
2493            .and_then(scalar_string)
2494    } else {
2495        None
2496    }
2497}
2498
2499/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2500/// non-string keys (frontmatter keys are always strings).
2501fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2502    let mut out = BTreeMap::new();
2503    for (k, v) in map {
2504        if let Value::String(s) = k {
2505            out.insert(s.clone(), v.clone());
2506        }
2507    }
2508    out
2509}
2510
2511/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2512/// sequences/mappings/null.
2513fn scalar_string(v: &Value) -> Option<String> {
2514    match v {
2515        Value::String(s) => Some(s.clone()),
2516        Value::Number(n) => Some(n.to_string()),
2517        Value::Bool(b) => Some(b.to_string()),
2518        _ => None,
2519    }
2520}
2521
2522/// True if a frontmatter value carries no content for a *required*-field check:
2523/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2524/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2525/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2526/// a shape/enum field is caught by the later non-scalar shape check, not by the
2527/// required-presence check.
2528fn is_empty_value(v: &Value) -> bool {
2529    match v {
2530        Value::Null => true,
2531        Value::Sequence(items) => items.is_empty(),
2532        Value::Mapping(map) => map.is_empty(),
2533        other => scalar_string(other)
2534            .map(|s| s.trim().is_empty())
2535            .unwrap_or(true),
2536    }
2537}
2538
2539/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2540/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2541fn is_flat_scalar_list(v: &Value) -> bool {
2542    match v {
2543        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2544        _ => false,
2545    }
2546}
2547
2548/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2549/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2550/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2551/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2552/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2553/// both forms uniformly, the way the link textually appears — the doctrine view.
2554///
2555/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2556/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2557fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2558    let mut out = Vec::new();
2559    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2560        for link in links {
2561            out.push((key.clone(), link));
2562        }
2563    }
2564    out
2565}
2566
2567/// The wiki-link targets declared under a single top-level frontmatter key
2568/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2569/// carries no `[[...]]`.
2570fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2571    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2572        if k == key {
2573            return links;
2574        }
2575    }
2576    Vec::new()
2577}
2578
2579/// The raw value text under a single top-level frontmatter key (the remainder of
2580/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2581/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2582fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2583    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2584        if k == key {
2585            return Some(value_text);
2586        }
2587    }
2588    None
2589}
2590
2591/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2592/// each top-level key. A top-level key is a line with no leading indentation in
2593/// `name:` form; its value spans the rest of that line plus any deeper-indented
2594/// continuation lines (block scalars, block sequences) until the next top-level
2595/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2596/// absolute file line.
2597fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2598    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2599    let mut current: Option<(String, String, Vec<Link>)> = None;
2600
2601    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2602        let file_line = fm_start_line + idx as u32;
2603        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2604        let trimmed = raw_line.trim();
2605
2606        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2607        // comment. (Indented or dash lines belong to the current key's value.)
2608        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2609            top_level_key(raw_line)
2610        } else {
2611            None
2612        };
2613
2614        if let Some((key, after)) = new_key {
2615            if let Some(done) = current.take() {
2616                blocks.push(done);
2617            }
2618            let mut links = Vec::new();
2619            collect_line_links(after, file_line, &mut links);
2620            current = Some((key, after.trim().to_string(), links));
2621        } else if let Some((_k, value_text, links)) = current.as_mut() {
2622            // Continuation of the current key's value (indented or dash line).
2623            if !value_text.is_empty() {
2624                value_text.push('\n');
2625            }
2626            value_text.push_str(trimmed);
2627            collect_line_links(raw_line, file_line, links);
2628        }
2629    }
2630    if let Some(done) = current.take() {
2631        blocks.push(done);
2632    }
2633    blocks
2634}
2635
/// Parse a frontmatter key line into `(key, value_after_colon)`.
///
/// The line is split at its first `:`. The part before it, trimmed, is the key
/// and must be non-empty and consist only of alphanumerics, `_` or `-`; the
/// part after it is returned untrimmed. `None` if the line has no `:` or the
/// key is not of that form.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let (raw_key, rest) = line.split_once(':')?;
    let key = raw_key.trim();
    let well_formed = !key.is_empty()
        && key
            .chars()
            .all(|ch| ch.is_alphanumeric() || matches!(ch, '_' | '-'));
    well_formed.then(|| (key.to_string(), rest))
}
2650
2651/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2652/// each tagged with `file_line`.
2653fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2654    let bytes = s.as_bytes();
2655    let mut i = 0;
2656    while i + 1 < bytes.len() {
2657        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2658            if let Some(close) = s[i + 2..].find("]]") {
2659                let inner = &s[i + 2..i + 2 + close];
2660                // Guard against `[[[` (nested) double-counting: the inner must
2661                // not itself open another `[[`.
2662                let target = inner
2663                    .trim_start_matches('[')
2664                    .split('|')
2665                    .next()
2666                    .unwrap_or(inner)
2667                    .trim()
2668                    .to_string();
2669                if !target.is_empty() {
2670                    links.push(Link {
2671                        target,
2672                        line: file_line,
2673                    });
2674                }
2675                i = i + 2 + close + 2;
2676                continue;
2677            }
2678        }
2679        i += 1;
2680    }
2681}
2682
2683/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2684/// Skips fenced code blocks, so example links in docs don't trip the validator.
2685///
2686/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2687/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2688/// only on a line that is the **same** fence character with a run **at least as
2689/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2690/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2691/// document a backtick fence) — the inner backtick line would flip `in_fence`
2692/// off and the demo `[[…]]` inside the code block would be checked as a live
2693/// link, falsely flagging a legal store.
2694fn extract_wiki_links(body: &str) -> Vec<Link> {
2695    let mut out = Vec::new();
2696    let mut fence: Option<(u8, usize)> = None;
2697    for (idx, line) in body.lines().enumerate() {
2698        let content = line.trim_end_matches('\r');
2699        if let Some(f) = fence {
2700            // Inside a fence: the only thing that matters is whether THIS line
2701            // closes it (matching char, run ≥ the opening run). Everything else
2702            // is opaque code — no link extraction.
2703            if fence_closes(content, f) {
2704                fence = None;
2705            }
2706            continue;
2707        }
2708        if let Some(opened) = fence_opens(content) {
2709            fence = Some(opened);
2710            continue;
2711        }
2712        let line_no = (idx + 1) as u32;
2713        let bytes = line.as_bytes();
2714        let mut i = 0;
2715        while i + 1 < bytes.len() {
2716            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2717                if let Some(close) = line[i + 2..].find("]]") {
2718                    let inner = &line[i + 2..i + 2 + close];
2719                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2720                    // Skip a triple-bracket `[[[…` opening: the inner content
2721                    // starts with `[`, so this is the rejected flow-form list
2722                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2723                    // legitimate target never starts with `[`. The frontmatter
2724                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2725                    // extracting a bogus body link here would double-report it as
2726                    // a spurious `WIKI_LINK_SHORT_FORM`.
2727                    if !target.is_empty() && !target.starts_with('[') {
2728                        out.push(Link {
2729                            target,
2730                            line: line_no,
2731                        });
2732                    }
2733                    i = i + 2 + close + 2;
2734                    continue;
2735                }
2736            }
2737            i += 1;
2738        }
2739    }
2740    out
2741}
2742
/// If `line` opens a fenced code block, return `(fence byte, run length)`.
///
/// A local mirror of the parser's `opening_fence`, so the validator's fence
/// tracking matches the rest of the toolkit: a fence is a run of at least three
/// backticks or three tildes, indented by at most three spaces, and a backtick
/// fence's info string may not itself contain a backtick.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }

    let marker = match rest.as_bytes().first() {
        Some(&byte @ (b'`' | b'~')) => byte,
        _ => return None,
    };
    let run = rest.len() - rest.trim_start_matches(char::from(marker)).len();

    // A backtick fence's info string may not itself contain a backtick.
    let info_has_backtick = marker == b'`' && rest[run..].contains('`');
    if run < 3 || info_has_backtick {
        return None;
    }
    Some((marker, run))
}
2768
/// True if `line` closes the currently open `fence`: at most three spaces of
/// indent, a run of the same fence character at least as long as the opening
/// run, and nothing but whitespace after it.
///
/// Local mirror of the parser's `is_closing_fence` — so a fence line of the
/// *other* character (a backtick line inside a `~~~` block) does NOT close the
/// outer fence.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    let indent = line.len() - rest.len();
    let after_run = rest.trim_start_matches(char::from(marker));
    let run = rest.len() - after_run.len();
    indent <= 3 && run >= open_len && after_run.trim().is_empty()
}
2786
2787/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2788/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2789///
2790/// **Scoped to the inline value on the key line.** The SPEC's canonical
2791/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2792/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2793/// flagged — even though, parsed whole, it nests the same way the rejected
2794/// inline flow form does. So this check looks only at the value written *inline*
2795/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2796/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2797/// mis-encoding), it is flagged. A key with no inline value (the block form,
2798/// whose items live on continuation lines) is never inspected here.
2799///
2800/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2801/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2802/// encodes the identical nested sequence but evaded the old prefix match.
2803fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2804    let mut out = Vec::new();
2805    for line in fm_yaml.lines() {
2806        // Top-level key lines only (no indentation, not a comment or list dash).
2807        if line.starts_with(' ') || line.starts_with('\t') {
2808            continue;
2809        }
2810        let Some((key, rest)) = line.split_once(':') else {
2811            continue;
2812        };
2813        let key = key.trim();
2814        if key.is_empty()
2815            || key.starts_with('#')
2816            || key.starts_with('-')
2817            || !key
2818                .chars()
2819                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2820        {
2821            continue;
2822        }
2823        let rest = rest.trim();
2824        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2825        // the unquoted block form has an empty inline value and is never flagged.
2826        if !rest.starts_with('[') {
2827            continue;
2828        }
2829        // Parse just the inline value and test its shape: a list whose items are
2830        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2831        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2832        // `Seq[Seq[String]]` and is NOT flagged).
2833        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2834            let nested = items.iter().any(|item| match item {
2835                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2836                _ => false,
2837            });
2838            if nested {
2839                out.push(key.to_string());
2840            }
2841        }
2842    }
2843    out
2844}
2845
/// True if a bare target (no `.md`) is a full store-relative path: its first
/// `/`-separated segment is a known layer (`sources` or `records`) and
/// something non-empty follows that first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, rest)) => matches!(layer, "sources" | "records") && !rest.is_empty(),
        None => false,
    }
}
2854
/// True if `path` has at least one normal component and nothing that could
/// leave the store: no `..`, no root, no platform prefix. `.` components are
/// tolerated but do not count as content on their own.
///
/// Validator inputs come from user-authored markdown/JSON sidecars; never let
/// absolute paths, platform prefixes, or `..` turn a validation probe into a
/// filesystem escape.
fn is_safe_store_relative_path(path: &Path) -> bool {
    path.components()
        .try_fold(false, |saw_normal, component| match component {
            Component::Normal(_) => Some(true),
            Component::CurDir => Some(saw_normal),
            // Any escaping component rejects the whole path immediately.
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
        })
        .unwrap_or(false)
}
2869
2870fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2871    let path = Path::new(bare);
2872    if !is_safe_store_relative_path(path) {
2873        return None;
2874    }
2875    Some(PathBuf::from(format!("{bare}.md")))
2876}
2877
/// How a wiki-link / index-entry target resolves on disk.
///
/// Produced by `resolve_wiki_target`, which checks path safety first and only
/// then probes the filesystem.
enum TargetResolution {
    /// The target exists (either as the literal path or with a `.md` suffix).
    Exists,
    /// The target is a safe store-relative path but no file exists for it
    /// under either form.
    Missing,
    /// The target escapes the store (absolute, `..`, prefix) — never probe it.
    Unsafe,
}
2887
2888/// Resolve a bare wiki-link / index-entry target the way the graph engine does
2889/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
2890/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
2891/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
2892/// real file), then the `.md`-appended path (the common case for content
2893/// pages). Without trying the literal path first, a legal link to a raw source
2894/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
2895/// resolves it.
2896fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
2897    // The literal path and the `.md`-appended path share the same safety check
2898    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
2899    // target is unsafe in both forms.
2900    if !is_safe_store_relative_path(Path::new(bare)) {
2901        return TargetResolution::Unsafe;
2902    }
2903    match resolved_target_abs(store, bare) {
2904        Some(_) => TargetResolution::Exists,
2905        None => TargetResolution::Missing,
2906    }
2907}
2908
2909/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
2910/// trying the literal path first, then `.md`-appended — mirroring the graph
2911/// engine. `None` when neither exists, or when the bare target escapes the store
2912/// (callers that need to distinguish unsafe from merely-missing use
2913/// [`resolve_wiki_target`]).
2914fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
2915    if !is_safe_store_relative_path(Path::new(bare)) {
2916        return None;
2917    }
2918    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
2919    // verbatim under `sources/`).
2920    let literal = store.root.join(bare);
2921    if literal.is_file() {
2922        return Some(literal);
2923    }
2924    // The `.md`-appended path (a content page referenced without its extension).
2925    let with_md = store.root.join(format!("{bare}.md"));
2926    if with_md.is_file() {
2927        return Some(with_md);
2928    }
2929    None
2930}
2931
/// Reports whether the bare target `bare` equals `prefix` or lies beneath it
/// (both `.md`-stripped). Trailing `/` on `prefix` is ignored, and the match is
/// segment-aware: `records/contacts-old/x` is *not* under `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let base = prefix.trim_end_matches('/');
    match bare.strip_prefix(base) {
        // Exactly the prefix itself, or the prefix followed by a separator.
        Some(tail) => tail.is_empty() || tail.starts_with('/'),
        None => false,
    }
}
2937
/// The type-folder of a store-relative content path: `<layer>/<type-folder>`,
/// i.e. the folder directly under the layer (deeper date-shards roll up to it).
///
/// Returns `None` when the path has fewer than three components (a file
/// directly in a layer folder has no type-folder) or when the first component
/// is not one of the layers `sources` / `records`. Components that are not
/// valid UTF-8 are dropped before the components are counted.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let segments: Vec<&str> = rel.iter().filter_map(|part| part.to_str()).collect();
    // Need at least layer / type-folder / file.
    let [layer, folder, _, ..] = segments.as_slice() else {
        return None;
    };
    if !matches!(*layer, "sources" | "records") {
        return None;
    }
    Some(Path::new(*layer).join(*folder))
}
2951
2952/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`/`wiki/`,
2953/// returning store-relative paths to be parsed in full. Skips hidden dirs and
2954/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
2955/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
2956/// (a single presence-only pass), so the loop default never walks-and-*parses*
2957/// the whole content tree.
2958///
2959/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
2960/// is reserved (`Store::is_in_log_dir` checks only the first path component);
2961/// the walk roots are the three layers, so the root archive is already out of
2962/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
2963/// decision log) is real content (see `is_content_file`), so pruning every
2964/// `name == "log"` made `--all` silently skip those files — reporting fewer
2965/// errors than the default working-set scope on the same store.
2966fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2967    let mut out = Vec::new();
2968    for layer in ["sources", "records"] {
2969        let base = root.join(layer);
2970        if !base.is_dir() {
2971            continue;
2972        }
2973        for entry in walkdir::WalkDir::new(&base)
2974            .into_iter()
2975            .filter_entry(|e| {
2976                let name = e.file_name().to_str().unwrap_or("");
2977                !name.starts_with('.')
2978            })
2979            .flatten()
2980        {
2981            if !entry.file_type().is_file() {
2982                continue;
2983            }
2984            let name = entry.file_name().to_str().unwrap_or("");
2985            if name.ends_with(".md") && name != "index.md" {
2986                if let Ok(rel) = entry.path().strip_prefix(root) {
2987                    out.push(rel.to_path_buf());
2988                }
2989            }
2990        }
2991    }
2992    out.sort();
2993    out
2994}
2995
2996/// Every `index.md` under the store (root + layers + type-folders), as
2997/// store-relative paths. Used to detect orphan indexes. Like
2998/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
2999/// and its `index.md` is not pruned (only the root-level `log/` archive is
3000/// reserved, and the walk roots are the three layers, so it is already
3001/// out of scope).
3002fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3003    let mut out = Vec::new();
3004    if root.join("index.md").is_file() {
3005        out.push(PathBuf::from("index.md"));
3006    }
3007    for layer in ["sources", "records"] {
3008        let base = root.join(layer);
3009        if !base.is_dir() {
3010            continue;
3011        }
3012        for entry in walkdir::WalkDir::new(&base)
3013            .into_iter()
3014            .filter_entry(|e| {
3015                let name = e.file_name().to_str().unwrap_or("");
3016                !name.starts_with('.')
3017            })
3018            .flatten()
3019        {
3020            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3021                if let Ok(rel) = entry.path().strip_prefix(root) {
3022                    out.push(rel.to_path_buf());
3023                }
3024            }
3025        }
3026    }
3027    out.sort();
3028    out
3029}
3030
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number.
struct IndexEntry {
    /// The wiki-link target with any `|display` segment removed and
    /// surrounding whitespace trimmed.
    target: String,
    /// The summary text following the separator, or `None` when the line has
    /// no separator or the summary is empty.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md` text.
    line: u32,
}
3038
3039/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3040/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3041/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3042/// path, the summary text is whatever follows the em dash.
3043fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3044    let mut out = Vec::new();
3045    let mut in_more = false;
3046    for (idx, line) in text.lines().enumerate() {
3047        let trimmed = line.trim_start();
3048        if trimmed.starts_with("## More") {
3049            in_more = true;
3050            continue;
3051        }
3052        if in_more {
3053            continue;
3054        }
3055        if !trimmed.starts_with("- ") {
3056            continue;
3057        }
3058        // Find the first `[[...]]`.
3059        let Some(open) = trimmed.find("[[") else {
3060            continue;
3061        };
3062        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3063            continue;
3064        };
3065        let inner = &trimmed[open + 2..open + 2 + close_rel];
3066        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3067
3068        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3069        let after = &trimmed[open + 2 + close_rel + 2..];
3070        let summary_text = extract_index_entry_summary(after);
3071
3072        out.push(IndexEntry {
3073            target,
3074            summary_text,
3075            line: (idx + 1) as u32,
3076        });
3077    }
3078    out
3079}
3080
3081/// Pull the summary portion out of the text trailing an index entry's
3082/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3083/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3084/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3085/// renderer's tag separator).
3086fn extract_index_entry_summary(after: &str) -> Option<String> {
3087    let mut s = after.trim();
3088    // Drop a leading "(N ...)" count segment, if present.
3089    if s.starts_with('(') {
3090        if let Some(close) = s.find(')') {
3091            s = s[close + 1..].trim_start();
3092        }
3093    }
3094    // Require an em dash or hyphen separator before the summary.
3095    let s = if let Some(rest) = s.strip_prefix('—') {
3096        rest.trim()
3097    } else if let Some(rest) = s.strip_prefix('-') {
3098        rest.trim()
3099    } else {
3100        return None;
3101    };
3102    if s.is_empty() {
3103        return None;
3104    }
3105    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3106    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3107    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3108    // the file has no tags. The previous code also accepted a *single*-spaced
3109    // ` · ` separator, which collided with a legal summary whose own text ends
3110    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3111    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3112    // summary verbatim (no tag block, since there are no tags), but the loose
3113    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3114    // `"Standup notes"` against the file's full summary, and emitted a spurious
3115    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3116    // (rebuild regenerates the identical line). Matching the renderer's exact
3117    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3118    // matches from the right so only the real trailing tag block is considered.
3119    let s = match s.rsplit_once("  ·  ") {
3120        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3121        _ => s,
3122    };
3123    Some(s.to_string())
3124}
3125
/// Reports whether `s` is a non-empty tag block: one or more
/// whitespace-separated tokens, each a `#` followed by at least one more byte.
///
/// This is the shape the index renderer appends after the `·` separator
/// (`crate::index::format_md_entry`), and is what distinguishes the renderer's
/// `  ·  #tag` suffix from a literal `·` inside the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // An empty / all-whitespace string has no tokens and is not a tag block.
    tokens.peek().is_some() && tokens.all(|tok| tok.starts_with('#') && tok.len() >= 2)
}
3140
3141/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3142/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3143/// or the header isn't well-formed.
3144fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3145    let rest = line.strip_prefix("## [")?;
3146    let close = rest.find(']')?;
3147    let ts_str = &rest[..close];
3148    let tail = rest[close + 1..].trim();
3149
3150    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3151    // attach a zero offset — the log header carries minute precision, no zone.
3152    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3153    let offset = FixedOffset::east_opt(0)?;
3154    let ts = naive.and_local_timezone(offset).single()?;
3155
3156    // kind | object
3157    let (kind, object) = match tail.split_once('|') {
3158        Some((k, o)) => {
3159            let o = o.trim();
3160            (
3161                k.trim().to_string(),
3162                if o.is_empty() {
3163                    None
3164                } else {
3165                    Some(o.to_string())
3166                },
3167            )
3168        }
3169        None => (tail.to_string(), None),
3170    };
3171    if kind.is_empty() {
3172        return None;
3173    }
3174    Some((ts, kind, object))
3175}
3176
3177/// Every log file that holds entries for the working-set scan: the active
3178/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3179/// strictly-prior-month entries into the archives, so the active file alone is
3180/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3181/// unvalidated object can live in an archive after a month rollover. Reading the
3182/// archives here keeps the working-set readers in sync with the rest of the log
3183/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3184/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3185/// only log headers, never the content store, so the loop budget is preserved.
3186fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3187    let mut files = vec![store.root.join("log.md")];
3188    let archive_dir = store.root.join("log");
3189    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3190        let mut archives: Vec<PathBuf> = entries
3191            .flatten()
3192            .map(|e| e.path())
3193            .filter(|p| {
3194                p.is_file()
3195                    && p.file_name()
3196                        .and_then(|s| s.to_str())
3197                        .and_then(|n| n.strip_suffix(".md"))
3198                        .is_some_and(is_year_month_archive)
3199            })
3200            .collect();
3201        // Deterministic order (oldest month first); the callers fold across all
3202        // files so order doesn't affect the result, but a stable order keeps the
3203        // scan reproducible.
3204        archives.sort();
3205        files.extend(archives);
3206    }
3207    files
3208}
3209
/// Reports whether `s` has the shape of a `YYYY-MM` archive stem: exactly four
/// ASCII digits, a `-`, then two ASCII digits. This is the `log/<YYYY-MM>.md`
/// naming the rotation in [`crate::log`] emits. Only the shape is checked, not
/// whether the month is a real one.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1] => [y0, y1, y2, y3, m0, m1]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
3219
3220/// The timestamp of the most recent `validate` entry across the active `log.md`
3221/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3222/// Reads only headers; never the whole store. Archive-aware so a `validate`
3223/// entry that rotated into an archive after a month rollover still anchors the
3224/// cutoff (without this, the cutoff silently resets to `None`).
3225fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3226    let mut latest: Option<DateTime<FixedOffset>> = None;
3227    for file in log_files_for_working_set(store) {
3228        let Ok(text) = std::fs::read_to_string(&file) else {
3229            continue;
3230        };
3231        for line in text.lines() {
3232            if !line.starts_with("## [") {
3233                continue;
3234            }
3235            if let Some((ts, kind, _)) = parse_log_header(line) {
3236                if kind == "validate" {
3237                    latest = Some(match latest {
3238                        Some(p) if p >= ts => p,
3239                        _ => ts,
3240                    });
3241                }
3242            }
3243        }
3244    }
3245    latest
3246}
3247
3248/// The set of content objects changed since `cutoff`, read from log entries
3249/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3250/// counts (no prior validate window). Returns store-relative `.md` paths.
3251///
3252/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3253/// month rollover [`Log::append`] rotates prior-month entries out of the active
3254/// file, so an object changed-but-never-validated in a prior month lives only in
3255/// an archive. Reading the archives here is what keeps `dbmd validate` from
3256/// silently skipping those files. Reads only log headers, never the content
3257/// store.
3258fn changed_objects_since(
3259    store: &Store,
3260    cutoff: Option<DateTime<FixedOffset>>,
3261) -> BTreeSet<PathBuf> {
3262    let mut out = BTreeSet::new();
3263    for file in log_files_for_working_set(store) {
3264        let Ok(text) = std::fs::read_to_string(&file) else {
3265            continue;
3266        };
3267        for line in text.lines() {
3268            if !line.starts_with("## [") {
3269                continue;
3270            }
3271            let Some((ts, kind, object)) = parse_log_header(line) else {
3272                continue;
3273            };
3274            if let Some(c) = cutoff {
3275                if ts < c {
3276                    continue;
3277                }
3278            }
3279            if !matches!(
3280                kind.as_str(),
3281                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3282            ) {
3283                continue;
3284            }
3285            if let Some(obj) = object {
3286                // The object slot is a store-relative path (or a wiki-link target).
3287                let bare = obj
3288                    .trim()
3289                    .trim_start_matches("[[")
3290                    .trim_end_matches("]]")
3291                    .split('|')
3292                    .next()
3293                    .unwrap_or("")
3294                    .trim()
3295                    .trim_end_matches(".md")
3296                    .to_string();
3297                if bare.is_empty() {
3298                    continue;
3299                }
3300                out.insert(PathBuf::from(format!("{bare}.md")));
3301            }
3302        }
3303    }
3304    out
3305}
3306
/// The result of the [`derived_from_ignored_type`] policy check: the
/// `derived_from` target that resolves to an ignored-type record, plus that
/// record's type. Carries exactly what both the validate finding and the
/// write-time warning need to render their message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target as written by the caller (expected
    /// to be a bare store-relative path, no `.md`); stored unmodified.
    pub target: String,
    /// The resolved `type` of that target, which is present in
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3320
3321/// **The single authoritative `### Ignored types` derivation check.** Decides
3322/// whether a `wiki-page` derives from an ignored-type record: the type must be
3323/// `wiki-page`, `### Ignored types` must be non-empty, and some `derived_from`
3324/// target must resolve to a record whose `type` is in `ignored_types`. Returns
3325/// the first such target (and its type), or `None`.
3326///
3327/// Both surfaces call this so the policy lives in exactly one place:
3328/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3329/// `derived_from` targets it scanned from the raw frontmatter, and the write
3330/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3331/// The link *extraction* differs per surface (text-scan with line numbers vs.
3332/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3333/// resolution, and `ignored_types` membership — does not.
3334pub fn derived_from_ignored_type<I, S>(
3335    store: &Store,
3336    meta_type: &str,
3337    derived_from_targets: I,
3338) -> Option<DerivedFromIgnored>
3339where
3340    I: IntoIterator<Item = S>,
3341    S: AsRef<str>,
3342{
3343    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3344        return None;
3345    }
3346    for target in derived_from_targets {
3347        let target = target.as_ref();
3348        if let Some(target_type) = link_target_type(store, target) {
3349            if store.config.ignored_types.contains(&target_type) {
3350                return Some(DerivedFromIgnored {
3351                    target: target.to_string(),
3352                    target_type,
3353                });
3354            }
3355        }
3356    }
3357    None
3358}
3359
3360/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3361fn link_target_type(store: &Store, target: &str) -> Option<String> {
3362    let bare = target.trim_end_matches(".md");
3363    let abs = store.root.join(safe_md_target_rel(bare)?);
3364    let text = std::fs::read_to_string(&abs).ok()?;
3365    let (yaml, _, _) = split_frontmatter(&text)?;
3366    let value: Value = serde_norway::from_str(&yaml).ok()?;
3367    if let Value::Mapping(m) = value {
3368        m.get(Value::String("type".into())).and_then(scalar_string)
3369    } else {
3370        None
3371    }
3372}
3373
3374// ── Shape validators ─────────────────────────────────────────────────────────
3375
3376/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3377/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3378fn is_iso8601(s: &str) -> bool {
3379    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3380}
3381
3382/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3383/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3384/// accept the date-only form per the SPEC's worked example.
3385fn is_iso8601_date_or_datetime(s: &str) -> bool {
3386    let s = s.trim();
3387    if DateTime::parse_from_rfc3339(s).is_ok() {
3388        return true;
3389    }
3390    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3391}
3392
/// Reports whether a string (after trimming) looks like `<local>@<domain>`.
///
/// Requirements: a non-empty local part without spaces, and a domain that
/// contains a `.`, neither starts nor ends with `.`, and has no spaces. There
/// must be exactly one `@`: the split happens at the first `@`, and a domain
/// that still contains one (the double-`@` typo `sarah@@acme.com`, or
/// `a@b@c.com`) is rejected. Without that check a domain of `@acme.com` would
/// pass every other test.
fn is_email(s: &str) -> bool {
    match s.trim().split_once('@') {
        None => false,
        Some((local, domain)) => {
            let local_ok = !local.is_empty() && !local.contains(' ');
            let domain_ok = !domain.contains('@')
                && domain.contains('.')
                && !domain.starts_with('.')
                && !domain.ends_with('.')
                && !domain.contains(' ');
            local_ok && domain_ok
        }
    }
}
3410
3411/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
3412/// plain decimal number with optional thousands separators and ≤ 2 decimals.
3413///
3414/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
3415/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
3416/// rejected, and the ≤ 2-decimal rule is actually enforced.
3417fn is_currency(s: &str) -> bool {
3418    let mut t = s.trim();
3419    // Strip a leading currency symbol …
3420    for sym in ["$", "€", "£", "¥"] {
3421        if let Some(rest) = t.strip_prefix(sym) {
3422            t = rest.trim_start();
3423            break;
3424        }
3425    }
3426    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
3427    // code must be exactly three ASCII letters and separated from the number by
3428    // whitespace, so a bare `USD` with no amount still fails.
3429    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
3430        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
3431            t = rest.trim_start();
3432        }
3433    }
3434
3435    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
3436    is_plain_amount(cleaned.trim())
3437}
3438
/// Reports whether `s` is a bare decimal amount: an optional single `+`/`-`
/// sign, at least one ASCII digit, and an optional fractional part of exactly
/// one or two ASCII digits. Exponents, `inf`/`NaN`, a missing integer part
/// (`.5`), a dangling point (`5.`) and the empty string are all rejected.
fn is_plain_amount(s: &str) -> bool {
    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());
    match unsigned.split_once('.') {
        Some((whole, fraction)) => {
            !whole.is_empty()
                && all_digits(whole)
                && matches!(fraction.len(), 1 | 2)
                && all_digits(fraction)
        }
        None => !unsigned.is_empty() && all_digits(unsigned),
    }
}
3455
/// Reports whether a string (after trimming) is an http(s) URL: it starts with
/// `http://` or `https://` (case-sensitive) and has at least one character
/// after the scheme.
///
/// Because the scheme is stripped rather than measured against a fixed length,
/// a single-character host on the shorter scheme (`http://x`, e.g. an
/// intranet/container hostname) is accepted, while a bare scheme with nothing
/// after it (`http://`, `https://`) is rejected.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    candidate
        .strip_prefix("http://")
        .or_else(|| candidate.strip_prefix("https://"))
        .is_some_and(|after_scheme| !after_scheme.is_empty())
}
3470
3471/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3472fn shape_suggestion(shape: Shape) -> String {
3473    match shape {
3474        Shape::String => "use a scalar string".into(),
3475        Shape::Int => "use an integer".into(),
3476        Shape::Bool => "use `true` or `false`".into(),
3477        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3478        Shape::Email => "use a `<local>@<domain>` address".into(),
3479        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3480        Shape::Url => "use an http(s) URL".into(),
3481    }
3482}
3483
3484/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3485/// can't know the folder, so the suggestion is generic but actionable.
3486fn short_form_suggestion(bare: &str) -> Option<String> {
3487    Some(format!(
3488        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3489        slugish(bare)
3490    ))
3491}
3492
/// A filesystem-ish leaf for a plain string: trimmed and lowercased, each
/// whitespace character turned into `-`, and every character that is not
/// alphanumeric, `-`, `/` or `_` dropped.
fn slugish(s: &str) -> String {
    let mut slug = String::new();
    for ch in s.trim().to_lowercase().chars() {
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3502
3503/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3504/// hashes a byte or reads an asset file's contents — byte presence and hash
3505/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3506/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3507/// content file's `asset`/`assets` declarations.
3508fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3509    use crate::assets;
3510
3511    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3512    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3513
3514    // Lenient manifest read: a malformed line is reported, not fatal.
3515    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3516    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3517        for (i, line) in text.lines().enumerate() {
3518            if line.trim().is_empty() {
3519                continue;
3520            }
3521            match serde_json::from_str::<assets::AssetRecord>(line) {
3522                Ok(rec) => {
3523                    manifest.insert(rec.path.clone(), rec);
3524                }
3525                Err(e) => push(
3526                    issues,
3527                    Severity::Error,
3528                    codes::ASSET_MANIFEST_MALFORMED,
3529                    manifest_rel,
3530                    Some((i as u32) + 1),
3531                    None,
3532                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3533                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3534                    vec![],
3535                ),
3536            }
3537        }
3538    }
3539
3540    // Per-wrapper declarations: every declared asset must be in the manifest and
3541    // must not point at a markdown content file.
3542    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3543    for (rel, p) in parsed {
3544        let Some(map) = &p.fm else {
3545            continue;
3546        };
3547        for decl in assets::declarations_from_yaml_map(map) {
3548            let norm = match assets::normalize_asset_path(&decl.path) {
3549                Ok(n) => n,
3550                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3551            };
3552            declared.insert(norm.clone());
3553            let is_md = Path::new(&norm)
3554                .extension()
3555                .and_then(|e| e.to_str())
3556                .map(|e| e.eq_ignore_ascii_case("md"))
3557                .unwrap_or(false);
3558            if is_md {
3559                push(
3560                    issues,
3561                    Severity::Warning,
3562                    codes::ASSET_PATH_IS_CONTENT,
3563                    rel,
3564                    None,
3565                    Some("asset".to_string()),
3566                    format!("asset path `{norm}` points at a markdown content file"),
3567                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3568                    vec![PathBuf::from(&norm)],
3569                );
3570            }
3571            if !manifest.contains_key(&norm) {
3572                push(
3573                    issues,
3574                    Severity::Error,
3575                    codes::ASSET_UNDECLARED,
3576                    rel,
3577                    None,
3578                    Some("asset".to_string()),
3579                    format!(
3580                        "references asset `{norm}` with no record in {}",
3581                        assets::MANIFEST_FILE
3582                    ),
3583                    Some("run `dbmd assets scan` to catalog it".to_string()),
3584                    vec![PathBuf::from(&norm)],
3585                );
3586            }
3587        }
3588    }
3589
3590    // Per-record: wrapper existence + orphan detection.
3591    for (path, rec) in &manifest {
3592        for w in &rec.wrappers {
3593            if !store.root.join(w).is_file() {
3594                push(
3595                    issues,
3596                    Severity::Error,
3597                    codes::ASSET_WRAPPER_BROKEN,
3598                    Path::new(path),
3599                    None,
3600                    None,
3601                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3602                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3603                    vec![PathBuf::from(w)],
3604                );
3605            }
3606        }
3607        if !declared.contains(path) {
3608            push(
3609                issues,
3610                Severity::Warning,
3611                codes::ASSET_MANIFEST_ORPHAN,
3612                Path::new(path),
3613                None,
3614                None,
3615                format!(
3616                    "`{path}` is in {} but no wrapper references it",
3617                    assets::MANIFEST_FILE
3618                ),
3619                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3620                vec![],
3621            );
3622        }
3623    }
3624}
3625
3626/// Push a fully-formed [`Issue`].
3627#[allow(clippy::too_many_arguments)]
3628fn push(
3629    issues: &mut Vec<Issue>,
3630    severity: Severity,
3631    code: &'static str,
3632    file: &Path,
3633    line: Option<u32>,
3634    key: Option<String>,
3635    message: String,
3636    suggestion: Option<String>,
3637    related: Vec<PathBuf>,
3638) {
3639    issues.push(Issue {
3640        severity,
3641        code,
3642        file: file.to_path_buf(),
3643        line,
3644        key,
3645        message,
3646        suggestion,
3647        related,
3648    });
3649}
3650
/// 1-based *file* line of a top-level frontmatter key, or `None` if the key
/// has no line of its own.
///
/// `fm_yaml` is the YAML text between the fences, so YAML line 0 is file
/// line 2 (file line 1 is the opening `---`).
///
/// A line matches only when `key` starts at column 0 and is followed by `:`,
/// optionally separated from it by spaces or tabs — `owner : x` names the same
/// YAML key as `owner: x`. Indented lines (nested mapping keys, list items)
/// never match, and neither does a longer key that merely shares the prefix
/// (`typed:` is not `type:`).
///
/// Also returns `None` if the line number would not fit in a `u32`.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let yaml_line = fm_yaml.lines().position(|line| {
        // `strip_prefix` on the untrimmed line pins the key to column 0.
        line.strip_prefix(key)
            .is_some_and(|rest| rest.trim_start_matches([' ', '\t']).starts_with(':'))
    })?;
    // +2: file line 1 is the opening `---`, YAML line 0 → file line 2.
    u32::try_from(yaml_line).ok()?.checked_add(2)
}
3666
3667/// The line a *field-absence* issue (a required key that is missing entirely)
3668/// anchors to: the key's line when present, else line `1` — the frontmatter
3669/// block's opening `---`. A missing key has no line of its own; anchoring it to
3670/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3671/// line to point at instead of an unhelpful `null`.
3672fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3673    fm_key_line(fm_yaml, key).or(Some(1))
3674}
3675
3676/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3677/// output deterministic across runs.
3678fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3679    a.file
3680        .cmp(&b.file)
3681        .then(a.line.cmp(&b.line))
3682        .then(a.code.cmp(b.code))
3683        .then(a.key.cmp(&b.key))
3684}
3685
3686// ═════════════════════════════════════════════════════════════════════════════
3687//  Tests
3688// ═════════════════════════════════════════════════════════════════════════════
3689
3690#[cfg(test)]
3691mod tests {
3692    use super::*;
3693    use crate::parser::{Config, FieldSpec};
3694    use std::fs;
3695    use tempfile::TempDir;
3696
3697    #[test]
3698    fn split_frontmatter_tolerates_leading_bom() {
3699        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3700        // fence must not make validate treat the file as frontmatter-less while
3701        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3702        // for `\u{feff}---` and the function returned None.
3703        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3704        let parsed = split_frontmatter(text);
3705        assert!(
3706            parsed.is_some(),
3707            "a leading BOM must not hide frontmatter from validate"
3708        );
3709        let (yaml, body, close_line) = parsed.unwrap();
3710        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3711        assert_eq!(body, "body");
3712        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3713    }
3714
3715    /// A test store builder over a real tempdir. Every helper writes real files
3716    /// so the assertions exercise real behavior, not mocks.
3717    struct Fixture {
3718        dir: TempDir,
3719        config: Config,
3720    }
3721
3722    impl Fixture {
3723        /// A fresh store with a **valid** `DB.md` (the identity contract:
3724        /// `type: db-md` + `scope` + `owner`) and the three layer dirs. A valid
3725        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3726        /// clean; tests that want a broken DB.md write their own via `write`.
3727        fn new() -> Self {
3728            let dir = TempDir::new().unwrap();
3729            fs::write(
3730                dir.path().join("DB.md"),
3731                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3732            )
3733            .unwrap();
3734            for layer in ["sources", "records"] {
3735                fs::create_dir_all(dir.path().join(layer)).unwrap();
3736            }
3737            Fixture {
3738                dir,
3739                config: Config::default(),
3740            }
3741        }
3742
3743        /// A store with no `DB.md` marker.
3744        fn bare() -> Self {
3745            let dir = TempDir::new().unwrap();
3746            Fixture {
3747                dir,
3748                config: Config::default(),
3749            }
3750        }
3751
3752        /// Write a file at a store-relative path, creating parent dirs.
3753        fn write(&self, rel: &str, contents: &str) {
3754            let abs = self.dir.path().join(rel);
3755            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3756            fs::write(abs, contents).unwrap();
3757        }
3758
3759        fn store(&self) -> Store {
3760            Store {
3761                root: self.dir.path().to_path_buf(),
3762                config: self.config.clone(),
3763            }
3764        }
3765
3766        fn store_all(&self) -> Vec<Issue> {
3767            validate_all(&self.store()).unwrap()
3768        }
3769
3770        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3771        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3772        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3773        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3774        /// the sidecar carries the COMPLETE per-field projection and the fixture
3775        /// can't silently drift from what the index writer emits.
3776        fn rebuild_indexes(&self) {
3777            crate::index::Index::rebuild_all(&self.store()).unwrap();
3778        }
3779    }
3780
3781    /// True if any issue has this code.
3782    fn has(issues: &[Issue], code: &str) -> bool {
3783        issues.iter().any(|i| i.code == code)
3784    }
3785
3786    /// Count issues with a code.
3787    fn count(issues: &[Issue], code: &str) -> usize {
3788        issues.iter().filter(|i| i.code == code).count()
3789    }
3790
3791    /// The first issue with a code, or panic.
3792    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3793        issues
3794            .iter()
3795            .find(|i| i.code == code)
3796            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3797    }
3798
3799    /// A minimal valid `contact` body for reuse.
3800    fn valid_contact(summary: &str) -> String {
3801        format!(
3802            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3803        )
3804    }
3805
3806    // ── store marker ──────────────────────────────────────────────────────────
3807
3808    #[test]
3809    fn not_a_store_when_db_md_absent() {
3810        let fx = Fixture::bare();
3811        let issues = fx.store_all();
3812        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3813        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3814        assert!(issues[0].is_error());
3815    }
3816
3817    #[test]
3818    fn working_set_also_reports_not_a_store() {
3819        let fx = Fixture::bare();
3820        let issues = validate_working_set(&fx.store(), None).unwrap();
3821        assert!(has(&issues, codes::NOT_A_STORE));
3822    }
3823
3824    #[test]
3825    fn clean_store_has_no_issues() {
3826        let fx = Fixture::new();
3827        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3828        // Build the canonical indexes (complete per-field jsonl included) the
3829        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3830        // proven clean across every projected field, not just summary/type.
3831        fx.rebuild_indexes();
3832        let issues = fx.store_all();
3833        assert!(
3834            issues.is_empty(),
3835            "expected a clean store, got: {issues:#?}"
3836        );
3837    }
3838
3839    // ── DB.md structure ───────────────────────────────────────────────────────
3840
3841    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3842    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3843    /// would fail here).
3844    #[test]
3845    fn valid_db_md_emits_no_structure_issue() {
3846        let fx = Fixture::new();
3847        let issues = fx.store_all();
3848        assert!(
3849            !has(&issues, codes::DB_MD_BAD_TYPE)
3850                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3851                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3852            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3853        );
3854    }
3855
3856    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3857    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3858    /// accepting a non-`db-md` type, breaks this.
3859    #[test]
3860    fn db_md_wrong_type_is_error() {
3861        let fx = Fixture::new();
3862        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3863        let issues = fx.store_all();
3864        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3865        assert!(i.is_error());
3866        assert_eq!(i.file, PathBuf::from("DB.md"));
3867        assert_eq!(i.key.as_deref(), Some("type"));
3868        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3869    }
3870
3871    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3872    /// absent field, each keyed on its field name, anchored to the block top.
3873    #[test]
3874    fn db_md_missing_scope_and_owner_each_report() {
3875        let fx = Fixture::new();
3876        fx.write("DB.md", "---\ntype: db-md\n---\n");
3877        let issues = fx.store_all();
3878        assert_eq!(
3879            count(&issues, codes::DB_MD_MISSING_FIELD),
3880            2,
3881            "both scope and owner absent → two issues: {issues:#?}"
3882        );
3883        let keys: BTreeSet<Option<String>> = issues
3884            .iter()
3885            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3886            .map(|i| i.key.clone())
3887            .collect();
3888        assert_eq!(
3889            keys,
3890            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3891            "one issue keyed on each missing field"
3892        );
3893        for i in issues
3894            .iter()
3895            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3896        {
3897            assert!(i.is_error());
3898            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3899        }
3900    }
3901
3902    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3903    /// anchored to its own line — guarding against an "is the key textually
3904    /// present?" shortcut that would miss `owner:` with an empty value.
3905    #[test]
3906    fn db_md_blank_required_field_is_missing() {
3907        let fx = Fixture::new();
3908        fx.write(
3909            "DB.md",
3910            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3911        );
3912        let issues = fx.store_all();
3913        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3914        assert_eq!(i.key.as_deref(), Some("owner"));
3915        assert_eq!(
3916            i.line,
3917            Some(4),
3918            "a present-but-empty field anchors to its line"
3919        );
3920        assert!(
3921            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3922            "scope is present and non-empty → only owner reported"
3923        );
3924    }
3925
3926    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
3927    /// to the heading's file line; the three recognized sections stay silent.
3928    #[test]
3929    fn db_md_unknown_section_is_warning() {
3930        let fx = Fixture::new();
3931        fx.write(
3932            "DB.md",
3933            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
3934            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
3935            // 11 `## Glossary`.
3936            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
3937        );
3938        let issues = fx.store_all();
3939        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
3940        assert!(!i.is_error(), "unknown section is a warning, not an error");
3941        assert_eq!(i.severity, Severity::Warning);
3942        assert_eq!(
3943            i.line,
3944            Some(11),
3945            "anchors to the `## Glossary` heading line"
3946        );
3947        assert!(
3948            i.message.contains("Glossary"),
3949            "the message names the offending section: {}",
3950            i.message
3951        );
3952        // The recognized `## Agent instructions` section did NOT fire.
3953        assert_eq!(
3954            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
3955            1,
3956            "only the unrecognized section is flagged: {issues:#?}"
3957        );
3958    }
3959
3960    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
3961    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
3962    #[test]
3963    fn db_md_no_frontmatter_reports_type_and_both_fields() {
3964        let fx = Fixture::new();
3965        fx.write("DB.md", "# just a heading, no frontmatter\n");
3966        let issues = fx.store_all();
3967        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
3968        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
3969    }
3970
3971    // ── frontmatter ─────────────────────────────────────────────────────────
3972
3973    #[test]
3974    fn missing_type_is_error() {
3975        let fx = Fixture::new();
3976        fx.write(
3977            "records/contacts/a.md",
3978            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
3979        );
3980        let issues = fx.store_all();
3981        assert!(has(&issues, codes::FM_MISSING_TYPE));
3982        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
3983    }
3984
3985    #[test]
3986    fn missing_universal_timestamps_are_errors_on_content_files() {
3987        let fx = Fixture::new();
3988        fx.write(
3989            "records/contacts/a.md",
3990            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
3991        );
3992        let issues = fx.store_all();
3993
3994        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
3995        assert_eq!(missing_created.key.as_deref(), Some("created"));
3996        assert!(missing_created.is_error());
3997
3998        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
3999        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
4000        assert!(missing_updated.is_error());
4001    }
4002
4003    #[test]
4004    fn meta_files_do_not_require_universal_timestamps() {
4005        let fx = Fixture::new();
4006        let issues = fx.store_all();
4007
4008        assert!(
4009            !has(&issues, codes::FM_MISSING_CREATED),
4010            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4011        );
4012        assert!(
4013            !has(&issues, codes::FM_MISSING_UPDATED),
4014            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4015        );
4016    }
4017
4018    #[test]
4019    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
4020        let fx = Fixture::new();
4021        fx.write(
4022            "records/profiles/a.md",
4023            "# Just a heading\n\nNo frontmatter here.\n",
4024        );
4025        let issues = fx.store_all();
4026        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4027        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4028    }
4029
4030    #[test]
4031    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4032        let fx = Fixture::new();
4033        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
4034        let issues = fx.store_all();
4035        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4036        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4037    }
4038
4039    #[test]
4040    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4041        let fx = Fixture::new();
4042        // A tab inside a mapping value is invalid YAML.
4043        fx.write(
4044            "records/contacts/a.md",
4045            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4046        );
4047        let issues = fx.store_all();
4048        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4049        assert!(issue.is_error());
4050        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4051        // When YAML doesn't parse we don't *also* claim the summary is missing;
4052        // the agent fixes the YAML first.
4053        assert!(
4054            !has(&issues, codes::SUMMARY_MISSING),
4055            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4056        );
4057    }
4058
4059    #[test]
4060    fn bad_created_timestamp_is_error() {
4061        let fx = Fixture::new();
4062        fx.write(
4063            "records/contacts/a.md",
4064            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4065        );
4066        let issues = fx.store_all();
4067        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4068        assert_eq!(issue.key.as_deref(), Some("created"));
4069        assert!(issue.is_error());
4070    }
4071
4072    #[test]
4073    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4074        let fx = Fixture::new();
4075        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4076        // `last_touch` is a type-specific date field → date-only is fine.
4077        fx.write(
4078            "records/contacts/a.md",
4079            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4080        );
4081        let issues = fx.store_all();
4082        let created_issues: Vec<_> = issues
4083            .iter()
4084            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4085            .collect();
4086        assert_eq!(
4087            created_issues.len(),
4088            1,
4089            "date-only `created` must fail: {issues:#?}"
4090        );
4091        assert!(
4092            !issues.iter().any(
4093                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4094            ),
4095            "date-only `last_touch` is valid: {issues:#?}"
4096        );
4097    }
4098
4099    // ── summary ─────────────────────────────────────────────────────────────
4100
4101    #[test]
4102    fn summary_missing_empty_multiline_toolong() {
4103        let fx = Fixture::new();
4104        fx.write(
4105            "records/profiles/missing.md",
4106            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4107        );
4108        fx.write(
4109            "records/profiles/empty.md",
4110            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4111        );
4112        let long = "x".repeat(201);
4113        fx.write(
4114            "records/profiles/long.md",
4115            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4116        );
4117        let issues = fx.store_all();
4118        assert!(has(&issues, codes::SUMMARY_MISSING));
4119        assert_eq!(
4120            find(&issues, codes::SUMMARY_MISSING).file,
4121            PathBuf::from("records/profiles/missing.md")
4122        );
4123        assert!(has(&issues, codes::SUMMARY_EMPTY));
4124        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4125        assert_eq!(
4126            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4127            Severity::Warning
4128        );
4129    }
4130
4131    #[test]
4132    fn summary_multiline_via_yaml_block_scalar() {
4133        let fx = Fixture::new();
4134        // A literal block scalar produces a value with a newline.
4135        fx.write(
4136            "records/profiles/a.md",
4137            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4138        );
4139        let issues = fx.store_all();
4140        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4141    }
4142
4143    #[test]
4144    fn summary_exactly_200_chars_is_ok() {
4145        let fx = Fixture::new();
4146        let s = "y".repeat(200);
4147        fx.write(
4148            "wiki/people/a.md",
4149            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4150        );
4151        let issues = fx.store_all();
4152        assert!(
4153            !has(&issues, codes::SUMMARY_TOO_LONG),
4154            "200 is the bound, inclusive: {issues:#?}"
4155        );
4156    }
4157
4158    #[test]
4159    fn meta_files_need_no_summary() {
4160        let fx = Fixture::new();
4161        // The root/layer/type indexes + log carry no summary and must not be
4162        // flagged. (A lone DB.md store with one contact and full indexes.)
4163        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4164        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4165        fx.write(
4166            "records/index.md",
4167            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4168        );
4169        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4170        fx.write(
4171            "records/contacts/index.jsonl",
4172            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4173        );
4174        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4175        let issues = fx.store_all();
4176        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4177    }
4178
4179    // ── tags ────────────────────────────────────────────────────────────────
4180
4181    #[test]
4182    fn nested_tags_warns_flat_tags_ok() {
4183        let fx = Fixture::new();
4184        fx.write(
4185            "records/contacts/nested.md",
4186            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4187        );
4188        fx.write(
4189            "records/contacts/flat.md",
4190            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4191        );
4192        let issues = fx.store_all();
4193        let tag_issues: Vec<_> = issues
4194            .iter()
4195            .filter(|i| i.code == codes::TAGS_MALFORMED)
4196            .collect();
4197        assert_eq!(
4198            tag_issues.len(),
4199            1,
4200            "only the nested-tags file should warn: {issues:#?}"
4201        );
4202        assert_eq!(
4203            tag_issues[0].file,
4204            PathBuf::from("records/contacts/nested.md")
4205        );
4206        assert_eq!(tag_issues[0].severity, Severity::Warning);
4207    }
4208
4209    // ── wiki-links ────────────────────────────────────────────────────────────
4210
4211    #[test]
4212    fn short_form_wiki_link_is_error() {
4213        let fx = Fixture::new();
4214        let mut body = valid_contact("links to a short form");
4215        body.push_str("\nSee [[sarah-chen]] for details.\n");
4216        fx.write("records/contacts/a.md", &body);
4217        let issues = fx.store_all();
4218        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4219        assert!(issue.is_error());
4220        assert!(issue.message.contains("sarah-chen"));
4221        // A short-form link must NOT also be reported broken — fix the form first.
4222        assert!(
4223            !issues
4224                .iter()
4225                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4226            "short-form should suppress broken: {issues:#?}"
4227        );
4228    }
4229
4230    #[test]
4231    fn broken_full_path_wiki_link_is_error() {
4232        let fx = Fixture::new();
4233        let mut body = valid_contact("links to a missing file");
4234        body.push_str("\nSee [[records/contacts/ghost]].\n");
4235        fx.write("records/contacts/a.md", &body);
4236        let issues = fx.store_all();
4237        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4238        assert!(issue.is_error());
4239        assert!(issue.message.contains("records/contacts/ghost"));
4240        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4241    }
4242
4243    #[test]
4244    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4245        let fx = Fixture::new();
4246        let mut body = valid_contact("links with traversal");
4247        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4248        fx.write("records/contacts/a.md", &body);
4249        let issues = fx.store_all();
4250        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4251        assert!(issue.message.contains("not a safe store-relative path"));
4252        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4253    }
4254
4255    #[test]
4256    fn valid_full_path_wiki_link_passes() {
4257        let fx = Fixture::new();
4258        fx.write("records/contacts/target.md", &valid_contact("target"));
4259        let mut body = valid_contact("links to target");
4260        body.push_str("\nSee [[records/contacts/target]].\n");
4261        fx.write("wiki/people/a.md", &body);
4262        let issues = fx.store_all();
4263        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4264        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4265    }
4266
4267    #[test]
4268    fn md_extension_wiki_link_warns_and_resolves() {
4269        let fx = Fixture::new();
4270        fx.write("records/contacts/target.md", &valid_contact("target"));
4271        let mut body = valid_contact("links with extension");
4272        body.push_str("\nSee [[records/contacts/target.md]].\n");
4273        fx.write("records/contacts/a.md", &body);
4274        let issues = fx.store_all();
4275        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4276        assert_eq!(issue.severity, Severity::Warning);
4277        assert_eq!(
4278            issue.suggestion.as_deref(),
4279            Some("drop the extension: [[records/contacts/target]]")
4280        );
4281        // The target exists once `.md` is stripped → not broken.
4282        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4283    }
4284
4285    #[test]
4286    fn wiki_links_in_code_fences_are_ignored() {
4287        let fx = Fixture::new();
4288        let mut body = valid_contact("has a fenced example");
4289        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4290        fx.write("wiki/people/a.md", &body);
4291        let issues = fx.store_all();
4292        assert!(
4293            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4294            "fenced wiki-links must be ignored: {issues:#?}"
4295        );
4296    }
4297
4298    #[test]
4299    fn flow_form_link_list_in_frontmatter_is_error() {
4300        let fx = Fixture::new();
4301        fx.write(
4302            "records/meetings/m.md",
4303            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4304        );
4305        let issues = fx.store_all();
4306        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4307        assert!(issue.is_error());
4308        assert_eq!(issue.key.as_deref(), Some("attendees"));
4309    }
4310
4311    #[test]
4312    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4313        let fx = Fixture::new();
4314        fx.write("records/contacts/a.md", &valid_contact("a"));
4315        fx.write("records/contacts/b.md", &valid_contact("b"));
4316        fx.write(
4317            "records/meetings/m.md",
4318            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4319        );
4320        let issues = fx.store_all();
4321        assert!(
4322            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4323            "{issues:#?}"
4324        );
4325        // Block-form link targets are still integrity-checked (both exist here).
4326        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4327    }
4328
4329    #[test]
4330    fn frontmatter_short_form_link_field_is_error() {
4331        let fx = Fixture::new();
4332        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4333        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4334        fx.write(
4335            "records/synthesis/a.md",
4336            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4337        );
4338        let issues = fx.store_all();
4339        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4340        assert!(issue.is_error());
4341        assert_eq!(issue.key.as_deref(), Some("related"));
4342    }
4343
4344    #[test]
4345    fn unquoted_frontmatter_link_is_recognized() {
4346        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4347        // string. The validator must still see it as a wiki-link (text-based
4348        // extraction). A short-form custom field must report SHORT_FORM, and a
4349        // full-path one with a missing target must report BROKEN.
4350        let fx = Fixture::new();
4351        fx.write(
4352            "records/synthesis/short.md",
4353            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4354        );
4355        fx.write(
4356            "records/synthesis/broken.md",
4357            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4358        );
4359        let issues = fx.store_all();
4360        assert!(
4361            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4362                && i.file == Path::new("records/synthesis/short.md")
4363                && i.key.as_deref() == Some("related")),
4364            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4365        );
4366        assert!(
4367            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4368                && i.file == Path::new("records/synthesis/broken.md")),
4369            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4370        );
4371    }
4372
4373    #[test]
4374    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4375        // A short-form value in a *declared* link field (a `### contact` schema
4376        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4377        // (the target isn't under the prefix), and must NOT also be reported as a
4378        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4379        let mut fx = Fixture::new();
4380        fx.config.schemas.insert(
4381            "contact".into(),
4382            Schema {
4383                fields: vec![FieldSpec {
4384                    name: "company".into(),
4385                    link_prefix: Some(PathBuf::from("records/companies")),
4386                    ..Default::default()
4387                }],
4388                ..Default::default()
4389            },
4390        );
4391        fx.write(
4392            "records/contacts/a.md",
4393            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4394        );
4395        let issues = fx.store_all();
4396        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4397        assert_eq!(issue.key.as_deref(), Some("company"));
4398        // The same link must NOT also be double-reported via the generic path.
4399        assert!(
4400            !issues
4401                .iter()
4402                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4403                    && i.key.as_deref() == Some("company")),
4404            "schema link fields are checked once, by the schema path: {issues:#?}"
4405        );
4406    }
4407
4408    #[test]
4409    fn schema_link_field_with_md_extension_still_warns() {
4410        let mut fx = Fixture::new();
4411        fx.config.schemas.insert(
4412            "contact".into(),
4413            Schema {
4414                fields: vec![FieldSpec {
4415                    name: "company".into(),
4416                    link_prefix: Some(PathBuf::from("records/companies")),
4417                    ..Default::default()
4418                }],
4419                ..Default::default()
4420            },
4421        );
4422        fx.write(
4423            "records/companies/acme.md",
4424            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4425        );
4426        fx.write(
4427            "records/contacts/a.md",
4428            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4429        );
4430        let issues = fx.store_all();
4431        let issue = issues
4432            .iter()
4433            .find(|i| {
4434                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4435            })
4436            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4437        assert_eq!(issue.severity, Severity::Warning);
4438        assert!(
4439            !issues
4440                .iter()
4441                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4442            "extensionless existence check should still find acme.md: {issues:#?}"
4443        );
4444    }
4445
4446    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4447
4448    #[test]
4449    fn explicit_schema_required_shape_enum() {
4450        let fx = {
4451            let mut fx = Fixture::new();
4452            // contact schema: name required, email required+email shape,
4453            // status enum: active|inactive
4454            let schema = Schema {
4455                fields: vec![
4456                    FieldSpec {
4457                        name: "name".into(),
4458                        required: true,
4459                        ..Default::default()
4460                    },
4461                    FieldSpec {
4462                        name: "email".into(),
4463                        required: true,
4464                        shape: Some(Shape::Email),
4465                        ..Default::default()
4466                    },
4467                    FieldSpec {
4468                        name: "status".into(),
4469                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4470                        ..Default::default()
4471                    },
4472                ],
4473                ..Default::default()
4474            };
4475            fx.config.schemas.insert("contact".into(), schema);
4476            fx
4477        };
4478        fx.write(
4479            "records/contacts/a.md",
4480            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4481        );
4482        let issues = fx.store_all();
4483        // name absent → MISSING_REQUIRED
4484        assert!(
4485            issues
4486                .iter()
4487                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4488                    && i.key.as_deref() == Some("name")),
4489            "{issues:#?}"
4490        );
4491        // email malformed → SHAPE_MISMATCH
4492        assert!(
4493            issues.iter().any(
4494                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4495            ),
4496            "{issues:#?}"
4497        );
4498        // status archived not in enum → ENUM_VIOLATION
4499        assert!(
4500            issues
4501                .iter()
4502                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4503                    && i.key.as_deref() == Some("status")),
4504            "{issues:#?}"
4505        );
4506    }
4507
4508    #[test]
4509    fn schema_without_link_field_allows_plain_value() {
4510        // A `contact` schema with no `company` link field means a plain `company`
4511        // string is fine — schema enforcement is exactly what the store declares,
4512        // nothing implicit.
4513        let mut fx = Fixture::new();
4514        fx.config.schemas.insert(
4515            "contact".into(),
4516            Schema {
4517                fields: vec![FieldSpec {
4518                    name: "name".into(),
4519                    required: true,
4520                    ..Default::default()
4521                }],
4522                ..Default::default()
4523            },
4524        );
4525        fx.write(
4526            "records/contacts/a.md",
4527            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4528        );
4529        let issues = fx.store_all();
4530        assert!(
4531            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4532            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4533        );
4534    }
4535
4536    #[test]
4537    fn schema_link_field_plain_value_is_prefix_mismatch() {
4538        // The surviving link-enforcement path: a declared `link to <prefix>/`
4539        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4540        let mut fx = Fixture::new();
4541        fx.config.schemas.insert(
4542            "contact".into(),
4543            Schema {
4544                fields: vec![FieldSpec {
4545                    name: "company".into(),
4546                    link_prefix: Some(PathBuf::from("records/companies")),
4547                    ..Default::default()
4548                }],
4549                ..Default::default()
4550            },
4551        );
4552        fx.write(
4553            "records/contacts/a.md",
4554            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4555        );
4556        let issues = fx.store_all();
4557        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4558        assert_eq!(issue.key.as_deref(), Some("company"));
4559        assert!(issue
4560            .suggestion
4561            .as_deref()
4562            .unwrap()
4563            .contains("records/companies/"));
4564    }
4565
4566    #[test]
4567    fn schema_shape_int_and_url_and_currency() {
4568        let mut fx = Fixture::new();
4569        fx.config.schemas.insert(
4570            "widget".into(),
4571            Schema {
4572                fields: vec![
4573                    FieldSpec {
4574                        name: "qty".into(),
4575                        shape: Some(Shape::Int),
4576                        ..Default::default()
4577                    },
4578                    FieldSpec {
4579                        name: "site".into(),
4580                        shape: Some(Shape::Url),
4581                        ..Default::default()
4582                    },
4583                    FieldSpec {
4584                        name: "price".into(),
4585                        shape: Some(Shape::Currency),
4586                        ..Default::default()
4587                    },
4588                ],
4589                ..Default::default()
4590            },
4591        );
4592        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4593        // ISO code + amount). It must pass — it used to spuriously fail.
4594        fx.write(
4595            "records/widgets/ok.md",
4596            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4597        );
4598        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4599        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4600        // the under-rejection half of the finding.
4601        fx.write(
4602            "records/widgets/bad.md",
4603            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4604        );
4605        let issues = fx.store_all();
4606        let bad_shape: Vec<_> = issues
4607            .iter()
4608            .filter(|i| {
4609                i.code == codes::SCHEMA_SHAPE_MISMATCH
4610                    && i.file == Path::new("records/widgets/bad.md")
4611            })
4612            .map(|i| i.key.clone().unwrap_or_default())
4613            .collect();
4614        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4615        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4616        assert!(
4617            bad_shape.contains(&"price".to_string()),
4618            "inf must be rejected as currency: {issues:#?}"
4619        );
4620        assert!(
4621            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4622                && i.file == Path::new("records/widgets/ok.md")),
4623            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4624        );
4625    }
4626
4627    #[test]
4628    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4629        let mut fx = Fixture::new();
4630        fx.config.schemas.insert(
4631            "contact".into(),
4632            Schema {
4633                fields: vec![
4634                    FieldSpec {
4635                        name: "email".into(),
4636                        required: true,
4637                        shape: Some(Shape::Email),
4638                        ..Default::default()
4639                    },
4640                    FieldSpec {
4641                        name: "status".into(),
4642                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4643                        ..Default::default()
4644                    },
4645                ],
4646                ..Default::default()
4647            },
4648        );
4649        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4650        // used to slip through entirely (`scalar_string` → None → the shape and
4651        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4652        fx.write(
4653            "records/contacts/bad.md",
4654            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4655        );
4656        let issues = fx.store_all();
4657        let mismatched: Vec<_> = issues
4658            .iter()
4659            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4660            .map(|i| i.key.clone().unwrap_or_default())
4661            .collect();
4662        assert!(
4663            mismatched.contains(&"email".to_string()),
4664            "list-valued required email must flag: {issues:#?}"
4665        );
4666        assert!(
4667            mismatched.contains(&"status".to_string()),
4668            "list-valued enum must flag: {issues:#?}"
4669        );
4670    }
4671
4672    #[test]
4673    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4674        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4675        for ok in [
4676            "100",
4677            "1234.56",
4678            "$1,234.50",
4679            "USD 100", // the finding's headline probe — used to be false
4680            "usd 100", // case-insensitive code
4681            "EUR 9.50",
4682            "£12",
4683            "¥1000",
4684            "-5.00", // signed amounts are real (refunds)
4685            "+5",
4686            "1,000,000",
4687        ] {
4688            assert!(is_currency(ok), "expected currency: {ok:?}");
4689        }
4690        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4691        // bare-code / exponent cases the docstring forbids.
4692        for bad in [
4693            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4694            "12.999", // 3 decimals
4695            "1.2345", // 4 decimals
4696            "USD",    // bare code, no amount
4697            "$",      // bare symbol
4698            "free", "", " ", "1e3",      // exponent form
4699            "1.",       // trailing dot, no fractional digits
4700            ".5",       // leading dot, no integer digits
4701            "1 000",    // space as separator is not a thousands separator
4702            "USDD 100", // 4-letter "code" must not strip
4703        ] {
4704            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4705        }
4706    }
4707
4708    // ── policies ───────────────────────────────────────────────────────────
4709
4710    #[test]
4711    fn ignored_type_present_is_info() {
4712        let mut fx = Fixture::new();
4713        fx.config.ignored_types.push("temp".into());
4714        fx.write(
4715            "records/temps/x.md",
4716            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4717        );
4718        let issues = fx.store_all();
4719        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4720        assert_eq!(issue.severity, Severity::Info);
4721        assert!(!issue.is_error());
4722        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4723    }
4724
4725    #[test]
4726    fn conclusion_record_derived_from_ignored_type_warns() {
4727        let mut fx = Fixture::new();
4728        fx.config.ignored_types.push("temp".into());
4729        fx.write(
4730            "records/temps/x.md",
4731            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4732        );
4733        // The policy now gates on `meta-type: conclusion` (not the retired
4734        // `type: wiki-page`): a conclusion record that derives from an
4735        // ignored-type record warns.
4736        fx.write(
4737            "records/synthesis/t.md",
4738            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4739        );
4740        let issues = fx.store_all();
4741        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4742        assert_eq!(issue.severity, Severity::Warning);
4743        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4744        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4745    }
4746
    /// The shared `derived_from_ignored_type` entry point — the single
    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
    /// warning) now route through, so they cannot diverge. This pins its
    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
    /// match carrying the resolved target type, a non-ignored target
    /// rejected, and an ignored-type target found when it is not the first
    /// one supplied.
    ///
    /// The helper is called directly (not through `validate_all`), so each
    /// assertion below isolates exactly one gate.
    #[test]
    fn derived_from_ignored_type_is_the_shared_policy_decision() {
        let mut fx = Fixture::new();
        fx.config.ignored_types.push("secret".into());
        // An ignored-type record …
        fx.write(
            "records/secrets/s.md",
            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
        );
        // … and a non-ignored record.
        fx.write(
            "records/contacts/c.md",
            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
        );
        // Built after `ignored_types` was populated, so this store carries
        // the `secret` policy for the first four checks.
        let store = fx.store();

        // Positive: a conclusion record deriving from the ignored-type record
        // matches, and the hit carries both the target (as written) and its
        // resolved type.
        let hit =
            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
                .expect("conclusion → ignored-type record must match");
        assert_eq!(hit.target, "records/secrets/s");
        assert_eq!(hit.target_type, "secret");

        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
        // the same ignored-type target.
        assert_eq!(
            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
            None,
            "only conclusion derivation is policed"
        );

        // Target gate: a conclusion deriving from a non-ignored record is fine.
        assert_eq!(
            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
            None,
            "deriving from a non-ignored type is allowed"
        );

        // First match wins across multiple targets (here the second is the hit).
        let hit = derived_from_ignored_type(
            &store,
            "conclusion",
            ["records/contacts/c", "records/secrets/s"],
        )
        .expect("a later ignored-type target must still be found");
        assert_eq!(hit.target, "records/secrets/s");

        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
        // The store is rebuilt (shadowing the earlier one) after the config
        // is cleared, so the call below runs against the emptied policy.
        fx.config.ignored_types.clear();
        let store = fx.store();
        assert_eq!(
            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
            None,
            "an empty ignored-types policy short-circuits"
        );
    }
4812
4813    // ── duplicates ───────────────────────────────────────────────────────────
4814
4815    #[test]
4816    fn dup_id_is_hard_error_with_related() {
4817        let fx = Fixture::new();
4818        fx.write(
4819            "records/contacts/a.md",
4820            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4821        );
4822        fx.write(
4823            "records/contacts/b.md",
4824            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4825        );
4826        let issues = fx.store_all();
4827        // Reporting rule #1: ONE issue per collision group, keyed on the
4828        // lexicographically smallest path (`a.md`), partner in `related`.
4829        assert_eq!(
4830            count(&issues, codes::DUP_ID),
4831            1,
4832            "one issue per group: {issues:#?}"
4833        );
4834        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
4835        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
4836        assert!(a.is_error());
4837        assert_eq!(a.key.as_deref(), Some("id"));
4838        assert_eq!(
4839            a.line,
4840            Some(3),
4841            "anchors to the `id` line on the reported file"
4842        );
4843        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
4844    }
4845
4846    #[test]
4847    fn dup_id_not_fired_in_working_set() {
4848        // DUP_* is an --all-only cross-file check; the working set must not run it.
4849        let fx = Fixture::new();
4850        fx.write(
4851            "records/contacts/a.md",
4852            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4853        );
4854        fx.write(
4855            "records/contacts/b.md",
4856            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4857        );
4858        // Log says both changed since epoch, so they're in the working set.
4859        fx.write(
4860            "log.md",
4861            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
4862        );
4863        let issues = validate_working_set(&fx.store(), None).unwrap();
4864        assert!(
4865            !has(&issues, codes::DUP_ID),
4866            "DUP_ID is --all only: {issues:#?}"
4867        );
4868    }
4869
4870    #[test]
4871    fn dup_unique_key_single_field_is_warning() {
4872        let mut fx = Fixture::new();
4873        // contact declares `- unique: email`.
4874        fx.config.schemas.insert(
4875            "contact".into(),
4876            Schema {
4877                unique_keys: vec![vec!["email".into()]],
4878                ..Default::default()
4879            },
4880        );
4881        for (f, name) in [("a", "A"), ("b", "B")] {
4882            fx.write(
4883                &format!("records/contacts/{f}.md"),
4884                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
4885            );
4886        }
4887        let issues = fx.store_all();
4888        // One issue per group (rule #1), keyed on the smallest path, anchored to
4889        // the single `email` field.
4890        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
4891        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4892        assert_eq!(dup.severity, Severity::Warning);
4893        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
4894        assert_eq!(dup.key.as_deref(), Some("email"));
4895        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
4896    }
4897
4898    #[test]
4899    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
4900        let mut fx = Fixture::new();
4901        // expense declares `- unique: date, amount, vendor` (a compound key).
4902        fx.config.schemas.insert(
4903            "expense".into(),
4904            Schema {
4905                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
4906                ..Default::default()
4907            },
4908        );
4909        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
4910        let exp = |f: &str, amount: &str| {
4911            format!(
4912            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
4913        )
4914        };
4915        fx.write("records/expenses/e1.md", &exp("e1", "100"));
4916        fx.write("records/expenses/e2.md", &exp("e2", "100"));
4917        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
4918        let issues = fx.store_all();
4919        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
4920        // (e1) with e2 in `related`; e3 differs on amount and never appears.
4921        assert_eq!(
4922            count(&issues, codes::DUP_UNIQUE_KEY),
4923            1,
4924            "only e1+e2 collide, one issue: {issues:#?}"
4925        );
4926        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4927        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
4928        assert_eq!(
4929            dup.line,
4930            Some(1),
4931            "compound-key collision anchors to line 1"
4932        );
4933        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
4934        assert!(
4935            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
4936                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
4937            "e3 differs on amount and must not collide: {issues:#?}"
4938        );
4939    }
4940
4941    #[test]
4942    fn dup_unique_key_list_field_is_order_independent() {
4943        let mut fx = Fixture::new();
4944        // meeting declares `- unique: date, attendees`; the list field is a set.
4945        fx.config.schemas.insert(
4946            "meeting".into(),
4947            Schema {
4948                unique_keys: vec![vec!["date".into(), "attendees".into()]],
4949                ..Default::default()
4950            },
4951        );
4952        fx.write("records/contacts/a.md", &valid_contact("a"));
4953        fx.write("records/contacts/b.md", &valid_contact("b"));
4954        let m = |f: &str, order: &str| {
4955            let attendees = if order == "ab" {
4956                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
4957            } else {
4958                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
4959            };
4960            format!(
4961                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
4962            )
4963        };
4964        fx.write("records/meetings/m1.md", &m("m1", "ab"));
4965        fx.write("records/meetings/m2.md", &m("m2", "ba"));
4966        let issues = fx.store_all();
4967        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
4968        // → a single issue on the smaller path.
4969        assert_eq!(
4970            count(&issues, codes::DUP_UNIQUE_KEY),
4971            1,
4972            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
4973        );
4974        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4975        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
4976        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
4977    }
4978
4979    // ── indexes ───────────────────────────────────────────────────────────────
4980
4981    #[test]
4982    fn missing_indexes_at_all_three_levels() {
4983        let fx = Fixture::new();
4984        fx.write("records/contacts/a.md", &valid_contact("a"));
4985        let issues = fx.store_all();
4986        // root, layer (records), and type-folder (records/contacts) all missing.
4987        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
4988        // would-be index.md), per the field convention `EXPECTED` pins.
4989        let missing_files: BTreeSet<PathBuf> = issues
4990            .iter()
4991            .filter(|i| i.code == codes::INDEX_MISSING)
4992            .map(|i| i.file.clone())
4993            .collect();
4994        assert!(
4995            missing_files.contains(&PathBuf::from("index.md")),
4996            "{issues:#?}"
4997        );
4998        assert!(
4999            missing_files.contains(&PathBuf::from("records/index.md")),
5000            "{issues:#?}"
5001        );
5002        assert!(
5003            missing_files.contains(&PathBuf::from("records/contacts")),
5004            "{issues:#?}"
5005        );
5006        // When the index.md is entirely absent we do NOT additionally fire
5007        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5008        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5009    }
5010
5011    #[test]
5012    fn index_stale_entry_and_missing_entry() {
5013        let fx = Fixture::new();
5014        fx.write(
5015            "records/contacts/present.md",
5016            &valid_contact("present contact"),
5017        );
5018        // Indexes for the parents (root/layer) present so we isolate type-folder.
5019        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5020        fx.write(
5021            "records/index.md",
5022            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5023        );
5024        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5025        fx.write(
5026            "records/contacts/index.md",
5027            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5028        );
5029        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5030        let issues = fx.store_all();
5031        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5032        assert!(stale.message.contains("ghost"));
5033        assert!(stale.is_error());
5034        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5035        assert!(
5036            missing.message.contains("present.md"),
5037            "{}",
5038            missing.message
5039        );
5040    }
5041
5042    #[test]
5043    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5044        let fx = Fixture::new();
5045        fx.write("records/contacts/a.md", &valid_contact("a"));
5046        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5047        fx.write(
5048            "records/index.md",
5049            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5050        );
5051        fx.write(
5052            "records/contacts/index.md",
5053            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5054        );
5055        fx.write(
5056            "records/contacts/index.jsonl",
5057            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5058        );
5059        let issues = fx.store_all();
5060        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5061        assert!(stale.message.contains("not a safe store-relative path"));
5062    }
5063
5064    #[test]
5065    fn index_summary_mismatch() {
5066        let fx = Fixture::new();
5067        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5068        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5069        fx.write(
5070            "records/index.md",
5071            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5072        );
5073        fx.write(
5074            "records/contacts/index.md",
5075            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5076        );
5077        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5078        let issues = fx.store_all();
5079        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5080        assert!(issue.is_error());
5081        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5082    }
5083
5084    #[test]
5085    fn index_summary_match_passes() {
5086        let fx = Fixture::new();
5087        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5088        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5089        fx.write(
5090            "records/index.md",
5091            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5092        );
5093        fx.write(
5094            "records/contacts/index.md",
5095            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5096        );
5097        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5098        let issues = fx.store_all();
5099        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5100    }
5101
5102    #[test]
5103    fn index_entry_with_tag_suffix_matches_summary() {
5104        let fx = Fixture::new();
5105        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5106        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5107        fx.write(
5108            "records/index.md",
5109            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5110        );
5111        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5112        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5113        // which must be stripped before comparing against the file's summary.
5114        fx.write(
5115            "records/contacts/index.md",
5116            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5117        );
5118        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5119        let issues = fx.store_all();
5120        assert!(
5121            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5122            "tag suffix should be stripped: {issues:#?}"
5123        );
5124    }
5125
5126    #[test]
5127    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5128        // Regression (the finding): a tagless file whose `summary` legitimately
5129        // ends in a single-spaced ` · #word` tail round-trips through `index
5130        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5131        // file has no tags). The validator must NOT mistake that single-spaced
5132        // tail for the renderer's tag suffix, or it reports a spurious — and
5133        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5134        let fx = Fixture::new();
5135        fx.write(
5136            "records/contacts/a.md",
5137            &valid_contact("Standup notes · #standup"),
5138        );
5139        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5140        fx.write(
5141            "records/index.md",
5142            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5143        );
5144        fx.write(
5145            "records/contacts/index.md",
5146            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5147        );
5148        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5149        let issues = fx.store_all();
5150        assert!(
5151            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5152            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5153        );
5154    }
5155
5156    #[test]
5157    fn index_jsonl_desync_missing_file_in_jsonl() {
5158        let fx = Fixture::new();
5159        fx.write("records/contacts/a.md", &valid_contact("a"));
5160        fx.write("records/contacts/b.md", &valid_contact("b"));
5161        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5162        fx.write(
5163            "records/index.md",
5164            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5165        );
5166        fx.write(
5167            "records/contacts/index.md",
5168            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5169        );
5170        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5171        fx.write(
5172            "records/contacts/index.jsonl",
5173            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5174        );
5175        let issues = fx.store_all();
5176        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5177        assert!(desync.message.contains("b.md"), "{}", desync.message);
5178    }
5179
5180    #[test]
5181    fn index_jsonl_desync_record_points_at_missing_file() {
5182        let fx = Fixture::new();
5183        fx.write("records/contacts/a.md", &valid_contact("a"));
5184        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5185        fx.write(
5186            "records/index.md",
5187            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5188        );
5189        fx.write(
5190            "records/contacts/index.md",
5191            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5192        );
5193        fx.write(
5194            "records/contacts/index.jsonl",
5195            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5196        );
5197        let issues = fx.store_all();
5198        assert!(
5199            issues
5200                .iter()
5201                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5202            "{issues:#?}"
5203        );
5204    }
5205
5206    #[test]
5207    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5208        let fx = Fixture::new();
5209        fx.write("records/contacts/a.md", &valid_contact("a"));
5210        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5211        fx.write(
5212            "records/index.md",
5213            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5214        );
5215        fx.write(
5216            "records/contacts/index.md",
5217            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5218        );
5219        fx.write(
5220            "records/contacts/index.jsonl",
5221            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5222        );
5223        let issues = fx.store_all();
5224        assert!(
5225            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5226                && i.message.contains("not a safe store-relative path")),
5227            "{issues:#?}"
5228        );
5229    }
5230
5231    #[test]
5232    fn index_jsonl_stale_summary() {
5233        let fx = Fixture::new();
5234        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5235        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5236        fx.write(
5237            "records/index.md",
5238            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5239        );
5240        fx.write(
5241            "records/contacts/index.md",
5242            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5243        );
5244        // jsonl summary disagrees with the file frontmatter.
5245        fx.write(
5246            "records/contacts/index.jsonl",
5247            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5248        );
5249        let issues = fx.store_all();
5250        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5251        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5252        assert!(stale.key.as_deref().unwrap().contains("summary"));
5253    }
5254
5255    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5256    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5257    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5258    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5259    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5260    /// `email` validated clean and answered `--where email=…` with a phantom
5261    /// value present in no file. This is the direct regression guard.
5262    #[test]
5263    fn index_jsonl_stale_queryable_field_email() {
5264        let fx = Fixture::new();
5265        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5266        fx.write("records/contacts/a.md", contact);
5267        // Start from the canonical, fully-correct sidecar set …
5268        fx.rebuild_indexes();
5269        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5270        let good = fs::read_to_string(&jsonl_path).unwrap();
5271        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5272        assert!(
5273            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5274            "freshly-rebuilt sidecar must not be stale"
5275        );
5276        // … then desync ONLY the email so it's the single differing field.
5277        assert!(
5278            good.contains("real@correct.com"),
5279            "sidecar projects email: {good}"
5280        );
5281        fx.write(
5282            "records/contacts/index.jsonl",
5283            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5284        );
5285
5286        let issues = fx.store_all();
5287        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5288        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5289        // The mismatch is reported precisely on `email`, and summary/type — which
5290        // still match — are NOT named.
5291        let key = stale.key.as_deref().unwrap();
5292        assert!(
5293            key.contains("email"),
5294            "expected `email` in stale key, got {key:?}"
5295        );
5296        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5297        assert!(!key.contains("type"), "type still matches: {key:?}");
5298    }
5299
5300    /// Broaden the guard across the typed/list/timestamp projections at once:
5301    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5302    /// caught, with all three named in one issue.
5303    #[test]
5304    fn index_jsonl_stale_typed_and_list_fields() {
5305        let fx = Fixture::new();
5306        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5307        fx.write("records/expenses/e.md", expense);
5308        fx.rebuild_indexes();
5309        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5310        let good = fs::read_to_string(&jsonl_path).unwrap();
5311        assert!(
5312            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5313            "freshly-rebuilt sidecar must not be stale"
5314        );
5315        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5316        let stale_line = good
5317            .replace("\"q2\"", "\"WRONG-TAG\"")
5318            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5319            .replace("1299", "9999");
5320        fx.write("records/expenses/index.jsonl", &stale_line);
5321
5322        let issues = fx.store_all();
5323        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5324        let key = stale.key.as_deref().unwrap();
5325        for expected in ["amount", "tags", "updated"] {
5326            assert!(
5327                key.contains(expected),
5328                "expected `{expected}` in stale key, got {key:?}"
5329            );
5330        }
5331    }
5332
5333    #[test]
5334    fn index_orphan_in_noncanonical_folder() {
5335        let fx = Fixture::new();
5336        fx.write("records/contacts/a.md", &valid_contact("a"));
5337        // Build the canonical indexes so they aren't reported as orphans.
5338        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5339        fx.write(
5340            "records/index.md",
5341            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5342        );
5343        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5344        fx.write(
5345            "records/contacts/index.jsonl",
5346            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5347        );
5348        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5349        fx.write(
5350            "records/contacts/subfolder/index.md",
5351            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5352        );
5353        let issues = fx.store_all();
5354        let orphan = find(&issues, codes::INDEX_ORPHAN);
5355        assert_eq!(orphan.severity, Severity::Warning);
5356        assert_eq!(
5357            orphan.file,
5358            PathBuf::from("records/contacts/subfolder/index.md")
5359        );
5360    }
5361
5362    #[test]
5363    fn index_wrong_scope() {
5364        let fx = Fixture::new();
5365        fx.write("records/contacts/a.md", &valid_contact("a"));
5366        // Root index declares the wrong scope.
5367        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5368        fx.write(
5369            "records/index.md",
5370            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5371        );
5372        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5373        fx.write(
5374            "records/contacts/index.jsonl",
5375            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5376        );
5377        let issues = fx.store_all();
5378        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5379        assert_eq!(issue.severity, Severity::Warning);
5380        assert_eq!(issue.file, PathBuf::from("index.md"));
5381    }
5382
5383    #[test]
5384    fn capped_type_folder_index_does_not_flag_missing_entries() {
5385        // Over the 500-entry cap, omitted entries are expected, not an error.
5386        let fx = Fixture::new();
5387        for i in 0..501 {
5388            fx.write(
5389                &format!("records/contacts/c{i:04}.md"),
5390                &valid_contact(&format!("contact {i}")),
5391            );
5392        }
5393        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5394        fx.write(
5395            "records/index.md",
5396            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5397        );
5398        // Type-folder index lists only ONE entry + a More footer.
5399        fx.write(
5400            "records/contacts/index.md",
5401            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5402        );
5403        // jsonl must still be complete — write all 501 lines.
5404        let mut jsonl = String::new();
5405        for i in 0..501 {
5406            jsonl.push_str(&format!(
5407                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5408            ));
5409        }
5410        fx.write("records/contacts/index.jsonl", &jsonl);
5411        let issues = fx.store_all();
5412        assert!(
5413            !has(&issues, codes::INDEX_MISSING_ENTRY),
5414            "over the cap, missing browse entries are expected: {issues:#?}"
5415        );
5416        // But the jsonl is complete → no desync.
5417        assert!(
5418            !has(&issues, codes::INDEX_JSONL_DESYNC),
5419            "{:#?}",
5420            issues
5421                .iter()
5422                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5423                .collect::<Vec<_>>()
5424        );
5425    }
5426
5427    // ── log ────────────────────────────────────────────────────────────────
5428
5429    #[test]
5430    fn log_bad_timestamp_unknown_kind_out_of_order() {
5431        let fx = Fixture::new();
5432        fx.write(
5433            "log.md",
5434            concat!(
5435                "---\ntype: log\n---\n\n# Log\n\n",
5436                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5437                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5438                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5439                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5440            ),
5441        );
5442        let issues = fx.store_all();
5443        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5444        assert_eq!(
5445            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5446            Severity::Warning
5447        );
5448        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5449        assert_eq!(unknown.severity, Severity::Warning);
5450        assert!(unknown.message.contains("frobnicate"));
5451        assert!(unknown
5452            .suggestion
5453            .as_deref()
5454            .is_some_and(|s| s.contains("create")));
5455        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5456        assert!(bad.is_error());
5457    }
5458
5459    #[test]
5460    fn log_validate_entry_without_object_is_well_formed() {
5461        let fx = Fixture::new();
5462        fx.write(
5463            "log.md",
5464            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5465        );
5466        let issues = fx.store_all();
5467        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5468        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5469    }
5470
5471    #[test]
5472    fn log_in_order_is_clean() {
5473        let fx = Fixture::new();
5474        fx.write(
5475            "log.md",
5476            concat!(
5477                "---\ntype: log\n---\n\n",
5478                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5479                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5480            ),
5481        );
5482        let issues = fx.store_all();
5483        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5484    }
5485
5486    #[test]
5487    fn log_not_checked_in_working_set() {
5488        // log.md ordering is an --all-only check.
5489        let fx = Fixture::new();
5490        fx.write(
5491            "log.md",
5492            concat!(
5493                "---\ntype: log\n---\n\n",
5494                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5495                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5496            ),
5497        );
5498        let issues = validate_working_set(&fx.store(), None).unwrap();
5499        assert!(
5500            !has(&issues, codes::LOG_OUT_OF_ORDER),
5501            "log ordering is --all only: {issues:#?}"
5502        );
5503    }
5504
5505    // ── working-set scoping ───────────────────────────────────────────────────
5506
5507    #[test]
5508    fn working_set_validates_only_changed_files() {
5509        let fx = Fixture::new();
5510        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5511        // in the log → working set must skip it.
5512        fx.write(
5513            "records/contacts/dirty.md",
5514            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5515        );
5516        fx.write(
5517            "records/contacts/unlogged.md",
5518            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5519        );
5520        fx.write(
5521            "log.md",
5522            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5523        );
5524        let issues = validate_working_set(&fx.store(), None).unwrap();
5525        assert!(
5526            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5527                && i.file == Path::new("records/contacts/dirty.md")),
5528            "{issues:#?}"
5529        );
5530        assert!(
5531            !issues
5532                .iter()
5533                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5534            "unlogged file must not be in the working set: {issues:#?}"
5535        );
5536    }
5537
5538    #[test]
5539    fn working_set_includes_incoming_linkers_to_changed_path() {
5540        let fx = Fixture::new();
5541        // `changed` was renamed/removed (logged). `linker` points at it with a
5542        // now-broken link and was NOT itself logged — but must be pulled in.
5543        fx.write(
5544            "wiki/people/linker.md",
5545            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5546        );
5547        // `changed.md` does NOT exist on disk (removed).
5548        fx.write(
5549            "log.md",
5550            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5551        );
5552        let issues = validate_working_set(&fx.store(), None).unwrap();
5553        assert!(
5554            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5555                && i.file == Path::new("wiki/people/linker.md")),
5556            "incoming linker to a removed path must be validated: {issues:#?}"
5557        );
5558    }
5559
5560    #[test]
5561    fn working_set_respects_explicit_since_cutoff() {
5562        let fx = Fixture::new();
5563        fx.write(
5564            "records/contacts/old.md",
5565            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5566        );
5567        fx.write(
5568            "records/contacts/new.md",
5569            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5570        );
5571        fx.write(
5572            "log.md",
5573            concat!(
5574                "---\ntype: log\n---\n\n",
5575                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5576                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5577            ),
5578        );
5579        // Cutoff after `old` but before `new`.
5580        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5581        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5582        assert!(
5583            issues
5584                .iter()
5585                .any(|i| i.file == Path::new("records/contacts/new.md")),
5586            "{issues:#?}"
5587        );
5588        assert!(
5589            !issues
5590                .iter()
5591                .any(|i| i.file == Path::new("records/contacts/old.md")),
5592            "old change is before the cutoff: {issues:#?}"
5593        );
5594    }
5595
5596    #[test]
5597    fn working_set_default_since_is_last_validate_entry() {
5598        let fx = Fixture::new();
5599        // `before` changed before the last validate; `after` changed after.
5600        fx.write(
5601            "records/contacts/before.md",
5602            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5603        );
5604        fx.write(
5605            "records/contacts/after.md",
5606            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5607        );
5608        fx.write(
5609            "log.md",
5610            concat!(
5611                "---\ntype: log\n---\n\n",
5612                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5613                "## [2026-05-21 10:00] validate\nPASS\n\n",
5614                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5615            ),
5616        );
5617        let issues = validate_working_set(&fx.store(), None).unwrap();
5618        assert!(
5619            issues
5620                .iter()
5621                .any(|i| i.file == Path::new("records/contacts/after.md")),
5622            "{issues:#?}"
5623        );
5624        assert!(
5625            !issues
5626                .iter()
5627                .any(|i| i.file == Path::new("records/contacts/before.md")),
5628            "change before the last validate entry is outside the default window: {issues:#?}"
5629        );
5630    }
5631
5632    // ── ordering / determinism ────────────────────────────────────────────────
5633
5634    #[test]
5635    fn issues_are_sorted_by_file_then_line() {
5636        let fx = Fixture::new();
5637        fx.write("wiki/people/z.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5638        fx.write("wiki/people/a.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5639        let issues = fx.store_all();
5640        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5641        let mut sorted = files.clone();
5642        sorted.sort();
5643        assert_eq!(
5644            files, sorted,
5645            "issues must be emitted in a stable file order"
5646        );
5647    }
5648
5649    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5650
5651    #[test]
5652    fn frozen_page_is_not_a_validate_error() {
5653        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5654        // A clean file listed in `### Frozen pages` must validate clean.
5655        let mut fx = Fixture::new();
5656        fx.config
5657            .frozen_pages
5658            .push(PathBuf::from("records/decisions/d.md"));
5659        fx.write(
5660            "records/decisions/d.md",
5661            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5662        );
5663        let issues = fx.store_all();
5664        assert!(
5665            !has(&issues, codes::POLICY_FROZEN_PAGE),
5666            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5667        );
5668    }
5669
5670    #[test]
5671    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5672        // The full-path doctrine makes ambiguity impossible; the defensive code
5673        // must never fire on a normal store.
5674        let fx = Fixture::new();
5675        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5676        let mut body = valid_contact("links to sarah");
5677        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5678        fx.write("wiki/people/p.md", &body);
5679        let issues = fx.store_all();
5680        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5681    }
5682
5683    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5684
#[test]
fn unknown_type_passes_through() {
    // A custom `type` with no matching schema is ambient context: the file
    // has a type (so no FM_MISSING_TYPE) and there is no schema to violate.
    // Only the universal contract (summary, timestamps) applies to it.
    let fx = Fixture::new();
    fx.write(
        "records/proposals/x.md",
        "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
    );
    let issues = fx.store_all();

    // None of the type/schema codes may appear.
    for forbidden in [
        codes::FM_MISSING_TYPE,
        codes::SCHEMA_MISSING_REQUIRED,
        codes::SCHEMA_SHAPE_MISMATCH,
    ] {
        assert!(!has(&issues, forbidden), "{issues:#?}");
    }

    // Nor may any issue be keyed on one of the unknown fields.
    let keyed_on_unknown_field = issues
        .iter()
        .any(|issue| matches!(issue.key.as_deref(), Some("custom_field" | "budget")));
    assert!(
        !keyed_on_unknown_field,
        "unknown fields are ambient context: {issues:#?}"
    );
}
5708
5709    // ── find_links_to prefix-collision safety (working set) ───────────────────
5710
#[test]
fn incoming_linker_scan_does_not_prefix_match() {
    // The changed path `records/contacts/sarah` is a strict prefix of the
    // link target `records/contacts/sarah-chen`. A file that links only to
    // the longer path must not be dragged into the working set.
    let bystander = "wiki/people/only-sarah-chen.md";

    let fx = Fixture::new();
    fx.write(
        bystander,
        "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
    );
    // The change log records only the shorter path as changed.
    fx.write(
        "log.md",
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
    );

    let issues = validate_working_set(&fx.store(), None).unwrap();
    let bystander_path = Path::new(bystander);
    assert!(
        issues.iter().all(|issue| issue.file != bystander_path),
        "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
    );
}
5733
#[test]
fn incoming_linker_scan_pulls_in_catalog_index_md() {
    // CONTRACT: the working-set incoming-linker scan goes through the
    // embedded-ripgrep `Store::find_links_to`, which looks at EVERY `.md`
    // file, `index.md` catalogs included. The walk-and-read over
    // `walk_content_files` excludes `index.md`, so under that older approach a
    // catalog still listing a deleted target kept its dangling entry
    // unreported. This test fails if the scan regresses to that path.
    let catalog = "records/contacts/index.md";

    let fx = Fixture::new();
    // The stale catalog is the ONLY file referencing the deleted contact, so
    // if catalogs are not scanned there is no incoming linker at all.
    fx.write(
        catalog,
        "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
    );
    // The change log records the contact's deletion.
    fx.write(
        "log.md",
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
    );

    let issues = validate_working_set(&fx.store(), None).unwrap();
    let catalog_flagged = issues
        .iter()
        .any(|issue| issue.file == Path::new(catalog) && issue.code == codes::WIKI_LINK_BROKEN);
    assert!(
        catalog_flagged,
        "the catalog `index.md` linking to the deleted target must be pulled \
         into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
         uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
         walk-and-read): {issues:#?}"
    );
}
5769
#[test]
fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
    // CONTRACT (the O(changed × store) fix): incoming linkers are found for
    // EVERY changed object through the single batch pass
    // `Store::find_links_to_any`, not one full store read per changed object.
    // Two DISTINCT deleted targets each have exactly one linker; both linkers
    // must be pulled in and flagged. A scan that handled only the first or
    // last changed object, or dropped the batch union, would miss one.
    let body_linker = "wiki/people/refers-sarah.md";
    let field_linker = "records/meetings/2026/05/kickoff.md";

    let fx = Fixture::new();
    // Linker A references deleted target #1 from its body.
    fx.write(
        body_linker,
        "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
    );
    // Linker B references deleted target #2 from a typed frontmatter field,
    // an edge the sidecar `links` projection would miss. That is why this has
    // to be a content scan rather than a sidecar read.
    fx.write(
        field_linker,
        "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
    );
    // Both deletions are recorded in the same change-log window.
    fx.write(
        "log.md",
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
    );

    let issues = validate_working_set(&fx.store(), None).unwrap();
    // True when `rel` carries at least one WIKI_LINK_BROKEN finding.
    let flagged_broken = |rel: &str| {
        issues
            .iter()
            .any(|issue| issue.file == Path::new(rel) && issue.code == codes::WIKI_LINK_BROKEN)
    };

    assert!(
        flagged_broken(body_linker),
        "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
    );
    assert!(
        flagged_broken(field_linker),
        "linker to the SECOND deleted target (typed-field edge) must also be \
         pulled in and flagged — proves the scan covers the whole changed set, \
         not just one object: {issues:#?}"
    );
}
5817
#[test]
fn frontmatter_block_sequence_links_each_get_their_own_line() {
    // Two block-sequence entries link to targets that do not exist, so each
    // yields a WIKI_LINK_BROKEN; the two findings must carry different lines.
    let fx = Fixture::new();
    fx.write(
        "records/meetings/m.md",
        "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
    );
    let issues = fx.store_all();

    // Gather the distinct line values reported for broken links.
    let mut broken_lines: BTreeSet<Option<u32>> = BTreeSet::new();
    for issue in &issues {
        if issue.code == codes::WIKI_LINK_BROKEN {
            broken_lines.insert(issue.line);
        }
    }

    assert_eq!(
        broken_lines.len(),
        2,
        "two distinct broken-link lines: {issues:#?}"
    );
}
5839
5840    // ── Regression: null / non-scalar created/updated ────────────────────────
5841
#[test]
fn null_created_is_missing_not_silently_passed() {
    // Regression: `created:` present but YAML-null used to dodge both checks.
    // FM_MISSING_CREATED only tested `!contains_key`, and FM_BAD_TIMESTAMP
    // no-oped because `scalar_string(null)` is None. It must read as missing.
    const NULL_CREATED_DOC: &str = "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n";

    let fx = Fixture::new();
    fx.write("records/contacts/a.md", NULL_CREATED_DOC);

    let issues = fx.store_all();
    let reported_missing = has(&issues, codes::FM_MISSING_CREATED);
    assert!(
        reported_missing,
        "null `created:` must read as missing: {issues:#?}"
    );
}
5858
#[test]
fn sequence_created_is_bad_timestamp() {
    // `created: [2026]` is a sequence, not a timestamp string, so it must be
    // reported as FM_BAD_TIMESTAMP keyed on `created`.
    const SEQUENCE_CREATED_DOC: &str = "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n";

    let fx = Fixture::new();
    fx.write("records/contacts/a.md", SEQUENCE_CREATED_DOC);

    let issues = fx.store_all();
    let bad_created_timestamp = issues.iter().any(|issue| {
        issue.code == codes::FM_BAD_TIMESTAMP && issue.key.as_deref() == Some("created")
    });
    assert!(
        bad_created_timestamp,
        "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
    );
}
5875
5876    // ── Regression: schema required null / empty-collection ──────────────────
5877
#[test]
fn required_field_null_or_empty_collection_is_missing() {
    // Regression: a plain required field (no shape/enum) set to YAML null
    // (`name:`), an empty list (`name: []`) or an empty mapping (`name: {}`)
    // used to validate with 0 issues, because `scalar_string` returned None
    // and `.unwrap_or(false)` treated the value as non-empty.
    for value in ["", " []", " {}"] {
        // Fresh fixture per variant; the schema requires a bare `name` field.
        let required_name = FieldSpec {
            name: "name".into(),
            required: true,
            ..Default::default()
        };
        let contact_schema = Schema {
            fields: vec![required_name],
            ..Default::default()
        };
        let mut fx = Fixture::new();
        fx.config.schemas.insert("contact".into(), contact_schema);

        let document = format!(
            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
        );
        fx.write("records/contacts/a.md", &document);

        let issues = fx.store_all();
        let name_reported_missing = issues.iter().any(|issue| {
            issue.code == codes::SCHEMA_MISSING_REQUIRED && issue.key.as_deref() == Some("name")
        });
        assert!(
            name_reported_missing,
            "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
        );
    }
}
5913
5914    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
5915
#[test]
fn wiki_link_to_raw_source_file_resolves() {
    // Regression: the existence probe only ever stat'd `{bare}.md`, so a body
    // link to a raw `.eml`/`.pdf` kept verbatim under `sources/` was reported
    // as WIKI_LINK_BROKEN. The literal path must resolve.
    let fx = Fixture::new();
    fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
    fx.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
    );

    let issues = fx.store_all();
    let no_broken_links = issues
        .iter()
        .all(|issue| issue.code != codes::WIKI_LINK_BROKEN);
    assert!(
        no_broken_links,
        "a link to an existing raw source file must not be broken: {issues:#?}"
    );
}
5933
5934    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
5935
#[test]
fn non_utf8_content_file_is_reported() {
    // Regression: invalid UTF-8 made check_content_file return None silently,
    // so the store passed with exit 0. The file must surface FM_UNREADABLE.
    let fx = Fixture::new();

    // Written directly with `fs` because the payload is raw, non-UTF-8 bytes.
    let corrupt_file = fx.dir.path().join("records/notes/corrupt.md");
    let notes_dir = corrupt_file.parent().unwrap();
    fs::create_dir_all(notes_dir).unwrap();
    fs::write(&corrupt_file, b"\xFF\xFE\x00\x01").unwrap();

    let issues = validate_working_set(&fx.store(), None).unwrap();
    let reported_unreadable = has(&issues, codes::FM_UNREADABLE);
    assert!(
        reported_unreadable,
        "an unreadable content file must be reported, not silently skipped: {issues:#?}"
    );
}
5951
5952    // ── Regression: code-fence char/run tracking ─────────────────────────────
5953
#[test]
fn tilde_fence_containing_backtick_fence_does_not_invert() {
    // Regression: a `~~~` block may legally contain ``` lines (for example
    // when documenting a backtick fence). A naive toggle flipped `in_fence`
    // on the inner fence and treated the demo `[[fake]]` as a live link. A
    // link nested inside BOTH fences must be skipped.
    let links = extract_wiki_links("~~~markdown\n```\n[[fake-link]]\n```\n~~~\n");
    let nothing_extracted = links.is_empty();
    assert!(
        nothing_extracted,
        "wiki-link inside a nested code fence must be skipped: {links:?}"
    );
}
5967
5968    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
5969
#[test]
fn all_sweep_visits_in_layer_log_folder() {
    // Regression: `validate --all` pruned every directory named `log`, which
    // hid a real content folder such as `records/log/` from the full sweep and
    // made it report FEWER errors than the default scope. A file there with
    // no frontmatter must still produce FM_MISSING_TYPE under --all.
    let fx = Fixture::new();
    fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");

    let issues = fx.store_all();
    let reported_missing_type = has(&issues, codes::FM_MISSING_TYPE);
    assert!(
        reported_missing_type,
        "--all must validate files under an in-layer `log/` folder: {issues:#?}"
    );
}
5984
5985    // ── Regression: flow-form list with whitespace ───────────────────────────
5986
#[test]
fn flow_form_link_list_with_spaces_is_flagged() {
    // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
    // mis-encoding as `[[[a]]]`, yet slipped past the literal
    // `starts_with("[[[")` text test. Detection is value-based and must catch
    // the whitespace variant as well.
    let frontmatter = "attendees: [ [[records/contacts/elena]] ]\n";
    let keys = detect_flow_form_link_lists(frontmatter);

    let attendees_detected = keys.iter().any(|key| key == "attendees");
    assert!(
        attendees_detected,
        "spaced flow-form list must be detected: {keys:?}"
    );
}
5998
5999    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6000
#[test]
fn middot_hashtag_summary_tail_round_trips() {
    // Regression: a tagless summary ending in a single-spaced ` · #word` tail
    // round-trips through the renderer verbatim, but a loose ` · ` strip
    // mistook it for the tag block and produced a spurious, unfixable
    // INDEX_SUMMARY_MISMATCH. Only the renderer's exact double-spaced `  ·  `
    // delimiter marks the tag block.
    //
    // Each row: (index entry text, expected summary, failure explanation).
    let cases = [
        (
            "— Standup notes · #standup",
            "Standup notes · #standup",
            "a single-spaced middot tail is part of the summary, not a tag block",
        ),
        (
            "— Renewal champion  ·  #renewal #acme",
            "Renewal champion",
            "the renderer's double-spaced `  ·  #tag` suffix is stripped",
        ),
    ];

    for (entry, expected_summary, why) in cases {
        assert_eq!(
            extract_index_entry_summary(entry).as_deref(),
            Some(expected_summary),
            "{why}"
        );
    }
}
6020
6021    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6022
#[test]
fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
    // The shortest scheme-plus-host URLs are valid ...
    for (url, why) in [
        ("http://x", "an 8-char http URL is valid"),
        ("https://x", "a 9-char https URL is valid"),
    ] {
        assert!(is_url(url), "{why}");
    }

    // ... while a scheme with nothing after it is not.
    for (url, why) in [
        ("http://", "a bare scheme with no host is rejected"),
        ("https://", "a bare https scheme is rejected"),
    ] {
        assert!(!is_url(url), "{why}");
    }
}
6030
#[test]
fn email_shape_rejects_double_at() {
    // Each row: (address, whether it should be accepted, failure explanation).
    let cases = [
        ("sarah@@acme.com", false, "double-@ domain is rejected"),
        ("a@b@c.com", false, "two @ signs are rejected"),
        ("sarah@acme.com", true, "a normal address still passes"),
    ];

    for (address, accepted, why) in cases {
        assert!(is_email(address) == accepted, "{why}");
    }
}
6037
6038    // ── Regression: working-set vs --all agree on log.md links ───────────────
6039
#[test]
fn working_set_does_not_flag_log_md_body_links() {
    // Regression: the working-set incoming-linker scan fed root `log.md`
    // through the body wiki-link check, so a historical `[[deleted]]` mention
    // became WIKI_LINK_BROKEN. `--all` never reports that, and an append-only
    // log cannot be edited to fix it. Root meta files are excluded from the
    // body link check, matching --all.
    let fx = Fixture::new();
    fx.write("records/contacts/a.md", &valid_contact("A"));
    // The log entry's body mentions the contact it records as deleted.
    fx.write(
        "log.md",
        "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
    );

    let issues = validate_working_set(&fx.store(), None).unwrap();
    let log_path = std::path::Path::new("log.md");
    let log_link_flagged = issues
        .iter()
        .any(|issue| issue.code == codes::WIKI_LINK_BROKEN && issue.file == log_path);
    assert!(
        !log_link_flagged,
        "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
    );
}
6062
6063    // ── Regression: DB.md schema field lint ──────────────────────────────────
6064
#[test]
fn schema_duplicate_field_name_is_flagged() {
    // A schema declaring the field `name` twice must produce a
    // DB_MD_SCHEMA_FIELD finding keyed on that field.
    let required_name = FieldSpec {
        name: "name".into(),
        required: true,
        ..Default::default()
    };
    let repeated_name = FieldSpec {
        name: "name".into(),
        ..Default::default()
    };
    let contact_schema = Schema {
        fields: vec![required_name, repeated_name],
        ..Default::default()
    };

    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), contact_schema);

    let issues = fx.store_all();
    let duplicate_flagged = issues.iter().any(|issue| {
        issue.code == codes::DB_MD_SCHEMA_FIELD && issue.key.as_deref() == Some("name")
    });
    assert!(
        duplicate_flagged,
        "a duplicate schema field name must be flagged: {issues:#?}"
    );
}
6093
#[test]
fn schema_unknown_modifier_is_info() {
    // A field carrying an unrecognized modifier (here the misspelling
    // `requierd`) must produce a DB_MD_SCHEMA_FIELD finding at Info severity,
    // keyed on the field.
    let misspelled_modifier_field = FieldSpec {
        name: "name".into(),
        unknown_modifiers: vec!["requierd".into()],
        ..Default::default()
    };
    let contact_schema = Schema {
        fields: vec![misspelled_modifier_field],
        ..Default::default()
    };

    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), contact_schema);

    let issues = fx.store_all();
    let info_reported = issues.iter().any(|issue| {
        issue.code == codes::DB_MD_SCHEMA_FIELD
            && issue.severity == Severity::Info
            && issue.key.as_deref() == Some("name")
    });
    assert!(
        info_reported,
        "an unrecognized schema modifier must surface as Info: {issues:#?}"
    );
}
6116
/// Guards against code/SPEC drift: every constant in `mod codes` must have a
/// row in SPEC.md § Validation. That table is the declared "complete
/// vocabulary" an agent branches on, and the module doc-comment promises the
/// engine implements "exactly those codes — no more, no fewer", so a code
/// added to the engine without a SPEC row fails here.
#[test]
fn every_code_constant_is_documented_in_spec() {
    // Read the constant *values* out of this module's own source so that a
    // newly added `pub const X: &str = "X";` is covered without touching the
    // test. Every line of interest has the shape
    // `    pub const NAME: &str = "VALUE";`.
    let this_src = include_str!("validate.rs");
    let codes_in_module: Vec<String> = this_src
        .lines()
        // Skip everything up to and including the `pub mod codes` header ...
        .skip_while(|line| !line.trim().starts_with("pub mod codes"))
        .skip(1)
        // ... and stop at the block's closing brace, which sits at column 0.
        .take_while(|line| *line != "}")
        .filter_map(|line| {
            line.trim()
                .strip_prefix("pub const ")
                .map(|rest| (line, rest))
        })
        .map(|(line, rest)| {
            // rest = `NAME: &str = "VALUE";`
            rest.split_once('=')
                .map(|(_, v)| v.trim())
                .and_then(|v| v.strip_prefix('"'))
                .and_then(|v| v.strip_suffix("\";"))
                .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"))
                .to_string()
        })
        .collect();
    // Sanity floor: too few parsed constants means the extraction itself broke.
    assert!(
        codes_in_module.len() >= 36,
        "parsed only {} code constants from `mod codes`; the parser likely \
         broke against a source-format change",
        codes_in_module.len()
    );

    // SPEC.md lives at the repo root, two levels up from this crate's manifest.
    let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
    let spec = match fs::read_to_string(&spec_path) {
        Ok(text) => text,
        Err(e) => panic!("cannot read {}: {e}", spec_path.display()),
    };

    // A documented code shows up as a table cell of the form `` | `CODE` | ``.
    let mut missing: Vec<&String> = Vec::new();
    for code in &codes_in_module {
        let table_cell = format!("| `{code}` |");
        if !spec.contains(&table_cell) {
            missing.push(code);
        }
    }
    assert!(
        missing.is_empty(),
        "validation codes emitted by the engine but absent from SPEC.md \
         § Validation (the declared complete vocabulary): {missing:?}"
    );
}
6176}