Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and file walk here, reading only the two public,
//! caller-populated fields of a [`Store`]: [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! The exceptions are the two store scans of the working-set path described
//! above: [`Store::find_links_to_any`] (incoming linkers) and [`Store::walk`]
//! (the vacuous-pass fallback sweep).
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// Severity of a validation [`Issue`].
///
/// Only [`Severity::Error`] fails validation (non-zero exit); warnings and
/// info never do. [`Issue::is_error`] is the predicate for that rule.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// Blocks: a hard violation of the format or doctrine.
    Error,
    /// A decision point the agent resolves at its discretion.
    Warning,
    /// Visibility only; never affects exit status.
    Info,
}
61
/// A single structured validation finding.
///
/// Agent-primary and machine-parseable via `--json`. When present,
/// `suggestion` is a deterministic remediation hint the agent applies without
/// guessing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Issue {
    /// The severity; only [`Severity::Error`] fails validation.
    pub severity: Severity,
    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
    pub code: &'static str,
    /// The file the issue is about.
    pub file: PathBuf,
    /// The 1-based line, when applicable; `None` when the issue has no single
    /// line to point at.
    pub line: Option<u32>,
    /// The frontmatter key, when the issue is about a specific field.
    pub key: Option<String>,
    /// A human-readable message.
    pub message: String,
    /// A deterministic remediation hint, when one exists.
    pub suggestion: Option<String>,
    /// Other files involved (e.g. the duplicate partner in a collision).
    pub related: Vec<PathBuf>,
}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes — one constant per row of the SPEC.md
/// § Validation table. Call sites reference these instead of bare strings so
/// the code and the SPEC table can never silently drift.
///
/// Every constant's value is its own name spelled as a string.
pub mod codes {
    /// path has no `DB.md`; not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// the store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// the store's `DB.md` frontmatter lacks `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` has an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// a `DB.md ## Schemas` field declaration is malformed (empty or duplicate
    /// field name) or carries an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";
    /// content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// content file can't be read (not valid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// frontmatter block isn't valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` isn't ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// `meta-type` is present but not one of fact / operational / conclusion.
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";
    /// content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` > 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    /// wiki-link target isn't a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// wiki-link target file doesn't exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// wiki-link target carries a `.md` extension — drop it.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// frontmatter list uses inline `[[[a]], [[b]]]` — use block form.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    /// two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// two records of a type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";
    /// a `DB.md` schema requires a field that's absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// a value doesn't match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// a `link to <prefix>/` field has a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// a value isn't in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    /// a write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// a file with an `### Ignored types` type exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// a `meta-type: conclusion` record derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    /// a `log.md` entry header timestamp is unparseable.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// a `log.md` entry kind isn't recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries aren't in non-decreasing time order (possible rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    /// a non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// an `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// a file isn't listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// an `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// an index's `scope:` doesn't match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// an index entry's text doesn't match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// a type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// a file isn't in the `index.jsonl`, or a jsonl record points at a missing
    /// file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// an `index.jsonl` record's fields don't match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    /// `tags` isn't a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
    /// a line in `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// a content file references an `asset`/`assets` path with no record in
    /// `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// an `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// an `assets.jsonl` record's path is referenced by no wrapper.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// an `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
197
/// The SPEC's `summary` length bound, in characters. A summary longer than
/// this is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;

/// Recognized `log.md` entry kinds (SPEC § `log.md`). Any other kind is
/// reported as `LOG_UNKNOWN_KIND` (a warning, not an error).
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
214
215// ─────────────────────────────────────────────────────────────────────────────
216//  Public entrypoints
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// **Loop default.** Validate the working set: content files changed since
220/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
221/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
222/// none of the cross-file global passes (entity-dedup, every-index sync,
223/// `log.md` ordering) that `--all` adds. If the default call finds no logged
224/// changed objects, it falls back to a per-file content sweep so an externally
225/// edited or freshly copied store cannot pass vacuously.
226///
227/// **Cost.** The changed set is read from `log.md` — O(changed): every
228/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
229/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
230/// over that set plus its incoming linkers — also O(changed). The one part that
231/// is *not* O(changed) is discovering those incoming linkers: a link to a
232/// changed path can live in the body or a typed frontmatter field of any file,
233/// so it is found by a **single** embedded-ripgrep pass over the store
234/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
235/// scan, flat in the changed-set size. (It was previously a full store read
236/// *per* changed object — `O(changed × store)`; that is the blow-up this path
237/// no longer pays.) The unavoidable single content scan is the same shape as
238/// free-text `dbmd search`; the sidecar `links` projection can't replace it
239/// because it omits body/typed-field edges.
240pub fn validate_working_set(
241    store: &Store,
242    since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244    if !store_marker_present(store) {
245        return Ok(vec![not_a_store_issue(store)]);
246    }
247
248    let cutoff = match since {
249        Some(ts) => Some(ts),
250        None => last_validate_at(store),
251    };
252
253    // 1. Changed objects, straight from the log (O(changed) — never a walk).
254    let changed = changed_objects_since(store, cutoff);
255    if changed.is_empty() && since.is_none() {
256        return validate_content_sweep(store);
257    }
258
259    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
260    //    path (the linker may now be stale even though it didn't change). The
261    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
262    //    pass over the store for the WHOLE changed set (one `.md` walk, one
263    //    presence-only/early-exit scan per file), not one walk per object. This
264    //    is the fix for the `O(changed × store)` blow-up that calling
265    //    `find_links_to` in a loop produced (a full store read per changed
266    //    object); the cost is now a single store scan regardless of how many
267    //    objects changed. A returned self-link is harmlessly deduped by the set
268    //    (the object is already inserted below).
269    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270    let mut working: BTreeSet<PathBuf> = changed;
271    for linker in store.find_links_to_any(&changed_targets)? {
272        working.insert(linker);
273    }
274
275    let mut issues = Vec::new();
276    for rel in &working {
277        let abs = store.root.join(rel);
278        // A changed path can be a *deletion* — skip files that no longer exist;
279        // the incoming-linker scan above already flagged links into them.
280        if !abs.is_file() {
281            continue;
282        }
283        // `None` basename index: the working-set pass does not build the
284        // store-wide basename map (that is a `--all`-only structure), so a bare
285        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
286        // `--all` sweep does the ambiguity upgrade.
287        check_content_file(store, rel, &abs, None, &mut issues);
288    }
289    issues.sort_by(issue_order);
290    Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294    let mut issues = Vec::new();
295    for rel in store.walk()? {
296        let abs = store.root.join(&rel);
297        check_content_file(store, &rel, &abs, None, &mut issues);
298    }
299    issues.sort_by(issue_order);
300    Ok(issues)
301}
302
303/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
304/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
305/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
306/// loop.
307pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308    if !store_marker_present(store) {
309        return Ok(vec![not_a_store_issue(store)]);
310    }
311
312    let mut issues = Vec::new();
313
314    // Store-identity file: `DB.md` shape (type / required fields / section
315    // headers). A single root file, checked once in the sweep — not a content
316    // file (it carries no `summary`), so it is not part of `walk_content_files`.
317    check_db_md(store, &mut issues);
318
319    let files = walk_content_files(&store.root);
320
321    // The basename index makes the short-form wiki-link check able to upgrade a
322    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
323    // Built once from the already-gathered sweep list (no extra walk); only the
324    // `--all` path has it (the working-set path stays O(changed)).
325    let basenames = build_basename_index(&files);
326
327    // Per-file checks over the whole store.
328    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329    for rel in &files {
330        let abs = store.root.join(rel);
331        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332            parsed.push((rel.clone(), p));
333        }
334    }
335
336    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
337    check_duplicates(store, &parsed, &mut issues);
338
339    // Cross-file: hierarchical index.md + index.jsonl sync.
340    check_indexes(store, &files, &mut issues);
341
342    // Cross-file: log.md well-formedness + ordering.
343    check_log(store, &mut issues);
344
345    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
346    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
347    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
348    // a fresh clone with no restored bytes still passes here.
349    check_assets(store, &parsed, &mut issues);
350
351    issues.sort_by(issue_order);
352    Ok(issues)
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356//  Per-file content checks (shared by both scopes)
357// ─────────────────────────────────────────────────────────────────────────────
358
/// What `validate_all`'s cross-file pass needs from a per-file parse: the
/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
/// text-based wiki-link extraction). The body and the frontmatter end line are
/// consumed inline during the per-file pass and are not carried here.
struct Parsed {
    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
    fm: Option<BTreeMap<String, Value>>,
    /// The raw frontmatter YAML text (between the fences) — the source for
    /// text-based wiki-link extraction in dedup.
    fm_yaml: String,
}
371
372/// Run every per-file check on one content file, pushing issues. Returns the
373/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
374/// `None` only when the file is unreadable or has no frontmatter block at all
375/// (which for a content file is itself reported).
376fn check_content_file(
377    store: &Store,
378    rel: &Path,
379    abs: &Path,
380    basenames: Option<&BasenameIndex>,
381    issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383    let text = match std::fs::read_to_string(abs) {
384        Ok(t) => t,
385        Err(e) => {
386            // The file exists in the walk but can't be read as UTF-8 text
387            // (invalid bytes) or hit an I/O error. Returning `None` silently
388            // here let a store whose only content file was binary garbage pass
389            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
390            // sweep exists to prevent. Report it so the agent gets an actionable
391            // diagnostic naming the unreadable file (and `index rebuild`, which
392            // hard-fails on the same file, isn't the only signal).
393            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394                "file is not valid UTF-8 text".to_string()
395            } else {
396                format!("file could not be read: {e}")
397            };
398            push(
399                issues,
400                Severity::Error,
401                codes::FM_UNREADABLE,
402                rel,
403                None,
404                None,
405                format!("content file is unreadable: {detail}"),
406                Some(
407                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408                        .into(),
409                ),
410                vec![],
411            );
412            return None;
413        }
414    };
415
416    let is_content = is_content_file(rel);
417
418    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419        Some(split) => split,
420        None => {
421            // No frontmatter at all. For a content file that means there's no
422            // `type:` and no `summary:` — report both the way a parsed-but-empty
423            // file would, so the agent gets the same actionable codes.
424            if is_content {
425                push(
426                    issues,
427                    Severity::Error,
428                    codes::FM_MISSING_TYPE,
429                    rel,
430                    None,
431                    Some("type".into()),
432                    "content file has no frontmatter `type:`".into(),
433                    Some("add a YAML frontmatter block with `type:`".into()),
434                    vec![],
435                );
436                push(
437                    issues,
438                    Severity::Error,
439                    codes::SUMMARY_MISSING,
440                    rel,
441                    None,
442                    Some("summary".into()),
443                    "content file has no `summary`".into(),
444                    Some("run `dbmd fm init`".into()),
445                    vec![],
446                );
447            }
448            return None;
449        }
450    };
451
452    // Parse the YAML block.
453    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455        // An empty frontmatter block parses as Null; treat as an empty mapping.
456        Ok(Value::Null) => Some(BTreeMap::new()),
457        Ok(_) => {
458            // A scalar / sequence at the top level isn't a frontmatter mapping.
459            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
460            // block is opaque, so there is no single offending field line.
461            push(
462                issues,
463                Severity::Error,
464                codes::FM_MALFORMED_YAML,
465                rel,
466                Some(1),
467                None,
468                "frontmatter is not a YAML mapping".into(),
469                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470                vec![],
471            );
472            None
473        }
474        Err(e) => {
475            // Anchor to line 1 (the opening `---`): an unparseable block has no
476            // single offending field line; the agent re-reads the whole block.
477            push(
478                issues,
479                Severity::Error,
480                codes::FM_MALFORMED_YAML,
481                rel,
482                Some(1),
483                None,
484                format!("frontmatter block isn't valid YAML: {e}"),
485                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486                vec![],
487            );
488            None
489        }
490    };
491
492    if let Some(map) = &fm {
493        // The detailed frontmatter checks only run when the YAML parsed.
494        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495    }
496
497    // Wiki-link doctrine checks run on the body of content files. They are NOT
498    // run on:
499    //   - the root append-only meta files `log.md`/`DB.md` — they reach this
500    //     function only via the working-set incoming-linker scan (`walk_all_md`
501    //     includes them), and `validate --all` never link-checks their bodies. A
502    //     historical `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]`
503    //     in DB.md's `## Agent instructions`, must not be `WIKI_LINK_BROKEN`; the
504    //     log is append-only, so "fix the link" can't even be applied.
505    //   - the derived catalogs `index.md`/`index.jsonl` — their "links" are
506    //     GENERATED catalog entries, not authored body wiki-links. A folder's
507    //     `index.md` is pulled into the working set as an incoming linker (an
508    //     entry `[[records/contacts/a]]` IS a wiki-link to a member, so touching
509    //     or deleting any member drags its folder `index.md` in). Its integrity
510    //     is the job of `check_indexes` under `--all`, which reports a dangling
511    //     entry as `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"). Body-link-
512    //     checking it here instead emitted `WIKI_LINK_BROKEN` ("create the
513    //     target") for the SAME condition — a different code with the OPPOSITE
514    //     remedy across the loop default vs the sweep, steering an agent to
515    //     recreate deleted data. `walk_content_files` skips `index.md` under
516    //     `--all` for exactly this reason; the working-set scope must match.
517    // Without these guards the two scopes disagree on the same store.
518    if !is_root_meta_file(rel) && !is_index_catalog_file(rel) {
519        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
520    }
521
522    Some(Parsed { fm, fm_yaml })
523}
524
525/// All frontmatter-level checks for a content file with valid YAML.
526fn check_frontmatter(
527    store: &Store,
528    rel: &Path,
529    fm: &BTreeMap<String, Value>,
530    fm_yaml: &str,
531    basenames: Option<&BasenameIndex>,
532    issues: &mut Vec<Issue>,
533    is_content: bool,
534) {
535    let type_ = fm.get("type").and_then(scalar_string);
536
537    // ── type ────────────────────────────────────────────────────────────────
538    if is_content && type_.is_none() {
539        push(
540            issues,
541            Severity::Error,
542            codes::FM_MISSING_TYPE,
543            rel,
544            fm_key_line_or_top(fm_yaml, "type"),
545            Some("type".into()),
546            "content file has no `type:`".into(),
547            Some("add a `type:` field (e.g. `type: contact`)".into()),
548            vec![],
549        );
550    }
551
552    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
553    // Present-but-out-of-enum is an error; absent is fine (effective default
554    // `fact`). Sources don't normally carry one, but validating the value when
555    // present is layer-agnostic and harmless.
556    if is_content {
557        // Branch on the raw value, NOT `and_then(scalar_string)`. Pre-filtering
558        // through `scalar_string` made a list/mapping value (which returns `None`)
559        // short-circuit the whole check, so a structurally-wrong `meta-type`
560        // slipped through clean AND was silently reclassified as the default
561        // `fact` by the rest of the toolkit. Absent or explicit-`null` is fine
562        // (effective default `fact`); a present non-null value must be a scalar in
563        // the closed enum. This mirrors the sibling timestamp check below, which
564        // was already hardened against the same non-scalar escape.
565        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
566            match scalar_string(v) {
567                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
568                Some(mt) => push(
569                    issues,
570                    Severity::Error,
571                    codes::FM_BAD_META_TYPE,
572                    rel,
573                    fm_key_line_or_top(fm_yaml, "meta-type"),
574                    Some("meta-type".into()),
575                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
576                    Some(
577                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
578                            .into(),
579                    ),
580                    vec![],
581                ),
582                None => push(
583                    issues,
584                    Severity::Error,
585                    codes::FM_BAD_META_TYPE,
586                    rel,
587                    fm_key_line_or_top(fm_yaml, "meta-type"),
588                    Some("meta-type".into()),
589                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
590                     string, found a list or mapping"
591                        .to_string(),
592                    Some(
593                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
594                            .into(),
595                    ),
596                    vec![],
597                ),
598            }
599        }
600    }
601
602    // ── summary (universal on content files) ──────────────────────────────────
603    if is_content {
604        check_summary(rel, fm, fm_yaml, issues);
605    }
606
607    // ── timestamps: created / updated ─────────────────────────────────────────
608    // The `created`/`updated` contract is content-file-only; meta files
609    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
610    if is_content {
611        for (key, missing_code) in [
612            ("created", codes::FM_MISSING_CREATED),
613            ("updated", codes::FM_MISSING_UPDATED),
614        ] {
615            // A key that is absent, or present-but-`null`, has *no* timestamp →
616            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
617            // "no timestamp", so a null `created:` must read as missing, not
618            // silently pass.
619            let value = fm.get(key);
620            let missing = value.is_none() || value.is_some_and(Value::is_null);
621            if missing {
622                push(
623                    issues,
624                    Severity::Error,
625                    missing_code,
626                    rel,
627                    fm_key_line_or_top(fm_yaml, key),
628                    Some(key.into()),
629                    format!("content file has no `{key}:` timestamp"),
630                    Some(format!(
631                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
632                    )),
633                    vec![],
634                );
635            } else if let Some(v) = value {
636                // Present and non-null. A scalar is checked for ISO-8601; a
637                // sequence/mapping is not a timestamp string at all and so
638                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
639                // through the way it did when `scalar_string` returned `None`
640                // and the branch silently no-oped).
641                match scalar_string(v) {
642                    Some(s) if is_iso8601(&s) => {}
643                    Some(s) => push(
644                        issues,
645                        Severity::Error,
646                        codes::FM_BAD_TIMESTAMP,
647                        rel,
648                        fm_key_line(fm_yaml, key),
649                        Some(key.into()),
650                        format!("`{key}` is not ISO-8601: {s:?}"),
651                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
652                        vec![],
653                    ),
654                    None => push(
655                        issues,
656                        Severity::Error,
657                        codes::FM_BAD_TIMESTAMP,
658                        rel,
659                        fm_key_line(fm_yaml, key),
660                        Some(key.into()),
661                        format!(
662                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
663                        ),
664                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
665                        vec![],
666                    ),
667                }
668            }
669        }
670    }
671    // ── tags shape ────────────────────────────────────────────────────────────
672    if let Some(tags) = fm.get("tags") {
673        if !is_flat_scalar_list(tags) {
674            push(
675                issues,
676                Severity::Warning,
677                codes::TAGS_MALFORMED,
678                rel,
679                fm_key_line(fm_yaml, "tags"),
680                Some("tags".into()),
681                "`tags` must be a flat YAML list of short scalar labels".into(),
682                Some("use block form: one `- <tag>` per line".into()),
683                vec![],
684            );
685        }
686    }
687
688    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
689    for key in detect_flow_form_link_lists(fm_yaml) {
690        push(
691            issues,
692            Severity::Error,
693            codes::WIKI_LINK_FLOW_FORM_LIST,
694            rel,
695            fm_key_line(fm_yaml, &key),
696            Some(key.clone()),
697            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
698            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
699            vec![],
700        );
701    }
702
703    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
704    // Skip keys that have an explicit `link to` schema spec — those are checked
705    // (with prefix enforcement) in `check_schema`, and double-reporting the same
706    // link via two paths would be noise.
707    let schema_link_keys: BTreeSet<String> =
708        effective_schema(store, type_.as_deref().unwrap_or(""))
709            .map(|s| {
710                s.fields
711                    .iter()
712                    .filter(|f| f.link_prefix.is_some())
713                    .map(|f| f.name.clone())
714                    .collect()
715            })
716            .unwrap_or_default();
717    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
718        if schema_link_keys.contains(&key) {
719            continue;
720        }
721        check_wiki_link(
722            store,
723            rel,
724            &link,
725            Some(link.line),
726            Some(&key),
727            basenames,
728            issues,
729        );
730    }
731
732    // ── policies: ignored types ──────────────────────────────────────────────
733    if let Some(t) = &type_ {
734        if store.config.ignored_types.iter().any(|it| it == t) {
735            push(
736                issues,
737                Severity::Info,
738                codes::POLICY_IGNORED_TYPE_PRESENT,
739                rel,
740                fm_key_line(fm_yaml, "type"),
741                Some("type".into()),
742                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
743                Some(
744                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
745                        .into(),
746                ),
747                // The policy source: `DB.md` declares the ignored type.
748                vec![PathBuf::from("DB.md")],
749            );
750        }
751        // A conclusion record (`meta-type: conclusion`) deriving from an
752        // ignored-type record → warning. The decision lives in the shared
753        // `derived_from_ignored_type` entry point; this side only supplies the
754        // `derived_from` targets (with their line, which the issue carries) and
755        // renders the finding.
756        let meta_type = fm
757            .get("meta-type")
758            .and_then(scalar_string)
759            .unwrap_or_else(|| "fact".to_string());
760        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
761            if let Some(hit) =
762                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
763            {
764                push(
765                    issues,
766                    Severity::Warning,
767                    codes::POLICY_IGNORED_TYPE_DERIVED,
768                    rel,
769                    Some(link.line),
770                    Some("derived_from".into()),
771                    format!(
772                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
773                        hit.target, hit.target_type
774                    ),
775                    Some(
776                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
777                            .into(),
778                    ),
779                    // The ignored-type source record, plus `DB.md` (the policy
780                    // source that lists the ignored type).
781                    vec![
782                        PathBuf::from(format!("{}.md", hit.target)),
783                        PathBuf::from("DB.md"),
784                    ],
785                );
786            }
787        }
788    }
789
790    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
791    if let Some(t) = &type_ {
792        if let Some(schema) = effective_schema(store, t) {
793            check_schema(store, rel, fm, fm_yaml, &schema, issues);
794        }
795    }
796}
797
798/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
799fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
800    let line = fm_key_line(fm_yaml, "summary");
801    match fm.get("summary") {
802        None => push(
803            issues,
804            Severity::Error,
805            codes::SUMMARY_MISSING,
806            rel,
807            // A missing `summary` key has no line of its own → anchor to the
808            // frontmatter block top (line 1), the EXPECTED field-absence rule.
809            fm_key_line_or_top(fm_yaml, "summary"),
810            Some("summary".into()),
811            "content file has no `summary`".into(),
812            Some("run `dbmd fm init`".into()),
813            vec![],
814        ),
815        Some(v) => {
816            let s = scalar_string(v).unwrap_or_default();
817            if s.trim().is_empty() {
818                push(
819                    issues,
820                    Severity::Error,
821                    codes::SUMMARY_EMPTY,
822                    rel,
823                    line,
824                    Some("summary".into()),
825                    "`summary` is present but empty".into(),
826                    Some("write a one-line summary, or run `dbmd fm init`".into()),
827                    vec![],
828                );
829            } else if s.contains('\n') {
830                push(
831                    issues,
832                    Severity::Error,
833                    codes::SUMMARY_MULTILINE,
834                    rel,
835                    line,
836                    Some("summary".into()),
837                    "`summary` must be one line (contains a newline)".into(),
838                    Some("collapse the summary to a single line".into()),
839                    vec![],
840                );
841            } else if s.chars().count() > MAX_SUMMARY_LEN {
842                push(
843                    issues,
844                    Severity::Warning,
845                    codes::SUMMARY_TOO_LONG,
846                    rel,
847                    line,
848                    Some("summary".into()),
849                    format!(
850                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
851                        s.chars().count()
852                    ),
853                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
854                    vec![],
855                );
856            }
857        }
858    }
859}
860
861/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
862fn check_body_wiki_links(
863    store: &Store,
864    rel: &Path,
865    body: &str,
866    fm_end_line: u32,
867    basenames: Option<&BasenameIndex>,
868    issues: &mut Vec<Issue>,
869) {
870    for link in extract_wiki_links(body) {
871        // Body lines are offset past the frontmatter block. `link.line` is
872        // 1-based within `body`; the body starts at `fm_end_line + 1`.
873        let abs_line = fm_end_line + link.line;
874        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
875    }
876}
877
/// Lookup table from a bare file basename (the stem, without its extension) to
/// every store-relative path that carries it.
///
/// It is built once per `validate --all` sweep. With it, the short-form
/// wiki-link check can tell a merely short-form target
/// (`WIKI_LINK_SHORT_FORM`) from an *ambiguous* one whose basename matches two
/// or more files (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set
/// path passes `None` instead: it is O(changed), never walks the store, and
/// therefore reports only the plain short-form error.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the already-swept file list by basename to form the
/// [`BasenameIndex`]. The caller (`validate_all`) has gathered `files`
/// already, so no additional walk happens here.
///
/// Paths without a UTF-8 file stem are left out. Within one basename the paths
/// keep the order they have in `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files
        .iter()
        .filter_map(|path| {
            let stem = path.file_stem()?.to_str()?;
            Some((stem.to_string(), path))
        })
        .fold(BasenameIndex::new(), |mut index, (stem, path)| {
            index.entry(stem).or_default().push(path.clone());
            index
        })
}
898
899/// The shared per-wiki-link doctrine + integrity check used by both body links
900/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
901/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
902/// when its bare basename matches ≥2 files.
903fn check_wiki_link(
904    store: &Store,
905    rel: &Path,
906    link: &Link,
907    line: Option<u32>,
908    key: Option<&str>,
909    basenames: Option<&BasenameIndex>,
910    issues: &mut Vec<Issue>,
911) {
912    let bare = link.target.trim_end_matches(".md");
913
914    // Short-form: not a full store-relative path (no `/`, or first segment isn't
915    // a known layer).
916    if !is_full_store_path(bare) {
917        // Ambiguous (defensive) takes precedence over plain short-form when the
918        // target is a bare basename (no `/`) that matches ≥2 files in the store.
919        // Only computable in the sweep (where `basenames` is populated); the
920        // working-set path falls through to the plain short-form error.
921        if !bare.contains('/') {
922            if let Some(idx) = basenames {
923                if let Some(matches) = idx.get(bare) {
924                    if matches.len() >= 2 {
925                        let mut related = matches.clone();
926                        related.sort();
927                        push(
928                            issues,
929                            Severity::Error,
930                            codes::WIKI_LINK_AMBIGUOUS,
931                            rel,
932                            line,
933                            key.map(str::to_string),
934                            format!(
935                                "short-form wiki-link `[[{}]]` matches multiple files",
936                                link.target
937                            ),
938                            Some("use the full store-relative path to disambiguate".into()),
939                            related,
940                        );
941                        return;
942                    }
943                }
944            }
945        }
946        push(
947            issues,
948            Severity::Error,
949            codes::WIKI_LINK_SHORT_FORM,
950            rel,
951            line,
952            key.map(str::to_string),
953            format!(
954                "wiki-link `[[{}]]` is not a full store-relative path",
955                link.target
956            ),
957            short_form_suggestion(bare),
958            vec![],
959        );
960        // Don't also report broken; the agent must fix the form first.
961        return;
962    }
963
964    // `.md` extension → warning, then still check existence.
965    if link.target.ends_with(".md") {
966        push(
967            issues,
968            Severity::Warning,
969            codes::WIKI_LINK_HAS_EXTENSION,
970            rel,
971            line,
972            key.map(str::to_string),
973            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
974            Some(format!("drop the extension: [[{bare}]]")),
975            vec![],
976        );
977    }
978
979    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
980    // same way the graph engine does — the literal path first (so a link to a
981    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
982    // the `.md`-appended path.
983    match resolve_wiki_target(store, bare) {
984        TargetResolution::Exists => {}
985        TargetResolution::Missing => push(
986            issues,
987            Severity::Error,
988            codes::WIKI_LINK_BROKEN,
989            rel,
990            line,
991            key.map(str::to_string),
992            format!("wiki-link target `{bare}` doesn't exist"),
993            Some(format!(
994                "create `{bare}.md`, or point the link at an existing file"
995            )),
996            vec![],
997        ),
998        TargetResolution::Unsafe => push(
999            issues,
1000            Severity::Error,
1001            codes::WIKI_LINK_BROKEN,
1002            rel,
1003            line,
1004            key.map(str::to_string),
1005            format!("wiki-link target `{bare}` is not a safe store-relative path"),
1006            Some("use a full store-relative path under sources/ or records/".into()),
1007            vec![],
1008        ),
1009    }
1010}
1011
1012// ─────────────────────────────────────────────────────────────────────────────
1013//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
1014// ─────────────────────────────────────────────────────────────────────────────
1015
1016/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
1017/// block, or `None`. This is the **only** source of schema enforcement — the
1018/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
1019/// store that wants its `contact` / `expense` / etc. fields enforced declares
1020/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
1021/// copy-in starting point.
1022fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1023    store.config.schemas.get(type_).cloned()
1024}
1025
1026/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
1027fn check_schema(
1028    store: &Store,
1029    rel: &Path,
1030    fm: &BTreeMap<String, Value>,
1031    fm_yaml: &str,
1032    schema: &Schema,
1033    issues: &mut Vec<Issue>,
1034) {
1035    for spec in &schema.fields {
1036        let present = fm.get(&spec.name);
1037        let line = fm_key_line(fm_yaml, &spec.name);
1038
1039        // Required. "Empty" means: the key is absent, or its value carries no
1040        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1041        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1042        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1043        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1044        // a required field with a null or empty-collection value pass silently;
1045        // route them through `is_empty_value` instead.
1046        let is_empty = match present {
1047            None => true,
1048            Some(v) => is_empty_value(v),
1049        };
1050        if spec.required && is_empty {
1051            push(
1052                issues,
1053                Severity::Error,
1054                codes::SCHEMA_MISSING_REQUIRED,
1055                rel,
1056                // Absent key → anchor to the frontmatter top (line 1); a
1057                // present-but-empty value keeps its own line.
1058                fm_key_line_or_top(fm_yaml, &spec.name),
1059                Some(spec.name.clone()),
1060                format!("required field `{}` is absent or empty", spec.name),
1061                Some(format!("set `{}` to a non-empty value", spec.name)),
1062                vec![],
1063            );
1064            continue;
1065        }
1066        let Some(value) = present else { continue };
1067
1068        // An OPTIONAL field that is `null` or empty is simply unset — there is
1069        // no value to shape/enum/link-check. (The required+empty case already
1070        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1071        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1072        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1073        let value_empty = value.is_null()
1074            || scalar_string(value)
1075                .map(|s| s.trim().is_empty())
1076                .unwrap_or(false);
1077        if !spec.required && value_empty {
1078            continue;
1079        }
1080
1081        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1082        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1083        if let Some(prefix) = &spec.link_prefix {
1084            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1085            continue; // a link field is never also shape/enum-checked
1086        }
1087
1088        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1089        // or mapping satisfies neither, and would otherwise slip through both
1090        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1091        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1092        // than let a structurally-wrong value validate clean. (Link fields,
1093        // which legitimately take block-form sequences, already `continue`d.)
1094        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1095            push(
1096                issues,
1097                Severity::Error,
1098                codes::SCHEMA_SHAPE_MISMATCH,
1099                rel,
1100                line,
1101                Some(spec.name.clone()),
1102                format!(
1103                    "`{}` must be a scalar value, found a list or mapping",
1104                    spec.name
1105                ),
1106                Some(format!("set `{}` to a single scalar value", spec.name)),
1107                vec![],
1108            );
1109            continue;
1110        }
1111
1112        // enum
1113        if let Some(allowed) = &spec.enum_values {
1114            if let Some(s) = scalar_string(value) {
1115                if !allowed.iter().any(|a| a == &s) {
1116                    push(
1117                        issues,
1118                        Severity::Error,
1119                        codes::SCHEMA_ENUM_VIOLATION,
1120                        rel,
1121                        line,
1122                        Some(spec.name.clone()),
1123                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1124                        Some(format!("use one of: {}", allowed.join(", "))),
1125                        vec![],
1126                    );
1127                }
1128            }
1129            continue;
1130        }
1131
1132        // shape
1133        if let Some(shape) = spec.shape {
1134            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1135        }
1136    }
1137}
1138
1139/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1140/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1141/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1142/// recognized exactly like the quoted form.
1143fn check_schema_link(
1144    store: &Store,
1145    rel: &Path,
1146    field: &str,
1147    fm_yaml: &str,
1148    prefix: &Path,
1149    line: Option<u32>,
1150    issues: &mut Vec<Issue>,
1151) {
1152    let prefix_str = prefix.to_string_lossy();
1153    let prefix_str = prefix_str.trim_end_matches('/');
1154    let suggestion = |target_leaf: &str| {
1155        Some(format!(
1156            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1157        ))
1158    };
1159
1160    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1161    if links.is_empty() {
1162        // No wiki-link in the field's value → it's a plain string.
1163        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1164        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1165        let leaf = slugish(raw);
1166        push(
1167            issues,
1168            Severity::Error,
1169            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1170            rel,
1171            line,
1172            Some(field.to_string()),
1173            format!(
1174                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1175            ),
1176            suggestion(&leaf),
1177            vec![],
1178        );
1179        return;
1180    }
1181
1182    for link in links {
1183        if link.target.ends_with(".md") {
1184            let bare = link.target.trim_end_matches(".md");
1185            push(
1186                issues,
1187                Severity::Warning,
1188                codes::WIKI_LINK_HAS_EXTENSION,
1189                rel,
1190                Some(link.line),
1191                Some(field.to_string()),
1192                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1193                Some(format!("drop the extension: [[{bare}]]")),
1194                vec![],
1195            );
1196        }
1197        let bare = link.target.trim_end_matches(".md");
1198        if !path_under_prefix(bare, prefix_str) {
1199            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1200            push(
1201                issues,
1202                Severity::Error,
1203                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1204                rel,
1205                line,
1206                Some(field.to_string()),
1207                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1208                suggestion(leaf),
1209                vec![],
1210            );
1211        } else {
1212            // Correct prefix — still surface a broken target so the agent sees
1213            // one consistent vocabulary. Resolve like the graph engine (literal
1214            // path first, then `.md`) so a `link to sources/` field pointing at a
1215            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1216            match resolve_wiki_target(store, bare) {
1217                TargetResolution::Exists => {}
1218                TargetResolution::Missing => push(
1219                    issues,
1220                    Severity::Error,
1221                    codes::WIKI_LINK_BROKEN,
1222                    rel,
1223                    line,
1224                    Some(field.to_string()),
1225                    format!("wiki-link target `{bare}` doesn't exist"),
1226                    Some(format!(
1227                        "create `{bare}.md`, or point the link at an existing file"
1228                    )),
1229                    vec![],
1230                ),
1231                TargetResolution::Unsafe => push(
1232                    issues,
1233                    Severity::Error,
1234                    codes::WIKI_LINK_BROKEN,
1235                    rel,
1236                    line,
1237                    Some(field.to_string()),
1238                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1239                    Some("use a full store-relative path under sources/ or records/".into()),
1240                    vec![],
1241                ),
1242            }
1243        }
1244    }
1245}
1246
1247/// Shape enforcement for a non-link, non-enum schema field.
1248fn check_schema_shape(
1249    rel: &Path,
1250    field: &str,
1251    value: &Value,
1252    shape: Shape,
1253    line: Option<u32>,
1254    issues: &mut Vec<Issue>,
1255) {
1256    let s = scalar_string(value).unwrap_or_default();
1257    let ok = match shape {
1258        Shape::String => true, // any scalar string
1259        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1260        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1261        Shape::Date => is_iso8601_date_or_datetime(&s),
1262        Shape::Email => is_email(&s),
1263        Shape::Currency => is_currency(&s),
1264        Shape::Url => is_url(&s),
1265    };
1266    if !ok {
1267        push(
1268            issues,
1269            Severity::Error,
1270            codes::SCHEMA_SHAPE_MISMATCH,
1271            rel,
1272            line,
1273            Some(field.to_string()),
1274            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1275            Some(shape_suggestion(shape)),
1276            vec![],
1277        );
1278    }
1279}
1280
1281// ─────────────────────────────────────────────────────────────────────────────
1282//  Cross-file: entity-dedup collisions (validate_all only)
1283// ─────────────────────────────────────────────────────────────────────────────
1284
1285/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1286///
1287/// `DUP_ID` is universal (two files with the same explicit `id`).
1288/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1289/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1290/// uniqueness constraint, and two records of that type whose declared values
1291/// collide warn. No type carries a built-in dedup key — the store opts in.
1292///
1293/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1294/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1295/// lexicographically smallest store-relative path in the group (a total order →
1296/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1297/// that field's line on the reported file and carries it as `key`; a multi-field
1298/// key anchors to line 1 with a null key.
1299fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1300    // Path → frontmatter YAML, for resolving the anchor field's line on the
1301    // reported (smallest-path) member.
1302    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1303        .iter()
1304        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1305        .collect();
1306
1307    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1308    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1309    for (rel, p) in parsed {
1310        if let Some(map) = &p.fm {
1311            if let Some(id) = map.get("id").and_then(scalar_string) {
1312                if !id.trim().is_empty() {
1313                    by_id.entry(id).or_default().push(rel.clone());
1314                }
1315            }
1316        }
1317    }
1318    for (id, files) in &by_id {
1319        if files.len() > 1 {
1320            let (reported, related) = canonical_and_related(files);
1321            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1322            push(
1323                issues,
1324                Severity::Error,
1325                codes::DUP_ID,
1326                &reported,
1327                line,
1328                Some("id".into()),
1329                format!("id {id:?} is declared by more than one file"),
1330                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1331                related,
1332            );
1333        }
1334    }
1335
1336    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1337    // Every constraint comes from the store's `## Schemas`; a type with no
1338    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1339    // key-ordered, so emitted issues are deterministic across runs.
1340    for (type_name, schema) in &store.config.schemas {
1341        for key_fields in &schema.unique_keys {
1342            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1343        }
1344    }
1345}
1346
1347/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1348/// declared `key_fields` render to the same token tuple. Files missing any key
1349/// field are skipped — an incomplete key is never a collision.
1350///
1351/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1352/// store-relative path; `related` is the rest. A single-field key anchors to
1353/// that field's line on the reported file and carries it as `key`; a multi-field
1354/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1355fn soft_dup(
1356    parsed: &[(PathBuf, Parsed)],
1357    issues: &mut Vec<Issue>,
1358    type_: &str,
1359    key_fields: &[String],
1360    fm_yaml_of: &HashMap<&PathBuf, &str>,
1361) {
1362    if key_fields.is_empty() {
1363        return;
1364    }
1365    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1366    for (rel, p) in parsed {
1367        let is_type =
1368            p.fm.as_ref()
1369                .and_then(|m| m.get("type"))
1370                .and_then(scalar_string)
1371                .map(|t| t == type_)
1372                .unwrap_or(false);
1373        if !is_type {
1374            continue;
1375        }
1376        if let Some(key) = dedup_key(p, key_fields) {
1377            groups.entry(key).or_default().push(rel.clone());
1378        }
1379    }
1380    // HashMap iteration is nondeterministic; sort by reported member so the
1381    // emitted issue order is stable across runs.
1382    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1383        .values()
1384        .filter(|files| files.len() > 1)
1385        .map(|files| canonical_and_related(files))
1386        .collect();
1387    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1388
1389    let fields_disp = key_fields.join(", ");
1390    for (reported, related) in collisions {
1391        // Single-field keys anchor to the field's line + carry the key; multi-
1392        // field keys anchor to line 1 with a null key.
1393        let (line, key) = if key_fields.len() == 1 {
1394            (
1395                fm_yaml_of
1396                    .get(&reported)
1397                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1398                Some(key_fields[0].clone()),
1399            )
1400        } else {
1401            (Some(1), None)
1402        };
1403        let n = related.len();
1404        push(
1405            issues,
1406            Severity::Warning,
1407            codes::DUP_UNIQUE_KEY,
1408            &reported,
1409            line,
1410            key,
1411            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1412            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1413            related,
1414        );
1415    }
1416}
1417
1418/// Render a type's `unique:` key for one file: each field's dedup token in
1419/// order, or `None` if any field is absent/empty (an incomplete key never
1420/// collides).
1421fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1422    let mut out = Vec::with_capacity(key_fields.len());
1423    for f in key_fields {
1424        out.push(dedup_token(p, f)?);
1425    }
1426    Some(out)
1427}
1428
1429/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1430/// values (single or block-sequence list) reduce to their lower-cased target
1431/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1432/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1433fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1434    // Wiki-links first — read from the raw frontmatter text so the unquoted
1435    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1436    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1437    if !links.is_empty() {
1438        let set: BTreeSet<String> = links
1439            .into_iter()
1440            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1441            .filter(|t| !t.is_empty())
1442            .collect();
1443        return if set.is_empty() {
1444            None
1445        } else {
1446            Some(set.into_iter().collect::<Vec<_>>().join(","))
1447        };
1448    }
1449    match p.fm.as_ref()?.get(field) {
1450        Some(Value::Sequence(items)) => {
1451            let set: BTreeSet<String> = items
1452                .iter()
1453                .filter_map(scalar_string)
1454                .map(|s| s.trim().to_lowercase())
1455                .filter(|t| !t.is_empty())
1456                .collect();
1457            if set.is_empty() {
1458                None
1459            } else {
1460                Some(set.into_iter().collect::<Vec<_>>().join(","))
1461            }
1462        }
1463        Some(v) => {
1464            let s = scalar_string(v)?.trim().to_lowercase();
1465            if s.is_empty() {
1466                None
1467            } else {
1468                Some(s)
1469            }
1470        }
1471        None => None,
1472    }
1473}
1474
/// Divide a non-empty collision group into `(reported, related)`.
///
/// The lexicographically smallest store-relative path becomes the reported
/// member; every other path, in ascending order, is `related`. Because paths
/// are totally ordered the split is deterministic — the property reporting
/// rule #1 depends on.
///
/// # Panics
///
/// Panics when `files` is empty.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.to_vec();
    ordered.sort();
    let mut members = ordered.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1486
1487// ─────────────────────────────────────────────────────────────────────────────
1488//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1489// ─────────────────────────────────────────────────────────────────────────────
1490
1491/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1492fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1493    // Group content files by their immediate parent folder (the type-folder,
1494    // *across date shards* — a sharded file's "type folder" is the folder right
1495    // under the layer). We key on the type-folder so shards roll up correctly.
1496    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1497    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1498    for rel in files {
1499        // The layer is the first path component — recorded independently of the
1500        // type-folder so a layer containing only loose files still requires an
1501        // `index.md`.
1502        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1503            match layer {
1504                "sources" => layers_present.insert("sources"),
1505                "records" => layers_present.insert("records"),
1506                _ => false,
1507            };
1508        }
1509        if let Some(tf) = type_folder_of(rel) {
1510            type_folders.entry(tf).or_default().push(rel.clone());
1511        }
1512    }
1513
1514    // ── Root index.md ─────────────────────────────────────────────────────────
1515    if !files.is_empty() {
1516        let root_index = store.root.join("index.md");
1517        if !root_index.is_file() {
1518            push(
1519                issues,
1520                Severity::Error,
1521                codes::INDEX_MISSING,
1522                Path::new("index.md"),
1523                None,
1524                None,
1525                "store has files but no root `index.md`".into(),
1526                Some("run `dbmd index rebuild`".into()),
1527                vec![],
1528            );
1529        } else {
1530            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1531        }
1532    }
1533
1534    // ── Layer index.md ────────────────────────────────────────────────────────
1535    for layer in &layers_present {
1536        let layer_index_rel = PathBuf::from(layer).join("index.md");
1537        let abs = store.root.join(&layer_index_rel);
1538        if !abs.is_file() {
1539            push(
1540                issues,
1541                Severity::Error,
1542                codes::INDEX_MISSING,
1543                &layer_index_rel,
1544                None,
1545                None,
1546                format!("layer `{layer}/` has files but no `index.md`"),
1547                Some("run `dbmd index rebuild`".into()),
1548                vec![],
1549            );
1550        } else {
1551            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1552        }
1553    }
1554
1555    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1556    for (tf, members) in &type_folders {
1557        let index_md_rel = tf.join("index.md");
1558        let index_md_abs = store.root.join(&index_md_rel);
1559        let index_md_present = index_md_abs.is_file();
1560        if !index_md_present {
1561            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1562            // on the FOLDER (not the would-be `index.md` path). When the index is
1563            // entirely missing we do NOT additionally evaluate per-entry
1564            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1565            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1566            push(
1567                issues,
1568                Severity::Error,
1569                codes::INDEX_MISSING,
1570                tf,
1571                None,
1572                None,
1573                format!("non-empty folder `{}` has no index.md", tf.display()),
1574                Some(format!(
1575                    "run `dbmd index rebuild --folder {}`",
1576                    tf.display()
1577                )),
1578                vec![],
1579            );
1580            continue;
1581        }
1582
1583        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1584        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1585
1586        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1587        // when the `index.md` is present (above): a folder whose entire index is
1588        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1589        let jsonl_rel = tf.join("index.jsonl");
1590        let jsonl_abs = store.root.join(&jsonl_rel);
1591        if !jsonl_abs.is_file() {
1592            push(
1593                issues,
1594                Severity::Error,
1595                codes::INDEX_JSONL_MISSING,
1596                &jsonl_rel,
1597                None,
1598                None,
1599                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1600                Some("run `dbmd index rebuild`".into()),
1601                vec![],
1602            );
1603        } else {
1604            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1605        }
1606    }
1607
1608    // ── Loose files: content directly at a layer root (no type-folder). ──────
1609    // They are catalogued in the layer's own `index.jsonl` (the layer `index.md`
1610    // stays a type-folder rollup), so structured reads — `query`, dedup, `graph`
1611    // — see them the same way they see canonical files. Require that sidecar and
1612    // sync-check it, so a loose file is never silently absent from the catalog.
1613    // Only genuinely-loose files land here: `type_folder_of` already grouped
1614    // every file two-or-more levels under a layer into its type-folder above.
1615    let mut loose_by_layer: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1616    for rel in files {
1617        if !is_content_file(rel) || type_folder_of(rel).is_some() {
1618            continue;
1619        }
1620        if let Some(layer_dir) = loose_layer_dir(rel) {
1621            loose_by_layer
1622                .entry(layer_dir)
1623                .or_default()
1624                .push(rel.clone());
1625        }
1626    }
1627    for (layer_dir, members) in &loose_by_layer {
1628        let jsonl_rel = layer_dir.join("index.jsonl");
1629        if !store.root.join(&jsonl_rel).is_file() {
1630            push(
1631                issues,
1632                Severity::Error,
1633                codes::INDEX_JSONL_MISSING,
1634                &jsonl_rel,
1635                None,
1636                None,
1637                format!(
1638                    "loose files at `{}/` are not catalogued — the layer has no `index.jsonl`",
1639                    layer_dir.display()
1640                ),
1641                Some("run `dbmd index rebuild`".into()),
1642                members.clone(),
1643            );
1644        } else {
1645            // `check_type_folder_index_jsonl` ignores its `tf` arg (`let _ = tf`)
1646            // and only checks jsonl-vs-files-vs-frontmatter — exactly the layer
1647            // sidecar's contract, so it is reused verbatim.
1648            check_type_folder_index_jsonl(store, layer_dir, &jsonl_rel, members, issues);
1649        }
1650    }
1651
1652    // ── Orphan index.md: an index file in a folder with no content. ──────────
1653    for rel in walk_index_files(&store.root) {
1654        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1655        let parent_str = parent.to_string_lossy().to_string();
1656        let is_canonical = parent_str.is_empty() // root
1657            || matches!(parent_str.as_str(), "sources" | "records")
1658            || type_folders.contains_key(&parent);
1659        if !is_canonical {
1660            push(
1661                issues,
1662                Severity::Warning,
1663                codes::INDEX_ORPHAN,
1664                &rel,
1665                None,
1666                None,
1667                format!(
1668                    "`{}` sits in an empty or non-canonical folder",
1669                    rel.display()
1670                ),
1671                Some("remove it, or run `dbmd index rebuild`".into()),
1672                vec![],
1673            );
1674        }
1675    }
1676}
1677
1678/// Check a type-folder `index.md`'s entries against the folder's actual files:
1679/// stale entries (target gone), missing entries (file not listed), and
1680/// summary mismatches.
1681fn check_type_folder_index_md(
1682    store: &Store,
1683    tf: &Path,
1684    index_rel: &Path,
1685    members: &[PathBuf],
1686    issues: &mut Vec<Issue>,
1687) {
1688    let abs = store.root.join(index_rel);
1689    let Ok(text) = std::fs::read_to_string(&abs) else {
1690        return;
1691    };
1692    let entries = parse_index_entries(&text);
1693
1694    let listed: BTreeSet<PathBuf> = entries
1695        .iter()
1696        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1697        .collect();
1698
1699    // Stale entries + summary mismatch.
1700    for entry in &entries {
1701        let bare = entry.target.trim_end_matches(".md");
1702        // Resolve like the graph engine (literal path first, then `.md`) so an
1703        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1704        let target_abs = match resolved_target_abs(store, bare) {
1705            Some(abs) => abs,
1706            None => {
1707                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1708                    push(
1709                        issues,
1710                        Severity::Error,
1711                        codes::INDEX_STALE_ENTRY,
1712                        index_rel,
1713                        Some(entry.line),
1714                        None,
1715                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1716                        Some("run `dbmd index rebuild`".into()),
1717                        vec![],
1718                    );
1719                } else {
1720                    push(
1721                        issues,
1722                        Severity::Error,
1723                        codes::INDEX_STALE_ENTRY,
1724                        index_rel,
1725                        Some(entry.line),
1726                        None,
1727                        format!("index entry `[[{bare}]]` points at a missing file"),
1728                        Some("run `dbmd index rebuild`".into()),
1729                        // The stale target the entry names (the file that no
1730                        // longer exists) — so the agent can locate the dangling
1731                        // reference.
1732                        vec![PathBuf::from(format!("{bare}.md"))],
1733                    );
1734                }
1735                continue;
1736            }
1737        };
1738        // Summary mismatch: the entry text must equal the file's `summary`. A
1739        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1740        // summary is also a mismatch — the SPEC requires every type-folder index
1741        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1742        // missing quote can't validate clean just because there's nothing to
1743        // compare.
1744        if let Some(expected) = read_summary(&target_abs) {
1745            match &entry.summary_text {
1746                // Compare with the SAME whitespace normalization the renderer
1747                // applies when it writes the `index.md` browse line
1748                // (`format_md_entry` -> `collapse_whitespace`). `text_part` is the
1749                // already-collapsed text parsed back out of `index.md`; `expected`
1750                // is the RAW file summary. Comparing a collapsed value against a
1751                // raw one falsely flagged any valid one-line summary that carries
1752                // internal whitespace (a double space, a tab) — a permanent,
1753                // rebuild-immune INDEX_SUMMARY_MISMATCH that wedged the store, since
1754                // `index rebuild` regenerates the byte-identical collapsed line.
1755                // Normalizing both sides makes the check compare like with like.
1756                Some(text_part)
1757                    if crate::summary::collapse_whitespace(text_part)
1758                        != crate::summary::collapse_whitespace(&expected) =>
1759                {
1760                    push(
1761                        issues,
1762                        Severity::Error,
1763                        codes::INDEX_SUMMARY_MISMATCH,
1764                        index_rel,
1765                        Some(entry.line),
1766                        None,
1767                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1768                        Some("run `dbmd index rebuild`".into()),
1769                        vec![PathBuf::from(format!("{bare}.md"))],
1770                    );
1771                }
1772                None if !expected.trim().is_empty() => {
1773                    push(
1774                        issues,
1775                        Severity::Error,
1776                        codes::INDEX_SUMMARY_MISMATCH,
1777                        index_rel,
1778                        Some(entry.line),
1779                        None,
1780                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1781                        Some("run `dbmd index rebuild`".into()),
1782                        vec![PathBuf::from(format!("{bare}.md"))],
1783                    );
1784                }
1785                _ => {}
1786            }
1787        }
1788    }
1789
1790    // Missing entries: a member file not listed. Skip the index/log meta files.
1791    // The browse view caps at 500; only flag a missing entry when the folder is
1792    // under the cap (a capped folder legitimately omits older files).
1793    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1794    if content_members.len() <= 500 {
1795        for m in content_members {
1796            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1797            if !listed.contains(&bare) {
1798                push(
1799                    issues,
1800                    Severity::Error,
1801                    codes::INDEX_MISSING_ENTRY,
1802                    index_rel,
1803                    None,
1804                    None,
1805                    format!(
1806                        "file `{}` is not listed in its folder's `index.md`",
1807                        m.display()
1808                    ),
1809                    Some("run `dbmd index rebuild`".into()),
1810                    vec![(*m).clone()],
1811                );
1812            }
1813        }
1814    }
1815    let _ = tf;
1816}
1817
1818/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1819/// folder (uncapped), every record must point at a real file, and each record's
1820/// fields must match the file's frontmatter.
1821fn check_type_folder_index_jsonl(
1822    store: &Store,
1823    tf: &Path,
1824    jsonl_rel: &Path,
1825    members: &[PathBuf],
1826    issues: &mut Vec<Issue>,
1827) {
1828    let abs = store.root.join(jsonl_rel);
1829    let Ok(text) = std::fs::read_to_string(&abs) else {
1830        return;
1831    };
1832
1833    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1834    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1835    for (i, line) in text.lines().enumerate() {
1836        let line = line.trim();
1837        if line.is_empty() {
1838            continue;
1839        }
1840        let rec: serde_json::Value = match serde_json::from_str(line) {
1841            Ok(v) => v,
1842            Err(e) => {
1843                push(
1844                    issues,
1845                    Severity::Error,
1846                    codes::INDEX_JSONL_DESYNC,
1847                    jsonl_rel,
1848                    Some((i + 1) as u32),
1849                    None,
1850                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1851                    Some("run `dbmd index rebuild`".into()),
1852                    vec![],
1853                );
1854                continue;
1855            }
1856        };
1857        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1858            if !is_safe_store_relative_path(Path::new(path)) {
1859                push(
1860                    issues,
1861                    Severity::Error,
1862                    codes::INDEX_JSONL_DESYNC,
1863                    jsonl_rel,
1864                    Some((i + 1) as u32),
1865                    None,
1866                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1867                    Some("run `dbmd index rebuild`".into()),
1868                    vec![],
1869                );
1870                continue;
1871            }
1872            records.insert(PathBuf::from(path), rec);
1873        }
1874    }
1875
1876    let member_set: BTreeSet<PathBuf> = members
1877        .iter()
1878        .filter(|m| is_content_file(m))
1879        .cloned()
1880        .collect();
1881
1882    // jsonl record → missing file = desync.
1883    for path in records.keys() {
1884        let target_abs = store.root.join(path);
1885        if !target_abs.is_file() {
1886            push(
1887                issues,
1888                Severity::Error,
1889                codes::INDEX_JSONL_DESYNC,
1890                jsonl_rel,
1891                None,
1892                None,
1893                format!(
1894                    "`index.jsonl` record points at missing file `{}`",
1895                    path.display()
1896                ),
1897                Some("run `dbmd index rebuild`".into()),
1898                vec![],
1899            );
1900        }
1901    }
1902
1903    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1904    for m in &member_set {
1905        if !records.contains_key(m) {
1906            push(
1907                issues,
1908                Severity::Error,
1909                codes::INDEX_JSONL_DESYNC,
1910                jsonl_rel,
1911                None,
1912                None,
1913                format!(
1914                    "file `{}` is missing from the complete `index.jsonl`",
1915                    m.display()
1916                ),
1917                Some("run `dbmd index rebuild`".into()),
1918                vec![m.clone()],
1919            );
1920        }
1921    }
1922
1923    // Record fields stale vs. frontmatter. SPEC § Validation defines
1924    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1925    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1926    // search paths read every field straight from these sidecars (`tags`,
1927    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1928    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1929    // a stale value answer queries with data that exists in no `.md` file.
1930    //
1931    // Rather than re-list (and drift from) every projected key, rebuild the
1932    // record the canonical projection would write for this file
1933    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1934    // and diff the two as flat JSON maps. Every key the projection emits is
1935    // covered automatically; `path` is the join key and is skipped.
1936    for (path, rec) in &records {
1937        let target_abs = store.root.join(path);
1938        if !target_abs.is_file() {
1939            continue;
1940        }
1941        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1942        else {
1943            continue; // unreadable / unparseable frontmatter is reported elsewhere
1944        };
1945        let Ok(expected_json) = serde_json::to_value(&expected) else {
1946            continue;
1947        };
1948        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1949            continue;
1950        };
1951
1952        // Compare the union of keys present on either side; a key the file
1953        // projects but the sidecar omits is just as stale as a wrong value.
1954        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1955        for key in have.keys().chain(want.keys()) {
1956            if key == "path" {
1957                continue;
1958            }
1959            if have.get(key) != want.get(key) {
1960                mismatched_keys.insert(key);
1961            }
1962        }
1963
1964        if !mismatched_keys.is_empty() {
1965            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1966            push(
1967                issues,
1968                Severity::Error,
1969                codes::INDEX_JSONL_STALE,
1970                jsonl_rel,
1971                None,
1972                Some(keys.join(",")),
1973                format!(
1974                    "`index.jsonl` record for `{}` is stale ({})",
1975                    path.display(),
1976                    keys.join(", ")
1977                ),
1978                Some("run `dbmd index rebuild`".into()),
1979                vec![path.clone()],
1980            );
1981        }
1982    }
1983    let _ = tf;
1984}
1985
1986/// Check an index's `scope:` frontmatter against its filesystem location.
1987fn check_index_scope(
1988    store: &Store,
1989    index_rel: &Path,
1990    expected_scope: &str,
1991    expected_folder: Option<&str>,
1992    issues: &mut Vec<Issue>,
1993) {
1994    let abs = store.root.join(index_rel);
1995    let Ok(text) = std::fs::read_to_string(&abs) else {
1996        return;
1997    };
1998    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1999        return;
2000    };
2001    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
2002        return;
2003    };
2004    let fm = yaml_map_to_btree(&map);
2005
2006    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
2007        // Accept "type-folder" and the SPEC example's looser "folder" alias.
2008        let scope_ok =
2009            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
2010        if !scope_ok {
2011            push(
2012                issues,
2013                Severity::Warning,
2014                codes::INDEX_WRONG_SCOPE,
2015                index_rel,
2016                fm_key_line(&yaml, "scope"),
2017                Some("scope".into()),
2018                format!(
2019                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
2020                ),
2021                Some(format!("set `scope: {expected_scope}`")),
2022                vec![],
2023            );
2024        }
2025    }
2026    // folder: must match for layer/type-folder indexes.
2027    if let Some(expected) = expected_folder {
2028        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
2029            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
2030                push(
2031                    issues,
2032                    Severity::Warning,
2033                    codes::INDEX_WRONG_SCOPE,
2034                    index_rel,
2035                    fm_key_line(&yaml, "folder"),
2036                    Some("folder".into()),
2037                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
2038                    Some(format!("set `folder: {expected}`")),
2039                    vec![],
2040                );
2041            }
2042        }
2043    }
2044}
2045
2046// ─────────────────────────────────────────────────────────────────────────────
2047//  Cross-file: log.md well-formedness + ordering (validate_all only)
2048// ─────────────────────────────────────────────────────────────────────────────
2049
2050/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
2051/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
2052///
2053/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
2054/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
2055/// LOG_* checks read only the active file, an entry `validate --all` flagged
2056/// while it lived in `log.md` would stop being flagged the moment a newer-month
2057/// append rotated it into an archive — even though the log readers still surface
2058/// that exact entry to the curator. Scanning the archives too keeps validate and
2059/// the readers in agreement after a rotation.
2060///
2061/// Order: archives oldest-month first, then the active `log.md` last — the true
2062/// chronological timeline — so the out-of-order check threads `prev` across the
2063/// rotation boundary the same way it does within a single file.
2064fn check_log(store: &Store, issues: &mut Vec<Issue>) {
2065    let mut prev: Option<DateTime<FixedOffset>> = None;
2066    for rel in log_files_chronological(store) {
2067        check_log_file(store, &rel, &mut prev, issues);
2068    }
2069}
2070
2071/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
2072/// archive oldest-month first, then the active `log.md` last. Missing files are
2073/// simply absent from the list.
2074fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2075    let mut files: Vec<PathBuf> = Vec::new();
2076    let archive_dir = store.root.join("log");
2077    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2078        let mut archives: Vec<PathBuf> = entries
2079            .flatten()
2080            .map(|e| e.path())
2081            .filter(|p| {
2082                p.is_file()
2083                    && p.file_name()
2084                        .and_then(|s| s.to_str())
2085                        .and_then(|n| n.strip_suffix(".md"))
2086                        .is_some_and(is_year_month_archive)
2087            })
2088            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2089            .collect();
2090        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
2091        archives.sort();
2092        files.extend(archives);
2093    }
2094    // The active file holds the current month — newest, so it comes last.
2095    if store.root.join("log.md").is_file() {
2096        files.push(PathBuf::from("log.md"));
2097    }
2098    files
2099}
2100
2101/// Scan one log file's entry headers, threading the running `prev` timestamp so
2102/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2103/// given store-relative path so an archived entry points at its archive file.
2104fn check_log_file(
2105    store: &Store,
2106    log_rel: &Path,
2107    prev: &mut Option<DateTime<FixedOffset>>,
2108    issues: &mut Vec<Issue>,
2109) {
2110    let abs = store.root.join(log_rel);
2111    let Ok(text) = std::fs::read_to_string(&abs) else {
2112        return;
2113    };
2114
2115    for (i, line) in text.lines().enumerate() {
2116        if !line.starts_with("## [") {
2117            continue;
2118        }
2119        let line_no = (i + 1) as u32;
2120        match parse_log_header(line) {
2121            None => push(
2122                issues,
2123                Severity::Error,
2124                codes::LOG_BAD_TIMESTAMP,
2125                log_rel,
2126                Some(line_no),
2127                None,
2128                format!("log entry header has an unparseable timestamp: {line:?}"),
2129                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2130                vec![],
2131            ),
2132            Some((ts, kind, _object)) => {
2133                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2134                    push(
2135                        issues,
2136                        Severity::Warning,
2137                        codes::LOG_UNKNOWN_KIND,
2138                        log_rel,
2139                        Some(line_no),
2140                        None,
2141                        format!("log entry kind `{kind}` is not recognized"),
2142                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2143                        vec![],
2144                    );
2145                }
2146                if let Some(p) = *prev {
2147                    if ts < p {
2148                        push(
2149                            issues,
2150                            Severity::Warning,
2151                            codes::LOG_OUT_OF_ORDER,
2152                            log_rel,
2153                            Some(line_no),
2154                            None,
2155                            "log entry is older than the entry above it (possible rewrite)".into(),
2156                            Some("append corrective entries; never reorder past ones".into()),
2157                            vec![],
2158                        );
2159                    }
2160                }
2161                *prev = Some(ts);
2162            }
2163        }
2164    }
2165}
2166
2167// ─────────────────────────────────────────────────────────────────────────────
2168//  Self-contained primitives (collapse onto sibling modules once they land)
2169// ─────────────────────────────────────────────────────────────────────────────
2170
/// A minimal wiki-link occurrence: the link target and the line it was found
/// on. The `|display` half of a `[[target|display]]` link is discarded by the
/// scanners that build this value, so only the target is kept.
#[derive(Debug)]
struct Link {
    /// The text before any `|`, trimmed.
    target: String,
    /// Line number of the occurrence, as computed by the scanner that produced
    /// it (1-based within the body for body links; the file line for
    /// frontmatter links).
    line: u32,
}
2177
2178/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2179/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2180/// exact-cased directory entry to be present.
2181fn store_marker_present(store: &Store) -> bool {
2182    let want = store.root.join("DB.md");
2183    if !want.is_file() {
2184        return false;
2185    }
2186    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2187    match std::fs::read_dir(&store.root) {
2188        Ok(entries) => entries
2189            .flatten()
2190            .any(|e| e.file_name().to_str() == Some("DB.md")),
2191        Err(_) => true, // can't enumerate; trust the is_file() above
2192    }
2193}
2194
2195/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2196/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2197/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2198/// `Schemas`).
2199///
2200/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2201/// than through `check_content_file`. The marker presence is established by the
2202/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2203/// as a store (the marker is the filename), so we report its shape rather than
2204/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2205fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2206    let rel = Path::new("DB.md");
2207    let abs = store.root.join("DB.md");
2208    let Ok(text) = std::fs::read_to_string(&abs) else {
2209        return; // marker present but unreadable: nothing more to say.
2210    };
2211
2212    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2213        // No frontmatter block at all → it cannot declare `type: db-md` and has
2214        // neither required field. Report the type and both missing fields,
2215        // anchored to line 1 (the would-be opening fence).
2216        push(
2217            issues,
2218            Severity::Error,
2219            codes::DB_MD_BAD_TYPE,
2220            rel,
2221            Some(1),
2222            Some("type".into()),
2223            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2224            Some("add a `---` frontmatter block with `type: db-md`".into()),
2225            vec![],
2226        );
2227        for field in ["scope", "owner"] {
2228            push(
2229                issues,
2230                Severity::Error,
2231                codes::DB_MD_MISSING_FIELD,
2232                rel,
2233                Some(1),
2234                Some(field.into()),
2235                format!("DB.md frontmatter is missing required field `{field}`"),
2236                Some(format!("add `{field}:` to the DB.md frontmatter")),
2237                vec![],
2238            );
2239        }
2240        return;
2241    };
2242
2243    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2244    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2245    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2246        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2247        Ok(Value::Null) => Some(BTreeMap::new()),
2248        _ => None,
2249    };
2250
2251    match &fm {
2252        Some(map) => {
2253            // ── type: db-md ──────────────────────────────────────────────────
2254            let type_ = map.get("type").and_then(scalar_string);
2255            if type_.as_deref() != Some("db-md") {
2256                let (line, msg) = match &type_ {
2257                    Some(t) => (
2258                        fm_key_line(&fm_yaml, "type"),
2259                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2260                    ),
2261                    None => (
2262                        Some(1),
2263                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2264                    ),
2265                };
2266                push(
2267                    issues,
2268                    Severity::Error,
2269                    codes::DB_MD_BAD_TYPE,
2270                    rel,
2271                    line,
2272                    Some("type".into()),
2273                    msg,
2274                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2275                    vec![],
2276                );
2277            }
2278
2279            // ── required fields: scope + owner ───────────────────────────────
2280            for field in ["scope", "owner"] {
2281                let present = map
2282                    .get(field)
2283                    .and_then(scalar_string)
2284                    .map(|s| !s.trim().is_empty())
2285                    .unwrap_or(false);
2286                if !present {
2287                    push(
2288                        issues,
2289                        Severity::Error,
2290                        codes::DB_MD_MISSING_FIELD,
2291                        rel,
2292                        // A present-but-empty field anchors to its line; a fully
2293                        // absent one to the block top.
2294                        fm_key_line_or_top(&fm_yaml, field),
2295                        Some(field.into()),
2296                        format!("DB.md frontmatter is missing required field `{field}`"),
2297                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2298                        vec![],
2299                    );
2300                }
2301            }
2302        }
2303        None => {
2304            // Unparseable frontmatter: the identity contract is unprovable. Emit
2305            // the type error and both field errors, anchored to the block top.
2306            push(
2307                issues,
2308                Severity::Error,
2309                codes::DB_MD_BAD_TYPE,
2310                rel,
2311                Some(1),
2312                Some("type".into()),
2313                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2314                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2315                vec![],
2316            );
2317            for field in ["scope", "owner"] {
2318                push(
2319                    issues,
2320                    Severity::Error,
2321                    codes::DB_MD_MISSING_FIELD,
2322                    rel,
2323                    Some(1),
2324                    Some(field.into()),
2325                    format!("DB.md frontmatter is missing required field `{field}`"),
2326                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2327                    vec![],
2328                );
2329            }
2330        }
2331    }
2332
2333    // ── recognized `##` section headers only ─────────────────────────────────
2334    // The body's H2 headings must be one of the four the toolkit reads; any
2335    // other is a likely typo / misplacement (warning — the parser ignores it,
2336    // so the config is not corrupted, but the operator wrote a section that will
2337    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2338    // schema blocks) live under their H2 and are not flagged here.
2339    //
2340    // `## Folders` is recognized: `parse_db_md` reads it into `Config.folders`
2341    // (parser.rs) and the index renders folder display names + descriptions from
2342    // it (index.rs `render_*_md_from_stats`). Flagging it `DB_MD_UNKNOWN_SECTION`
2343    // with "remove this heading" told the operator to delete a working,
2344    // round-tripped config block — destroying curator-authored rollup names. It
2345    // is a real, shipped section; SPEC.md documents it alongside the other three.
2346    for section in crate::parser::extract_sections(&body) {
2347        if section.level != 2 {
2348            continue;
2349        }
2350        let name = section.heading.trim().to_ascii_lowercase();
2351        if matches!(
2352            name.as_str(),
2353            "agent instructions" | "policies" | "schemas" | "folders"
2354        ) {
2355            continue;
2356        }
2357        // `Section::line` is 1-based within the body; the body begins at file
2358        // line `fm_end_line + 1`.
2359        let file_line = fm_end_line + section.line;
2360        push(
2361            issues,
2362            Severity::Warning,
2363            codes::DB_MD_UNKNOWN_SECTION,
2364            rel,
2365            Some(file_line),
2366            None,
2367            format!(
2368                "DB.md has an unrecognized `## {}` section",
2369                section.heading.trim()
2370            ),
2371            Some(
2372                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas`, \
2373                 `## Folders` — remove or rename this heading"
2374                    .into(),
2375            ),
2376            vec![],
2377        );
2378    }
2379
2380    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2381    // Without this, every schema misparse is silent: the operator/agent gets no
2382    // signal that DB.md is interpreting their schema differently from what they
2383    // wrote, and downstream records are validated against the degraded schema.
2384    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2385}
2386
2387/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2388/// duplicate field name within a type, or an unrecognized modifier all parse
2389/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2390/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2391/// already separated out); this pass reports the suspicious *field* shapes,
2392/// anchored to the `### <type>` heading line so the agent can find the block.
2393fn check_db_md_schemas(
2394    store: &Store,
2395    rel: &Path,
2396    body: &str,
2397    fm_end_line: u32,
2398    issues: &mut Vec<Issue>,
2399) {
2400    if store.config.schemas.is_empty() {
2401        return;
2402    }
2403
2404    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2405    // per-type issue can anchor to the declaration block. `extract_sections`
2406    // returns a flat list with 1-based body lines; the body starts at file line
2407    // `fm_end_line + 1`.
2408    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2409    let mut current_h2: Option<String> = None;
2410    for section in crate::parser::extract_sections(body) {
2411        match section.level {
2412            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2413            3 if current_h2.as_deref() == Some("schemas") => {
2414                // The H3 heading text (as written) is the type name — the same
2415                // key `parse_db_md` inserts into `config.schemas`.
2416                type_line
2417                    .entry(section.heading.trim().to_string())
2418                    .or_insert(fm_end_line + section.line);
2419            }
2420            _ => {}
2421        }
2422    }
2423
2424    for (type_name, schema) in &store.config.schemas {
2425        let line = type_line.get(type_name).copied();
2426        let mut seen: BTreeSet<String> = BTreeSet::new();
2427        for field in &schema.fields {
2428            let name = field.name.trim();
2429
2430            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2431            // nameless field that can never match a frontmatter key, so its
2432            // required/shape/enum constraints silently never apply.
2433            if name.is_empty() {
2434                push(
2435                    issues,
2436                    Severity::Warning,
2437                    codes::DB_MD_SCHEMA_FIELD,
2438                    rel,
2439                    line,
2440                    None,
2441                    format!("`### {type_name}` has a schema field bullet with no field name"),
2442                    Some(
2443                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2444                            .into(),
2445                    ),
2446                    vec![],
2447                );
2448                continue;
2449            }
2450
2451            // Duplicate field name within a type: the second declaration's
2452            // constraints are interpreted independently of the first, so the
2453            // author's intent is ambiguous and likely wrong.
2454            if !seen.insert(name.to_string()) {
2455                push(
2456                    issues,
2457                    Severity::Warning,
2458                    codes::DB_MD_SCHEMA_FIELD,
2459                    rel,
2460                    line,
2461                    Some(name.to_string()),
2462                    format!("`### {type_name}` declares field `{name}` more than once"),
2463                    Some(
2464                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2465                    ),
2466                    vec![],
2467                );
2468            }
2469
2470            // Unrecognized modifiers: the parser stashes anything outside the
2471            // known vocabulary (`required` / a shape / `link to …` / `default …`
2472            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2473            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2474            for modifier in &field.unknown_modifiers {
2475                let modifier = modifier.trim();
2476                if modifier.is_empty() {
2477                    continue;
2478                }
2479                push(
2480                    issues,
2481                    Severity::Info,
2482                    codes::DB_MD_SCHEMA_FIELD,
2483                    rel,
2484                    line,
2485                    Some(name.to_string()),
2486                    format!(
2487                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2488                    ),
2489                    Some(
2490                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2491                            .into(),
2492                    ),
2493                    vec![],
2494                );
2495            }
2496        }
2497    }
2498}
2499
2500/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2501fn not_a_store_issue(store: &Store) -> Issue {
2502    Issue {
2503        severity: Severity::Error,
2504        code: codes::NOT_A_STORE,
2505        file: store.root.clone(),
2506        line: None,
2507        key: None,
2508        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2509        suggestion: Some("create a `DB.md` at the store root".into()),
2510        related: vec![],
2511    }
2512}
2513
2514/// True if a store-relative path is a content file: under `sources/` or
2515/// `records/` and not an `index.md`/`index.jsonl`/`log.md`.
2516fn is_content_file(rel: &Path) -> bool {
2517    // Defense in depth: a real content file is always a forward (Normal-only)
2518    // store-relative path. Reject any `..`/absolute/prefix component so a
2519    // malformed object slot judged only by its FIRST component (`records/../..`)
2520    // can never turn a per-file read into a store escape, even if a future caller
2521    // forgets the path-safety gate `changed_objects_since` now applies.
2522    if !is_safe_store_relative_path(rel) {
2523        return false;
2524    }
2525    let Some(first) = rel.iter().next().and_then(|s| s.to_str()) else {
2526        return false;
2527    };
2528    if !matches!(first, "sources" | "records") {
2529        return false;
2530    }
2531    let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
2532    // Only the derived catalog twins are meta INSIDE a layer. `DB.md` / `log.md`
2533    // are reserved meta only at the store ROOT, which the `first` layer check
2534    // above already excludes — so a content file named `log.md` / `DB.md` inside
2535    // a layer (e.g. `records/docs/log.md`) is real content, consistent with
2536    // `Store::walk`.
2537    if matches!(name, "index.md" | "index.jsonl") {
2538        return false;
2539    }
2540    name.ends_with(".md")
2541}
2542
/// True for the store's root meta files: a store-relative path made of exactly
/// one normal component whose name is `DB.md` or `log.md`.
///
/// Anything with a parent directory (e.g. `records/docs/log.md`) is not a root
/// meta file, and neither is a path whose first component is not a plain name
/// (`..`, `.`, a root, or a prefix).
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain file name.
        (Some(Component::Normal(name)), None) => {
            matches!(name.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2559
/// True when the path's final component is exactly `index.md` or
/// `index.jsonl`, at any depth.
///
/// These are derived catalog files whose entries are generated links rather
/// than authored body links, which is why callers treat them separately from
/// content files.
fn is_index_catalog_file(rel: &Path) -> bool {
    let Some(name) = rel.file_name().and_then(|n| n.to_str()) else {
        // No final component, or a name that is not valid UTF-8.
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
2573
/// Split a file's text into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open on the very first line with `---` (trailing
/// whitespace allowed) and runs to the next line that is `---`. The returned
/// YAML has each line terminated by `\n`; the body is every line after the
/// closing fence joined with `\n` (so without a trailing newline); the line
/// number is the 1-based file line of the closing fence. Returns `None` when
/// the text is empty, does not open with a fence, or the fence is never closed.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // A single leading UTF-8 BOM is tolerated so a BOM-prefixed file is not
    // mistaken for one without frontmatter.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let mut rest = text.lines();
    if rest.next()?.trim_end() != "---" {
        return None;
    }

    // The opening fence is line 1, so the first YAML line is line 2.
    let mut line_no = 1u32;
    let mut yaml = String::new();
    let mut closed = false;
    for line in rest.by_ref() {
        line_no += 1;
        if line.trim_end() == "---" {
            closed = true;
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
    if !closed {
        return None;
    }

    // Whatever the iterator has left is the body.
    let body = rest.collect::<Vec<_>>().join("\n");
    Some((yaml, body, line_no))
}
2610
2611/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2612fn read_summary(abs: &Path) -> Option<String> {
2613    let text = std::fs::read_to_string(abs).ok()?;
2614    let (yaml, _, _) = split_frontmatter(&text)?;
2615    let value: Value = serde_norway::from_str(&yaml).ok()?;
2616    if let Value::Mapping(m) = value {
2617        m.get(Value::String("summary".into()))
2618            .and_then(scalar_string)
2619    } else {
2620        None
2621    }
2622}
2623
2624/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2625/// non-string keys (frontmatter keys are always strings).
2626fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2627    let mut out = BTreeMap::new();
2628    for (k, v) in map {
2629        if let Value::String(s) = k {
2630            out.insert(s.clone(), v.clone());
2631        }
2632    }
2633    out
2634}
2635
2636/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2637/// sequences/mappings/null.
2638fn scalar_string(v: &Value) -> Option<String> {
2639    match v {
2640        Value::String(s) => Some(s.clone()),
2641        Value::Number(n) => Some(n.to_string()),
2642        Value::Bool(b) => Some(b.to_string()),
2643        _ => None,
2644    }
2645}
2646
2647/// True if a frontmatter value carries no content for a *required*-field check:
2648/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2649/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2650/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2651/// a shape/enum field is caught by the later non-scalar shape check, not by the
2652/// required-presence check.
2653fn is_empty_value(v: &Value) -> bool {
2654    match v {
2655        Value::Null => true,
2656        Value::Sequence(items) => items.is_empty(),
2657        Value::Mapping(map) => map.is_empty(),
2658        other => scalar_string(other)
2659            .map(|s| s.trim().is_empty())
2660            .unwrap_or(true),
2661    }
2662}
2663
2664/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2665/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2666fn is_flat_scalar_list(v: &Value) -> bool {
2667    match v {
2668        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2669        _ => false,
2670    }
2671}
2672
2673/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2674/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2675/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2676/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2677/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2678/// both forms uniformly, the way the link textually appears — the doctrine view.
2679///
2680/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2681/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2682fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2683    let mut out = Vec::new();
2684    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2685        for link in links {
2686            out.push((key.clone(), link));
2687        }
2688    }
2689    out
2690}
2691
2692/// The wiki-link targets declared under a single top-level frontmatter key
2693/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2694/// carries no `[[...]]`.
2695fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2696    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2697        if k == key {
2698            return links;
2699        }
2700    }
2701    Vec::new()
2702}
2703
2704/// The raw value text under a single top-level frontmatter key (the remainder of
2705/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2706/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2707fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2708    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2709        if k == key {
2710            return Some(value_text);
2711        }
2712    }
2713    None
2714}
2715
2716/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2717/// each top-level key. A top-level key is a line with no leading indentation in
2718/// `name:` form; its value spans the rest of that line plus any deeper-indented
2719/// continuation lines (block scalars, block sequences) until the next top-level
2720/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2721/// absolute file line.
2722fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2723    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2724    let mut current: Option<(String, String, Vec<Link>)> = None;
2725
2726    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2727        let file_line = fm_start_line + idx as u32;
2728        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2729        let trimmed = raw_line.trim();
2730
2731        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2732        // comment. (Indented or dash lines belong to the current key's value.)
2733        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2734            top_level_key(raw_line)
2735        } else {
2736            None
2737        };
2738
2739        if let Some((key, after)) = new_key {
2740            if let Some(done) = current.take() {
2741                blocks.push(done);
2742            }
2743            let mut links = Vec::new();
2744            collect_line_links(after, file_line, &mut links);
2745            current = Some((key, after.trim().to_string(), links));
2746        } else if let Some((_k, value_text, links)) = current.as_mut() {
2747            // Continuation of the current key's value (indented or dash line).
2748            if !value_text.is_empty() {
2749                value_text.push('\n');
2750            }
2751            value_text.push_str(trimmed);
2752            collect_line_links(raw_line, file_line, links);
2753        }
2754    }
2755    if let Some(done) = current.take() {
2756        blocks.push(done);
2757    }
2758    blocks
2759}
2760
/// Parse a frontmatter line of the form `name: value` into
/// `(key, text_after_the_first_colon)`.
///
/// The key is the trimmed text before the first `:` and must be non-empty and
/// made only of alphanumeric characters, `_`, or `-`. The remainder is returned
/// untrimmed. Returns `None` when the line has no colon or the key is invalid.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let key = line[..colon].trim();

    let well_formed = !key.is_empty()
        && key
            .chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
    if !well_formed {
        return None;
    }

    // `:` is one byte, so the value starts right after it.
    Some((key.to_string(), &line[colon + 1..]))
}
2775
2776/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2777/// each tagged with `file_line`.
2778fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2779    let bytes = s.as_bytes();
2780    let mut i = 0;
2781    while i + 1 < bytes.len() {
2782        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2783            if let Some(close) = s[i + 2..].find("]]") {
2784                let inner = &s[i + 2..i + 2 + close];
2785                // Guard against `[[[` (nested) double-counting: the inner must
2786                // not itself open another `[[`.
2787                let target = inner
2788                    .trim_start_matches('[')
2789                    .split('|')
2790                    .next()
2791                    .unwrap_or(inner)
2792                    .trim()
2793                    .to_string();
2794                if !target.is_empty() {
2795                    links.push(Link {
2796                        target,
2797                        line: file_line,
2798                    });
2799                }
2800                i = i + 2 + close + 2;
2801                continue;
2802            }
2803        }
2804        i += 1;
2805    }
2806}
2807
2808/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2809/// Skips fenced code blocks, so example links in docs don't trip the validator.
2810///
2811/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2812/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2813/// only on a line that is the **same** fence character with a run **at least as
2814/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2815/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2816/// document a backtick fence) — the inner backtick line would flip `in_fence`
2817/// off and the demo `[[…]]` inside the code block would be checked as a live
2818/// link, falsely flagging a legal store.
2819fn extract_wiki_links(body: &str) -> Vec<Link> {
2820    let mut out = Vec::new();
2821    let mut fence: Option<(u8, usize)> = None;
2822    for (idx, line) in body.lines().enumerate() {
2823        let content = line.trim_end_matches('\r');
2824        if let Some(f) = fence {
2825            // Inside a fence: the only thing that matters is whether THIS line
2826            // closes it (matching char, run ≥ the opening run). Everything else
2827            // is opaque code — no link extraction.
2828            if fence_closes(content, f) {
2829                fence = None;
2830            }
2831            continue;
2832        }
2833        if let Some(opened) = fence_opens(content) {
2834            fence = Some(opened);
2835            continue;
2836        }
2837        let line_no = (idx + 1) as u32;
2838        let bytes = line.as_bytes();
2839        let mut i = 0;
2840        while i + 1 < bytes.len() {
2841            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2842                if let Some(close) = line[i + 2..].find("]]") {
2843                    let inner = &line[i + 2..i + 2 + close];
2844                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2845                    // Skip a triple-bracket `[[[…` opening: the inner content
2846                    // starts with `[`, so this is the rejected flow-form list
2847                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2848                    // legitimate target never starts with `[`. The frontmatter
2849                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2850                    // extracting a bogus body link here would double-report it as
2851                    // a spurious `WIKI_LINK_SHORT_FORM`.
2852                    if !target.is_empty() && !target.starts_with('[') {
2853                        out.push(Link {
2854                            target,
2855                            line: line_no,
2856                        });
2857                    }
2858                    i = i + 2 + close + 2;
2859                    continue;
2860                }
2861            }
2862            i += 1;
2863        }
2864    }
2865    out
2866}
2867
/// Decide whether `line` opens a fenced code block and, if so, report the
/// fence as `(fence byte, run length)`.
///
/// A local mirror of the parser's `opening_fence`, so the validator tracks
/// fences the same way as the rest of the toolkit. A line opens a fence when:
///
/// * it is indented by at most three spaces,
/// * the first non-space byte is a backtick or a tilde,
/// * that character repeats at least three times, and
/// * for a backtick fence, the text after the run (the info string) contains no
///   further backtick.
///
/// Returns `None` for every other line, including the empty line.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }
    let marker = rest
        .bytes()
        .next()
        .filter(|b| matches!(b, b'`' | b'~'))?;
    let run = rest.bytes().take_while(|&b| b == marker).count();
    if run < 3 {
        return None;
    }
    // The info string of a backtick fence may not contain a backtick; a tilde
    // fence has no such restriction.
    let info = &rest[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
2893
/// Report whether `line` ends the currently open `fence`.
///
/// `fence` is the `(fence byte, run length)` pair recorded when the block was
/// opened. The line closes it when it is indented by at most three spaces,
/// starts with a run of that same character at least as long as the opening
/// run, and carries nothing but whitespace afterwards. A local mirror of the
/// parser's `is_closing_fence`: a fence line of the *other* character (for
/// example a ``` ``` ``` line inside a `~~~` block) does NOT close the block.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (byte, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return false;
    }
    let after_run = rest.trim_start_matches(char::from(byte));
    let run = rest.len() - after_run.len();
    run >= open_len && after_run.trim().is_empty()
}
2911
2912/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2913/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2914///
2915/// **Scoped to the inline value on the key line.** The SPEC's canonical
2916/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2917/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2918/// flagged — even though, parsed whole, it nests the same way the rejected
2919/// inline flow form does. So this check looks only at the value written *inline*
2920/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2921/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2922/// mis-encoding), it is flagged. A key with no inline value (the block form,
2923/// whose items live on continuation lines) is never inspected here.
2924///
2925/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2926/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2927/// encodes the identical nested sequence but evaded the old prefix match.
2928fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2929    let mut out = Vec::new();
2930    for line in fm_yaml.lines() {
2931        // Top-level key lines only (no indentation, not a comment or list dash).
2932        if line.starts_with(' ') || line.starts_with('\t') {
2933            continue;
2934        }
2935        let Some((key, rest)) = line.split_once(':') else {
2936            continue;
2937        };
2938        let key = key.trim();
2939        if key.is_empty()
2940            || key.starts_with('#')
2941            || key.starts_with('-')
2942            || !key
2943                .chars()
2944                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2945        {
2946            continue;
2947        }
2948        let rest = rest.trim();
2949        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2950        // the unquoted block form has an empty inline value and is never flagged.
2951        if !rest.starts_with('[') {
2952            continue;
2953        }
2954        // Parse just the inline value and test its shape: a list whose items are
2955        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2956        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2957        // `Seq[Seq[String]]` and is NOT flagged).
2958        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2959            let nested = items.iter().any(|item| match item {
2960                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2961                _ => false,
2962            });
2963            if nested {
2964                out.push(key.to_string());
2965            }
2966        }
2967    }
2968    out
2969}
2970
/// Report whether a bare target (no `.md`) is a full store-relative path: the
/// text before its first `/` is a known layer (`sources` or `records`) and
/// something non-empty follows that `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, rest)) => !rest.is_empty() && matches!(layer, "sources" | "records"),
        None => false,
    }
}
2979
/// Report whether `path` is made only of ordinary relative components.
///
/// Validator inputs come from user-authored markdown / JSON sidecars, so a
/// path must never be allowed to turn a validation probe into a filesystem
/// escape: any `..`, root, or platform-prefix component makes the path unsafe.
/// `.` components are tolerated but do not count; at least one normal
/// component is required, so the empty path and a bare `.` are rejected too.
fn is_safe_store_relative_path(path: &Path) -> bool {
    let mut has_normal = false;
    let only_safe = path.components().all(|component| match component {
        Component::Normal(_) => {
            has_normal = true;
            true
        }
        Component::CurDir => true,
        Component::ParentDir | Component::RootDir | Component::Prefix(_) => false,
    });
    only_safe && has_normal
}
2994
2995fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2996    let path = Path::new(bare);
2997    if !is_safe_store_relative_path(path) {
2998        return None;
2999    }
3000    Some(PathBuf::from(format!("{bare}.md")))
3001}
3002
/// How a wiki-link / index-entry target resolves on disk. This is the result
/// type of [`resolve_wiki_target`], which keeps "unsafe" distinct from
/// "missing" so callers can tell an escaping target from a dangling one.
enum TargetResolution {
    /// The target exists (either as the literal path or with a `.md` suffix).
    Exists,
    /// The target is a safe store-relative path but no file exists for it.
    Missing,
    /// The target escapes the store (absolute, `..`, prefix) — never probe it.
    Unsafe,
}
3012
3013/// Resolve a bare wiki-link / index-entry target the way the graph engine does
3014/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
3015/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
3016/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
3017/// real file), then the `.md`-appended path (the common case for content
3018/// pages). Without trying the literal path first, a legal link to a raw source
3019/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
3020/// resolves it.
3021fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
3022    // The literal path and the `.md`-appended path share the same safety check
3023    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
3024    // target is unsafe in both forms.
3025    if !is_safe_store_relative_path(Path::new(bare)) {
3026        return TargetResolution::Unsafe;
3027    }
3028    match resolved_target_abs(store, bare) {
3029        Some(_) => TargetResolution::Exists,
3030        None => TargetResolution::Missing,
3031    }
3032}
3033
3034/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
3035/// trying the literal path first, then `.md`-appended — mirroring the graph
3036/// engine. `None` when neither exists, or when the bare target escapes the store
3037/// (callers that need to distinguish unsafe from merely-missing use
3038/// [`resolve_wiki_target`]).
3039fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
3040    if !is_safe_store_relative_path(Path::new(bare)) {
3041        return None;
3042    }
3043    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
3044    // verbatim under `sources/`).
3045    let literal = store.root.join(bare);
3046    if literal.is_file() {
3047        return Some(literal);
3048    }
3049    // The `.md`-appended path (a content page referenced without its extension).
3050    let with_md = store.root.join(format!("{bare}.md"));
3051    if with_md.is_file() {
3052        return Some(with_md);
3053    }
3054    None
3055}
3056
/// Report whether the bare target `bare` is `prefix` itself or lies beneath it
/// (both given without `.md`). Trailing `/` characters on `prefix` are ignored,
/// and the match is on whole path segments: `records/contactsX` is not under
/// `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let prefix = prefix.trim_end_matches('/');
    // After removing the prefix, either nothing is left (exact match) or the
    // remainder must start at a `/` segment boundary.
    matches!(
        bare.strip_prefix(prefix),
        Some(rest) if rest.is_empty() || rest.starts_with('/')
    )
}
3062
/// The type-folder for a store-relative content path: `<layer>/<type-folder>`
/// (the folder directly under the layer; date-shards roll up to it).
///
/// Returns `None` when the path has fewer than three components (a file
/// sitting directly in a layer folder, or something shorter) or when its first
/// component is not one of the two layers, `sources` / `records`.
///
/// Components are taken positionally from the path's own `OsStr` pieces. They
/// are deliberately NOT filtered through `to_str()` first: dropping a
/// non-UTF-8 component would shift every later component one slot to the left,
/// so `records/<non-utf8>/2026/a.md` would be attributed to `records/2026`
/// instead of `records/<non-utf8>`.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter();
    let layer = parts.next()?;
    let folder = parts.next()?;
    // Need layer/type-folder/file at minimum.
    parts.next()?;
    if !matches!(layer.to_str(), Some("sources" | "records")) {
        return None;
    }
    Some(PathBuf::from(layer).join(folder))
}
3076
/// The layer dir a *loose* content file sits directly in (`records` /
/// `sources`): the path must have exactly two components, the first a known
/// layer. Returns `None` for a file inside a type-folder or outside any layer.
/// Counterpart to the index crate's `loose_layer_of`, kept local so `validate`
/// needs no index internals.
///
/// Components are counted on the path's own `OsStr` pieces rather than on a
/// `to_str()`-filtered list. Filtering would drop non-UTF-8 components and
/// miscount: a file inside a non-UTF-8 folder (`records/<non-utf8>/a.md`) would
/// look loose, and a loose file with a non-UTF-8 name would not.
fn loose_layer_dir(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter();
    let layer = parts.next()?;
    // The second component is the file itself; anything beyond it means the
    // file lives deeper than the layer folder.
    parts.next()?;
    if parts.next().is_some() {
        return None;
    }
    matches!(layer.to_str(), Some("sources" | "records")).then(|| PathBuf::from(layer))
}
3088
3089/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`,
3090/// returning store-relative paths to be parsed in full. Skips hidden dirs and
3091/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
3092/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
3093/// (a single presence-only pass), so the loop default never walks-and-*parses*
3094/// the whole content tree.
3095///
3096/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
3097/// is reserved (`Store::is_in_log_dir` checks only the first path component);
3098/// the walk roots are the two layers, so the root archive is already out of
3099/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
3100/// decision log) is real content (see `is_content_file`), so pruning every
3101/// `name == "log"` made `--all` silently skip those files — reporting fewer
3102/// errors than the default working-set scope on the same store.
3103fn walk_content_files(root: &Path) -> Vec<PathBuf> {
3104    let mut out = Vec::new();
3105    for layer in ["sources", "records"] {
3106        let base = root.join(layer);
3107        if !base.is_dir() {
3108            continue;
3109        }
3110        for entry in walkdir::WalkDir::new(&base)
3111            // Follow symlinks, matching the loop-default `md_walker`
3112            // (store.rs `follow_links(true)`): a content file that is a symlink
3113            // into the store, or that lives in a symlinked-in type-folder, is
3114            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3115            // `walk_all_md`, both following symlinks). Without this the `--all`
3116            // sweep silently SKIPPED such files, so the authoritative superset
3117            // reported FEWER issues than the loop scope on the same store —
3118            // inverting the `--all`-is-the-superset contract. walkdir's loop
3119            // detection drops a symlink cycle (yields an Err that `.flatten()`
3120            // discards), so this cannot hang.
3121            .follow_links(true)
3122            .into_iter()
3123            .filter_entry(|e| {
3124                let name = e.file_name().to_str().unwrap_or("");
3125                !name.starts_with('.')
3126            })
3127            .flatten()
3128        {
3129            if !entry.file_type().is_file() {
3130                continue;
3131            }
3132            let name = entry.file_name().to_str().unwrap_or("");
3133            if name.ends_with(".md") && name != "index.md" {
3134                if let Ok(rel) = entry.path().strip_prefix(root) {
3135                    out.push(rel.to_path_buf());
3136                }
3137            }
3138        }
3139    }
3140    out.sort();
3141    out
3142}
3143
3144/// Every `index.md` under the store (root + layers + type-folders), as
3145/// store-relative paths. Used to detect orphan indexes. Like
3146/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
3147/// and its `index.md` is not pruned (only the root-level `log/` archive is
3148/// reserved, and the walk roots are the two layers, so it is already
3149/// out of scope).
3150fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3151    let mut out = Vec::new();
3152    if root.join("index.md").is_file() {
3153        out.push(PathBuf::from("index.md"));
3154    }
3155    for layer in ["sources", "records"] {
3156        let base = root.join(layer);
3157        if !base.is_dir() {
3158            continue;
3159        }
3160        for entry in walkdir::WalkDir::new(&base)
3161            // Follow symlinks, matching the loop-default `md_walker`
3162            // (store.rs `follow_links(true)`): a content file that is a symlink
3163            // into the store, or that lives in a symlinked-in type-folder, is
3164            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3165            // `walk_all_md`, both following symlinks). Without this the `--all`
3166            // sweep silently SKIPPED such files, so the authoritative superset
3167            // reported FEWER issues than the loop scope on the same store —
3168            // inverting the `--all`-is-the-superset contract. walkdir's loop
3169            // detection drops a symlink cycle (yields an Err that `.flatten()`
3170            // discards), so this cannot hang.
3171            .follow_links(true)
3172            .into_iter()
3173            .filter_entry(|e| {
3174                let name = e.file_name().to_str().unwrap_or("");
3175                !name.starts_with('.')
3176            })
3177            .flatten()
3178        {
3179            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3180                if let Ok(rel) = entry.path().strip_prefix(root) {
3181                    out.push(rel.to_path_buf());
3182                }
3183            }
3184        }
3185    }
3186    out.sort();
3187    out
3188}
3189
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number. Produced by
/// [`parse_index_entries`].
struct IndexEntry {
    /// The link target: the text inside the first `[[...]]` on the line, cut at
    /// any `|display` segment and trimmed.
    target: String,
    /// The summary that follows the link, or `None` when the line carries no
    /// separator or an empty summary.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md` text.
    line: u32,
}
3197
3198/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3199/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3200/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3201/// path, the summary text is whatever follows the em dash.
3202fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3203    let mut out = Vec::new();
3204    let mut in_more = false;
3205    for (idx, line) in text.lines().enumerate() {
3206        let trimmed = line.trim_start();
3207        if trimmed.starts_with("## More") {
3208            in_more = true;
3209            continue;
3210        }
3211        if in_more {
3212            continue;
3213        }
3214        if !trimmed.starts_with("- ") {
3215            continue;
3216        }
3217        // Find the first `[[...]]`.
3218        let Some(open) = trimmed.find("[[") else {
3219            continue;
3220        };
3221        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3222            continue;
3223        };
3224        let inner = &trimmed[open + 2..open + 2 + close_rel];
3225        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3226
3227        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3228        let after = &trimmed[open + 2 + close_rel + 2..];
3229        let summary_text = extract_index_entry_summary(after);
3230
3231        out.push(IndexEntry {
3232            target,
3233            summary_text,
3234            line: (idx + 1) as u32,
3235        });
3236    }
3237    out
3238}
3239
3240/// Pull the summary portion out of the text trailing an index entry's
3241/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3242/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3243/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3244/// renderer's tag separator).
3245fn extract_index_entry_summary(after: &str) -> Option<String> {
3246    let mut s = after.trim();
3247    // Drop a leading "(N ...)" count segment, if present.
3248    if s.starts_with('(') {
3249        if let Some(close) = s.find(')') {
3250            s = s[close + 1..].trim_start();
3251        }
3252    }
3253    // Require an em dash or hyphen separator before the summary.
3254    let s = if let Some(rest) = s.strip_prefix('—') {
3255        rest.trim()
3256    } else if let Some(rest) = s.strip_prefix('-') {
3257        rest.trim()
3258    } else {
3259        return None;
3260    };
3261    if s.is_empty() {
3262        return None;
3263    }
3264    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3265    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3266    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3267    // the file has no tags. The previous code also accepted a *single*-spaced
3268    // ` · ` separator, which collided with a legal summary whose own text ends
3269    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3270    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3271    // summary verbatim (no tag block, since there are no tags), but the loose
3272    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3273    // `"Standup notes"` against the file's full summary, and emitted a spurious
3274    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3275    // (rebuild regenerates the identical line). Matching the renderer's exact
3276    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3277    // matches from the right so only the real trailing tag block is considered.
3278    let s = match s.rsplit_once("  ·  ") {
3279        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3280        _ => s,
3281    };
3282    Some(s.to_string())
3283}
3284
/// Report whether `s` is a non-empty tag block: one or more
/// whitespace-separated tokens, each a `#` followed by at least one more byte.
/// That is the exact shape the index renderer appends after the `·` separator
/// (`crate::index::format_md_entry`), and it is what tells the renderer's
/// `  ·  #tag` suffix apart from a literal `·` inside the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // `all` alone would accept an empty block, so require a first token.
    tokens.peek().is_some() && tokens.all(|tok| tok.len() >= 2 && tok.starts_with('#'))
}
3299
3300/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3301/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3302/// or the header isn't well-formed.
3303fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3304    let rest = line.strip_prefix("## [")?;
3305    let close = rest.find(']')?;
3306    let ts_str = &rest[..close];
3307    let tail = rest[close + 1..].trim();
3308
3309    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3310    // attach a zero offset — the log header carries minute precision, no zone.
3311    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3312    let offset = FixedOffset::east_opt(0)?;
3313    let ts = naive.and_local_timezone(offset).single()?;
3314
3315    // kind | object
3316    let (kind, object) = match tail.split_once('|') {
3317        Some((k, o)) => {
3318            let o = o.trim();
3319            (
3320                k.trim().to_string(),
3321                if o.is_empty() {
3322                    None
3323                } else {
3324                    Some(o.to_string())
3325                },
3326            )
3327        }
3328        None => (tail.to_string(), None),
3329    };
3330    if kind.is_empty() {
3331        return None;
3332    }
3333    Some((ts, kind, object))
3334}
3335
3336/// Every log file that holds entries for the working-set scan: the active
3337/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3338/// strictly-prior-month entries into the archives, so the active file alone is
3339/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3340/// unvalidated object can live in an archive after a month rollover. Reading the
3341/// archives here keeps the working-set readers in sync with the rest of the log
3342/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3343/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3344/// only log headers, never the content store, so the loop budget is preserved.
3345fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3346    let mut files = vec![store.root.join("log.md")];
3347    let archive_dir = store.root.join("log");
3348    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3349        let mut archives: Vec<PathBuf> = entries
3350            .flatten()
3351            .map(|e| e.path())
3352            .filter(|p| {
3353                p.is_file()
3354                    && p.file_name()
3355                        .and_then(|s| s.to_str())
3356                        .and_then(|n| n.strip_suffix(".md"))
3357                        .is_some_and(is_year_month_archive)
3358            })
3359            .collect();
3360        // Deterministic order (oldest month first); the callers fold across all
3361        // files so order doesn't affect the result, but a stable order keeps the
3362        // scan reproducible.
3363        archives.sort();
3364        files.extend(archives);
3365    }
3366    files
3367}
3368
/// Report whether `s` has the shape of a `YYYY-MM` archive stem — four ASCII
/// digits, a `-`, two ASCII digits — which is the `log/<YYYY-MM>.md` naming the
/// rotation in [`crate::log`] emits. Only the shape is checked; the digits are
/// not validated as a real calendar month.
fn is_year_month_archive(s: &str) -> bool {
    let Some((year, month)) = s.split_once('-') else {
        return false;
    };
    let is_digits = |part: &str, len: usize| {
        part.len() == len && part.bytes().all(|b| b.is_ascii_digit())
    };
    is_digits(year, 4) && is_digits(month, 2)
}
3378
3379/// The timestamp of the most recent `validate` entry across the active `log.md`
3380/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3381/// Reads only headers; never the whole store. Archive-aware so a `validate`
3382/// entry that rotated into an archive after a month rollover still anchors the
3383/// cutoff (without this, the cutoff silently resets to `None`).
3384fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3385    let mut latest: Option<DateTime<FixedOffset>> = None;
3386    for file in log_files_for_working_set(store) {
3387        let Ok(text) = std::fs::read_to_string(&file) else {
3388            continue;
3389        };
3390        for line in text.lines() {
3391            if !line.starts_with("## [") {
3392                continue;
3393            }
3394            if let Some((ts, kind, _)) = parse_log_header(line) {
3395                if kind == "validate" {
3396                    latest = Some(match latest {
3397                        Some(p) if p >= ts => p,
3398                        _ => ts,
3399                    });
3400                }
3401            }
3402        }
3403    }
3404    latest
3405}
3406
3407/// The set of content objects changed since `cutoff`, read from log entries
3408/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3409/// counts (no prior validate window). Returns store-relative `.md` paths.
3410///
3411/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3412/// month rollover [`Log::append`] rotates prior-month entries out of the active
3413/// file, so an object changed-but-never-validated in a prior month lives only in
3414/// an archive. Reading the archives here is what keeps `dbmd validate` from
3415/// silently skipping those files. Reads only log headers, never the content
3416/// store.
3417fn changed_objects_since(
3418    store: &Store,
3419    cutoff: Option<DateTime<FixedOffset>>,
3420) -> BTreeSet<PathBuf> {
3421    let mut out = BTreeSet::new();
3422    for file in log_files_for_working_set(store) {
3423        let Ok(text) = std::fs::read_to_string(&file) else {
3424            continue;
3425        };
3426        for line in text.lines() {
3427            if !line.starts_with("## [") {
3428                continue;
3429            }
3430            let Some((ts, kind, object)) = parse_log_header(line) else {
3431                continue;
3432            };
3433            if let Some(c) = cutoff {
3434                if ts < c {
3435                    continue;
3436                }
3437            }
3438            if !matches!(
3439                kind.as_str(),
3440                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3441            ) {
3442                continue;
3443            }
3444            if let Some(obj) = object {
3445                // The object slot is a store-relative path (or a wiki-link target).
3446                let bare = obj
3447                    .trim()
3448                    .trim_start_matches("[[")
3449                    .trim_end_matches("]]")
3450                    .split('|')
3451                    .next()
3452                    .unwrap_or("")
3453                    .trim()
3454                    .trim_end_matches(".md")
3455                    .to_string();
3456                if bare.is_empty() {
3457                    continue;
3458                }
3459                // Containment: the object slot is a log-header field that can
3460                // carry a `..`/absolute/prefix path (a hand-edited or
3461                // merge-malformed log line). Route it through the same safety gate
3462                // every other disk-touching validator path uses
3463                // (`safe_md_target_rel`, which `link_target_type` already applies)
3464                // so a `records/../../leaky` object cannot make
3465                // `validate_working_set` read + frontmatter-report on a file
3466                // OUTSIDE the store root. An unsafe object is dropped from the
3467                // changed set rather than probed.
3468                if let Some(rel) = safe_md_target_rel(&bare) {
3469                    out.insert(rel);
3470                }
3471            }
3472        }
3473    }
3474    out
3475}
3476
/// The result of the [`derived_from_ignored_type`] policy check: the
/// `derived_from` target that resolves to an ignored-type record, plus that
/// record's type. Carries exactly what both the validate finding and the
/// write-time warning need to render their message. When several targets
/// qualify, only the first one encountered is reported.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target as written (bare store-relative path,
    /// no `.md`).
    pub target: String,
    /// The resolved `type` of that target, which is present in
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3490
3491/// **The single authoritative `### Ignored types` derivation check.** Decides
3492/// whether a conclusion record derives from an ignored-type record: the
3493/// `meta-type` must be `conclusion`, `### Ignored types` must be non-empty, and
3494/// some `derived_from` target must resolve to a record whose `type` is in
3495/// `ignored_types`. Returns the first such target (and its type), or `None`.
3496///
3497/// Both surfaces call this so the policy lives in exactly one place:
3498/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3499/// `derived_from` targets it scanned from the raw frontmatter, and the write
3500/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3501/// The link *extraction* differs per surface (text-scan with line numbers vs.
3502/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3503/// resolution, and `ignored_types` membership — does not.
3504pub fn derived_from_ignored_type<I, S>(
3505    store: &Store,
3506    meta_type: &str,
3507    derived_from_targets: I,
3508) -> Option<DerivedFromIgnored>
3509where
3510    I: IntoIterator<Item = S>,
3511    S: AsRef<str>,
3512{
3513    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3514        return None;
3515    }
3516    for target in derived_from_targets {
3517        let target = target.as_ref();
3518        if let Some(target_type) = link_target_type(store, target) {
3519            if store.config.ignored_types.contains(&target_type) {
3520                return Some(DerivedFromIgnored {
3521                    target: target.to_string(),
3522                    target_type,
3523                });
3524            }
3525        }
3526    }
3527    None
3528}
3529
3530/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3531fn link_target_type(store: &Store, target: &str) -> Option<String> {
3532    let bare = target.trim_end_matches(".md");
3533    let abs = store.root.join(safe_md_target_rel(bare)?);
3534    let text = std::fs::read_to_string(&abs).ok()?;
3535    let (yaml, _, _) = split_frontmatter(&text)?;
3536    let value: Value = serde_norway::from_str(&yaml).ok()?;
3537    if let Value::Mapping(m) = value {
3538        m.get(Value::String("type".into())).and_then(scalar_string)
3539    } else {
3540        None
3541    }
3542}
3543
3544// ── Shape validators ─────────────────────────────────────────────────────────
3545
3546/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3547/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3548fn is_iso8601(s: &str) -> bool {
3549    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3550}
3551
3552/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3553/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3554/// accept the date-only form per the SPEC's worked example.
3555fn is_iso8601_date_or_datetime(s: &str) -> bool {
3556    let s = s.trim();
3557    if DateTime::parse_from_rfc3339(s).is_ok() {
3558        return true;
3559    }
3560    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3561}
3562
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Rules, applied after trimming surrounding whitespace:
/// - exactly one `@` — the double-`@` typo `sarah@@acme.com` and `a@b@c.com`
///   are rejected (otherwise the "domain" `@acme.com` passes every other check);
/// - no whitespace of any kind inside the address (space, tab, newline, …);
/// - the domain has at least one `.` and no empty label, so `.acme.com`,
///   `acme.com.` and `acme..com` are all rejected.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    if local.is_empty() || domain.contains('@') {
        return false;
    }
    // Any interior whitespace — not just an ASCII space — makes the address invalid.
    if s.contains(char::is_whitespace) {
        return false;
    }
    // A dotted domain whose every label is non-empty (covers leading, trailing
    // and doubled dots in one rule).
    domain.contains('.') && domain.split('.').all(|label| !label.is_empty())
}
3580
/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
/// plain decimal number with optional thousands separators and ≤ 2 decimals.
///
/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
/// rejected, and the ≤ 2-decimal rule is actually enforced.
///
/// Commas are accepted only as real thousands separators (`1,234,567.89`):
/// they must sit in the integer part, after a leading group of 1–3 digits, and
/// each must be followed by exactly three digits. Stray commas such as
/// `1,2,3`, `12,34`, `,5` or `1.2,3` are rejected rather than silently removed.
fn is_currency(s: &str) -> bool {
    let mut t = s.trim();
    // Strip a leading currency symbol …
    for sym in ["$", "€", "£", "¥"] {
        if let Some(rest) = t.strip_prefix(sym) {
            t = rest.trim_start();
            break;
        }
    }
    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
    // code must be exactly three ASCII letters and separated from the number by
    // whitespace, so a bare `USD` with no amount still fails.
    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
            t = rest.trim_start();
        }
    }

    // Validate comma placement *before* dropping the commas; otherwise any
    // comma-riddled digit string would pass.
    if !has_valid_thousands_grouping(t) {
        return false;
    }
    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
    is_plain_amount(cleaned.trim())
}

/// True when `s` either has no commas at all, or uses them strictly as
/// thousands separators: an optional sign, a leading group of 1–3 digits, then
/// comma-separated groups of exactly 3 digits, with no comma after the decimal
/// point. Only comma placement is judged here; the overall number shape is
/// checked by [`is_plain_amount`].
fn has_valid_thousands_grouping(s: &str) -> bool {
    if !s.contains(',') {
        return true;
    }
    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match unsigned.split_once('.') {
        Some((i, f)) => (i, f),
        None => (unsigned, ""),
    };
    if frac_part.contains(',') {
        return false;
    }
    let all_digits = |group: &str| group.bytes().all(|b| b.is_ascii_digit());
    let mut groups = int_part.split(',');
    let lead_ok = groups
        .next()
        .is_some_and(|g| (1..=3).contains(&g.len()) && all_digits(g));
    lead_ok && groups.all(|g| g.len() == 3 && all_digits(g))
}

/// True for a bare decimal amount: optional sign, ≥ 1 digit, an optional
/// fractional part of 1–2 digits. No exponents, no `inf`/`NaN`, no empty string.
fn is_plain_amount(s: &str) -> bool {
    let digits = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match digits.split_once('.') {
        Some((i, f)) => (i, Some(f)),
        None => (digits, None),
    };
    if int_part.is_empty() || !int_part.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }
    match frac_part {
        None => true,
        Some(f) => (1..=2).contains(&f.len()) && f.bytes().all(|b| b.is_ascii_digit()),
    }
}
3625
/// True for an http(s) URL: a recognized scheme prefix with at least one
/// character after it and no whitespace inside.
///
/// - The scheme is matched ASCII-case-insensitively (`HTTPS://…` is the same
///   scheme as `https://…`; URI schemes are case-insensitive).
/// - The remainder is measured against the *matched* scheme's own length, so a
///   single-character host on the shorter `http://` scheme (`http://x` — e.g.
///   an intranet/container hostname) is accepted, while a bare scheme with
///   nothing after it (`http://`, `https://`) is rejected.
/// - Embedded whitespace (`https://exa mple.com`) is rejected; surrounding
///   whitespace is trimmed first.
fn is_url(s: &str) -> bool {
    let s = s.trim();
    ["http://", "https://"].iter().any(|scheme| {
        // `str::get` returns `None` for a too-short string or a split that
        // would land inside a multi-byte character, so this never panics.
        match (s.get(..scheme.len()), s.get(scheme.len()..)) {
            (Some(head), Some(rest)) => {
                head.eq_ignore_ascii_case(scheme)
                    && !rest.is_empty()
                    && !rest.contains(char::is_whitespace)
            }
            _ => false,
        }
    })
}
3640
3641/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3642fn shape_suggestion(shape: Shape) -> String {
3643    match shape {
3644        Shape::String => "use a scalar string".into(),
3645        Shape::Int => "use an integer".into(),
3646        Shape::Bool => "use `true` or `false`".into(),
3647        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3648        Shape::Email => "use a `<local>@<domain>` address".into(),
3649        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3650        Shape::Url => "use an http(s) URL".into(),
3651    }
3652}
3653
3654/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3655/// can't know the folder, so the suggestion is generic but actionable.
3656fn short_form_suggestion(bare: &str) -> Option<String> {
3657    Some(format!(
3658        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3659        slugish(bare)
3660    ))
3661}
3662
/// Turn a plain string into a filesystem-ish leaf: trimmed, lowercased, each
/// whitespace character replaced by `-`, and every character that is not
/// alphanumeric, `-`, `/` or `_` dropped.
fn slugish(s: &str) -> String {
    let mut slug = String::new();
    for ch in s.trim().to_lowercase().chars() {
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3672
3673/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3674/// hashes a byte or reads an asset file's contents — byte presence and hash
3675/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3676/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3677/// content file's `asset`/`assets` declarations.
3678fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3679    use crate::assets;
3680
3681    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3682    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3683
3684    // Lenient manifest read: a malformed line is reported, not fatal.
3685    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3686    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3687        for (i, line) in text.lines().enumerate() {
3688            if line.trim().is_empty() {
3689                continue;
3690            }
3691            match serde_json::from_str::<assets::AssetRecord>(line) {
3692                Ok(rec) => {
3693                    manifest.insert(rec.path.clone(), rec);
3694                }
3695                Err(e) => push(
3696                    issues,
3697                    Severity::Error,
3698                    codes::ASSET_MANIFEST_MALFORMED,
3699                    manifest_rel,
3700                    Some((i as u32) + 1),
3701                    None,
3702                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3703                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3704                    vec![],
3705                ),
3706            }
3707        }
3708    }
3709
3710    // Per-wrapper declarations: every declared asset must be in the manifest and
3711    // must not point at a markdown content file.
3712    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3713    for (rel, p) in parsed {
3714        let Some(map) = &p.fm else {
3715            continue;
3716        };
3717        for decl in assets::declarations_from_yaml_map(map) {
3718            let norm = match assets::normalize_asset_path(&decl.path) {
3719                Ok(n) => n,
3720                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3721            };
3722            declared.insert(norm.clone());
3723            let is_md = Path::new(&norm)
3724                .extension()
3725                .and_then(|e| e.to_str())
3726                .map(|e| e.eq_ignore_ascii_case("md"))
3727                .unwrap_or(false);
3728            if is_md {
3729                push(
3730                    issues,
3731                    Severity::Warning,
3732                    codes::ASSET_PATH_IS_CONTENT,
3733                    rel,
3734                    None,
3735                    Some("asset".to_string()),
3736                    format!("asset path `{norm}` points at a markdown content file"),
3737                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3738                    vec![PathBuf::from(&norm)],
3739                );
3740            }
3741            if !manifest.contains_key(&norm) {
3742                push(
3743                    issues,
3744                    Severity::Error,
3745                    codes::ASSET_UNDECLARED,
3746                    rel,
3747                    None,
3748                    Some("asset".to_string()),
3749                    format!(
3750                        "references asset `{norm}` with no record in {}",
3751                        assets::MANIFEST_FILE
3752                    ),
3753                    Some("run `dbmd assets scan` to catalog it".to_string()),
3754                    vec![PathBuf::from(&norm)],
3755                );
3756            }
3757        }
3758    }
3759
3760    // Per-record: wrapper existence + orphan detection.
3761    for (path, rec) in &manifest {
3762        for w in &rec.wrappers {
3763            if !store.root.join(w).is_file() {
3764                push(
3765                    issues,
3766                    Severity::Error,
3767                    codes::ASSET_WRAPPER_BROKEN,
3768                    Path::new(path),
3769                    None,
3770                    None,
3771                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3772                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3773                    vec![PathBuf::from(w)],
3774                );
3775            }
3776        }
3777        if !declared.contains(path) {
3778            push(
3779                issues,
3780                Severity::Warning,
3781                codes::ASSET_MANIFEST_ORPHAN,
3782                Path::new(path),
3783                None,
3784                None,
3785                format!(
3786                    "`{path}` is in {} but no wrapper references it",
3787                    assets::MANIFEST_FILE
3788                ),
3789                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3790                vec![],
3791            );
3792        }
3793    }
3794}
3795
3796/// Push a fully-formed [`Issue`].
3797#[allow(clippy::too_many_arguments)]
3798fn push(
3799    issues: &mut Vec<Issue>,
3800    severity: Severity,
3801    code: &'static str,
3802    file: &Path,
3803    line: Option<u32>,
3804    key: Option<String>,
3805    message: String,
3806    suggestion: Option<String>,
3807    related: Vec<PathBuf>,
3808) {
3809    issues.push(Issue {
3810        severity,
3811        code,
3812        file: file.to_path_buf(),
3813        line,
3814        key,
3815        message,
3816        suggestion,
3817        related,
3818    });
3819}
3820
/// File line (1-based) of the first top-level frontmatter line that starts
/// with `key` immediately followed by `:`, or `None` if there is none.
///
/// `fm_yaml` is the YAML block only, so the result is offset by two: file line
/// 1 is the opening `---`, which puts YAML line index 0 on file line 2. Only
/// unindented lines qualify, so nested keys and `- key:` list items are
/// skipped.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let idx = fm_yaml.lines().position(|line| {
        // The line itself (not just its trimmed form) must begin with the key,
        // and the key must be directly followed by the `:` separator.
        line.starts_with(key)
            && line
                .trim_start()
                .strip_prefix(key)
                .is_some_and(|after| after.starts_with(':'))
    })?;
    Some((idx as u32) + 2)
}
3836
3837/// The line a *field-absence* issue (a required key that is missing entirely)
3838/// anchors to: the key's line when present, else line `1` — the frontmatter
3839/// block's opening `---`. A missing key has no line of its own; anchoring it to
3840/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3841/// line to point at instead of an unhelpful `null`.
3842fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3843    fm_key_line(fm_yaml, key).or(Some(1))
3844}
3845
3846/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3847/// output deterministic across runs.
3848fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3849    a.file
3850        .cmp(&b.file)
3851        .then(a.line.cmp(&b.line))
3852        .then(a.code.cmp(b.code))
3853        .then(a.key.cmp(&b.key))
3854}
3855
3856// ═════════════════════════════════════════════════════════════════════════════
3857//  Tests
3858// ═════════════════════════════════════════════════════════════════════════════
3859
3860#[cfg(test)]
3861mod tests {
3862    use super::*;
3863    use crate::parser::{Config, FieldSpec};
3864    use std::fs;
3865    use tempfile::TempDir;
3866
3867    #[test]
3868    fn split_frontmatter_tolerates_leading_bom() {
3869        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3870        // fence must not make validate treat the file as frontmatter-less while
3871        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3872        // for `\u{feff}---` and the function returned None.
3873        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3874        let parsed = split_frontmatter(text);
3875        assert!(
3876            parsed.is_some(),
3877            "a leading BOM must not hide frontmatter from validate"
3878        );
3879        let (yaml, body, close_line) = parsed.unwrap();
3880        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3881        assert_eq!(body, "body");
3882        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3883    }
3884
3885    /// A test store builder over a real tempdir. Every helper writes real files
3886    /// so the assertions exercise real behavior, not mocks.
3887    struct Fixture {
3888        dir: TempDir,
3889        config: Config,
3890    }
3891
3892    impl Fixture {
3893        /// A fresh store with a **valid** `DB.md` (the identity contract:
3894        /// `type: db-md` + `scope` + `owner`) and the two layer dirs. A valid
3895        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3896        /// clean; tests that want a broken DB.md write their own via `write`.
3897        fn new() -> Self {
3898            let dir = TempDir::new().unwrap();
3899            fs::write(
3900                dir.path().join("DB.md"),
3901                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3902            )
3903            .unwrap();
3904            for layer in ["sources", "records"] {
3905                fs::create_dir_all(dir.path().join(layer)).unwrap();
3906            }
3907            Fixture {
3908                dir,
3909                config: Config::default(),
3910            }
3911        }
3912
3913        /// A store with no `DB.md` marker.
3914        fn bare() -> Self {
3915            let dir = TempDir::new().unwrap();
3916            Fixture {
3917                dir,
3918                config: Config::default(),
3919            }
3920        }
3921
3922        /// Write a file at a store-relative path, creating parent dirs.
3923        fn write(&self, rel: &str, contents: &str) {
3924            let abs = self.dir.path().join(rel);
3925            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3926            fs::write(abs, contents).unwrap();
3927        }
3928
3929        fn store(&self) -> Store {
3930            Store {
3931                root: self.dir.path().to_path_buf(),
3932                config: self.config.clone(),
3933            }
3934        }
3935
3936        fn store_all(&self) -> Vec<Issue> {
3937            validate_all(&self.store()).unwrap()
3938        }
3939
3940        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3941        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3942        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3943        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3944        /// the sidecar carries the COMPLETE per-field projection and the fixture
3945        /// can't silently drift from what the index writer emits.
3946        fn rebuild_indexes(&self) {
3947            crate::index::Index::rebuild_all(&self.store()).unwrap();
3948        }
3949    }
3950
3951    /// True if any issue has this code.
3952    fn has(issues: &[Issue], code: &str) -> bool {
3953        issues.iter().any(|i| i.code == code)
3954    }
3955
3956    /// Count issues with a code.
3957    fn count(issues: &[Issue], code: &str) -> usize {
3958        issues.iter().filter(|i| i.code == code).count()
3959    }
3960
3961    /// The first issue with a code, or panic.
3962    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3963        issues
3964            .iter()
3965            .find(|i| i.code == code)
3966            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3967    }
3968
3969    /// A minimal valid `contact` body for reuse.
3970    fn valid_contact(summary: &str) -> String {
3971        format!(
3972            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3973        )
3974    }
3975
3976    // ── store marker ──────────────────────────────────────────────────────────
3977
3978    #[test]
3979    fn not_a_store_when_db_md_absent() {
3980        let fx = Fixture::bare();
3981        let issues = fx.store_all();
3982        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3983        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3984        assert!(issues[0].is_error());
3985    }
3986
3987    #[test]
3988    fn working_set_also_reports_not_a_store() {
3989        let fx = Fixture::bare();
3990        let issues = validate_working_set(&fx.store(), None).unwrap();
3991        assert!(has(&issues, codes::NOT_A_STORE));
3992    }
3993
3994    #[test]
3995    fn clean_store_has_no_issues() {
3996        let fx = Fixture::new();
3997        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3998        // Build the canonical indexes (complete per-field jsonl included) the
3999        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
4000        // proven clean across every projected field, not just summary/type.
4001        fx.rebuild_indexes();
4002        let issues = fx.store_all();
4003        assert!(
4004            issues.is_empty(),
4005            "expected a clean store, got: {issues:#?}"
4006        );
4007    }
4008
4009    // ── meta-type closed enum ─────────────────────────────────────────────────
4010
4011    /// Regression (adversarial review): a NON-SCALAR `meta-type` (a YAML list or
4012    /// mapping) must be rejected with `FM_BAD_META_TYPE`, not silently slip past
4013    /// the enum check (and then get reclassified as the default `fact`). Pre-fix
4014    /// the check was gated on `and_then(scalar_string)`, which returned `None`
4015    /// for a sequence/mapping and short-circuited the whole branch.
4016    #[test]
4017    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
4018        let fx = Fixture::new();
4019        let body = |mt: &str| {
4020            format!(
4021                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
4022            )
4023        };
4024
4025        // Valid enum members + absent (default fact) → no FM_BAD_META_TYPE.
4026        for ok in ["fact", "operational", "conclusion"] {
4027            fx.write("records/profiles/ok.md", &body(ok));
4028            let issues = validate_working_set(&fx.store(), None).unwrap();
4029            assert!(
4030                !has(&issues, codes::FM_BAD_META_TYPE),
4031                "`meta-type: {ok}` must be accepted; got {issues:#?}"
4032            );
4033        }
4034        fx.write(
4035            "records/profiles/absent.md",
4036            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
4037        );
4038        assert!(
4039            !has(
4040                &validate_working_set(&fx.store(), None).unwrap(),
4041                codes::FM_BAD_META_TYPE
4042            ),
4043            "an absent meta-type is the default `fact` and must be accepted"
4044        );
4045
4046        // Scalar-but-wrong, AND non-scalar (list / mapping) → FM_BAD_META_TYPE.
4047        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
4048            let fx2 = Fixture::new();
4049            fx2.write("records/profiles/bad.md", &body(bad));
4050            let issues = validate_working_set(&fx2.store(), None).unwrap();
4051            assert!(
4052                has(&issues, codes::FM_BAD_META_TYPE),
4053                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
4054            );
4055        }
4056    }
4057
4058    // ── DB.md structure ───────────────────────────────────────────────────────
4059
4060    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
4061    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
4062    /// would fail here).
4063    #[test]
4064    fn valid_db_md_emits_no_structure_issue() {
4065        let fx = Fixture::new();
4066        let issues = fx.store_all();
4067        assert!(
4068            !has(&issues, codes::DB_MD_BAD_TYPE)
4069                && !has(&issues, codes::DB_MD_MISSING_FIELD)
4070                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
4071            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
4072        );
4073    }
4074
4075    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
4076    /// anchored to the `type:` line (file line 2). Failing to read the type, or
4077    /// accepting a non-`db-md` type, breaks this.
4078    #[test]
4079    fn db_md_wrong_type_is_error() {
4080        let fx = Fixture::new();
4081        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
4082        let issues = fx.store_all();
4083        let i = find(&issues, codes::DB_MD_BAD_TYPE);
4084        assert!(i.is_error());
4085        assert_eq!(i.file, PathBuf::from("DB.md"));
4086        assert_eq!(i.key.as_deref(), Some("type"));
4087        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
4088    }
4089
4090    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
4091    /// absent field, each keyed on its field name, anchored to the block top.
4092    #[test]
4093    fn db_md_missing_scope_and_owner_each_report() {
4094        let fx = Fixture::new();
4095        fx.write("DB.md", "---\ntype: db-md\n---\n");
4096        let issues = fx.store_all();
4097        assert_eq!(
4098            count(&issues, codes::DB_MD_MISSING_FIELD),
4099            2,
4100            "both scope and owner absent → two issues: {issues:#?}"
4101        );
4102        let keys: BTreeSet<Option<String>> = issues
4103            .iter()
4104            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4105            .map(|i| i.key.clone())
4106            .collect();
4107        assert_eq!(
4108            keys,
4109            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
4110            "one issue keyed on each missing field"
4111        );
4112        for i in issues
4113            .iter()
4114            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4115        {
4116            assert!(i.is_error());
4117            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
4118        }
4119    }
4120
4121    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
4122    /// anchored to its own line — guarding against an "is the key textually
4123    /// present?" shortcut that would miss `owner:` with an empty value.
4124    #[test]
4125    fn db_md_blank_required_field_is_missing() {
4126        let fx = Fixture::new();
4127        fx.write(
4128            "DB.md",
4129            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
4130        );
4131        let issues = fx.store_all();
4132        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
4133        assert_eq!(i.key.as_deref(), Some("owner"));
4134        assert_eq!(
4135            i.line,
4136            Some(4),
4137            "a present-but-empty field anchors to its line"
4138        );
4139        assert!(
4140            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
4141            "scope is present and non-empty → only owner reported"
4142        );
4143    }
4144
4145    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
4146    /// to the heading's file line; the three recognized sections stay silent.
4147    #[test]
4148    fn db_md_unknown_section_is_warning() {
4149        let fx = Fixture::new();
4150        fx.write(
4151            "DB.md",
4152            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
4153            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
4154            // 11 `## Glossary`.
4155            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
4156        );
4157        let issues = fx.store_all();
4158        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
4159        assert!(!i.is_error(), "unknown section is a warning, not an error");
4160        assert_eq!(i.severity, Severity::Warning);
4161        assert_eq!(
4162            i.line,
4163            Some(11),
4164            "anchors to the `## Glossary` heading line"
4165        );
4166        assert!(
4167            i.message.contains("Glossary"),
4168            "the message names the offending section: {}",
4169            i.message
4170        );
4171        // The recognized `## Agent instructions` section did NOT fire.
4172        assert_eq!(
4173            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
4174            1,
4175            "only the unrecognized section is flagged: {issues:#?}"
4176        );
4177    }
4178
4179    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
4180    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
4181    #[test]
4182    fn db_md_no_frontmatter_reports_type_and_both_fields() {
4183        let fx = Fixture::new();
4184        fx.write("DB.md", "# just a heading, no frontmatter\n");
4185        let issues = fx.store_all();
4186        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
4187        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
4188    }
4189
4190    // ── frontmatter ─────────────────────────────────────────────────────────
4191
4192    #[test]
4193    fn missing_type_is_error() {
4194        let fx = Fixture::new();
4195        fx.write(
4196            "records/contacts/a.md",
4197            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
4198        );
4199        let issues = fx.store_all();
4200        assert!(has(&issues, codes::FM_MISSING_TYPE));
4201        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
4202    }
4203
4204    #[test]
4205    fn missing_universal_timestamps_are_errors_on_content_files() {
4206        let fx = Fixture::new();
4207        fx.write(
4208            "records/contacts/a.md",
4209            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
4210        );
4211        let issues = fx.store_all();
4212
4213        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
4214        assert_eq!(missing_created.key.as_deref(), Some("created"));
4215        assert!(missing_created.is_error());
4216
4217        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
4218        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
4219        assert!(missing_updated.is_error());
4220    }
4221
4222    #[test]
4223    fn meta_files_do_not_require_universal_timestamps() {
4224        let fx = Fixture::new();
4225        let issues = fx.store_all();
4226
4227        assert!(
4228            !has(&issues, codes::FM_MISSING_CREATED),
4229            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4230        );
4231        assert!(
4232            !has(&issues, codes::FM_MISSING_UPDATED),
4233            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4234        );
4235    }
4236
4237    #[test]
4238    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
4239        let fx = Fixture::new();
4240        fx.write(
4241            "records/profiles/a.md",
4242            "# Just a heading\n\nNo frontmatter here.\n",
4243        );
4244        let issues = fx.store_all();
4245        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4246        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4247    }
4248
4249    #[test]
4250    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4251        let fx = Fixture::new();
4252        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
4253        let issues = fx.store_all();
4254        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4255        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4256    }
4257
4258    #[test]
4259    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4260        let fx = Fixture::new();
4261        // A tab inside a mapping value is invalid YAML.
4262        fx.write(
4263            "records/contacts/a.md",
4264            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4265        );
4266        let issues = fx.store_all();
4267        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4268        assert!(issue.is_error());
4269        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4270        // When YAML doesn't parse we don't *also* claim the summary is missing;
4271        // the agent fixes the YAML first.
4272        assert!(
4273            !has(&issues, codes::SUMMARY_MISSING),
4274            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4275        );
4276    }
4277
4278    #[test]
4279    fn bad_created_timestamp_is_error() {
4280        let fx = Fixture::new();
4281        fx.write(
4282            "records/contacts/a.md",
4283            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4284        );
4285        let issues = fx.store_all();
4286        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4287        assert_eq!(issue.key.as_deref(), Some("created"));
4288        assert!(issue.is_error());
4289    }
4290
4291    #[test]
4292    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4293        let fx = Fixture::new();
4294        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4295        // `last_touch` is a type-specific date field → date-only is fine.
4296        fx.write(
4297            "records/contacts/a.md",
4298            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4299        );
4300        let issues = fx.store_all();
4301        let created_issues: Vec<_> = issues
4302            .iter()
4303            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4304            .collect();
4305        assert_eq!(
4306            created_issues.len(),
4307            1,
4308            "date-only `created` must fail: {issues:#?}"
4309        );
4310        assert!(
4311            !issues.iter().any(
4312                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4313            ),
4314            "date-only `last_touch` is valid: {issues:#?}"
4315        );
4316    }
4317
4318    // ── summary ─────────────────────────────────────────────────────────────
4319
4320    #[test]
4321    fn summary_missing_empty_multiline_toolong() {
4322        let fx = Fixture::new();
4323        fx.write(
4324            "records/profiles/missing.md",
4325            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4326        );
4327        fx.write(
4328            "records/profiles/empty.md",
4329            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4330        );
4331        let long = "x".repeat(201);
4332        fx.write(
4333            "records/profiles/long.md",
4334            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4335        );
4336        let issues = fx.store_all();
4337        assert!(has(&issues, codes::SUMMARY_MISSING));
4338        assert_eq!(
4339            find(&issues, codes::SUMMARY_MISSING).file,
4340            PathBuf::from("records/profiles/missing.md")
4341        );
4342        assert!(has(&issues, codes::SUMMARY_EMPTY));
4343        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4344        assert_eq!(
4345            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4346            Severity::Warning
4347        );
4348    }
4349
4350    #[test]
4351    fn summary_multiline_via_yaml_block_scalar() {
4352        let fx = Fixture::new();
4353        // A literal block scalar produces a value with a newline.
4354        fx.write(
4355            "records/profiles/a.md",
4356            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4357        );
4358        let issues = fx.store_all();
4359        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4360    }
4361
4362    #[test]
4363    fn summary_exactly_200_chars_is_ok() {
4364        let fx = Fixture::new();
4365        let s = "y".repeat(200);
4366        fx.write(
4367            "records/profiles/a.md",
4368            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4369        );
4370        let issues = fx.store_all();
4371        assert!(
4372            !has(&issues, codes::SUMMARY_TOO_LONG),
4373            "200 is the bound, inclusive: {issues:#?}"
4374        );
4375    }
4376
4377    #[test]
4378    fn meta_files_need_no_summary() {
4379        let fx = Fixture::new();
4380        // The root/layer/type indexes + log carry no summary and must not be
4381        // flagged. (A lone DB.md store with one contact and full indexes.)
4382        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4383        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4384        fx.write(
4385            "records/index.md",
4386            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4387        );
4388        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4389        fx.write(
4390            "records/contacts/index.jsonl",
4391            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4392        );
4393        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4394        let issues = fx.store_all();
4395        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4396    }
4397
4398    // ── tags ────────────────────────────────────────────────────────────────
4399
4400    #[test]
4401    fn nested_tags_warns_flat_tags_ok() {
4402        let fx = Fixture::new();
4403        fx.write(
4404            "records/contacts/nested.md",
4405            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4406        );
4407        fx.write(
4408            "records/contacts/flat.md",
4409            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4410        );
4411        let issues = fx.store_all();
4412        let tag_issues: Vec<_> = issues
4413            .iter()
4414            .filter(|i| i.code == codes::TAGS_MALFORMED)
4415            .collect();
4416        assert_eq!(
4417            tag_issues.len(),
4418            1,
4419            "only the nested-tags file should warn: {issues:#?}"
4420        );
4421        assert_eq!(
4422            tag_issues[0].file,
4423            PathBuf::from("records/contacts/nested.md")
4424        );
4425        assert_eq!(tag_issues[0].severity, Severity::Warning);
4426    }
4427
4428    // ── wiki-links ────────────────────────────────────────────────────────────
4429
4430    #[test]
4431    fn short_form_wiki_link_is_error() {
4432        let fx = Fixture::new();
4433        let mut body = valid_contact("links to a short form");
4434        body.push_str("\nSee [[sarah-chen]] for details.\n");
4435        fx.write("records/contacts/a.md", &body);
4436        let issues = fx.store_all();
4437        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4438        assert!(issue.is_error());
4439        assert!(issue.message.contains("sarah-chen"));
4440        // A short-form link must NOT also be reported broken — fix the form first.
4441        assert!(
4442            !issues
4443                .iter()
4444                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4445            "short-form should suppress broken: {issues:#?}"
4446        );
4447    }
4448
4449    #[test]
4450    fn broken_full_path_wiki_link_is_error() {
4451        let fx = Fixture::new();
4452        let mut body = valid_contact("links to a missing file");
4453        body.push_str("\nSee [[records/contacts/ghost]].\n");
4454        fx.write("records/contacts/a.md", &body);
4455        let issues = fx.store_all();
4456        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4457        assert!(issue.is_error());
4458        assert!(issue.message.contains("records/contacts/ghost"));
4459        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4460    }
4461
4462    #[test]
4463    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4464        let fx = Fixture::new();
4465        let mut body = valid_contact("links with traversal");
4466        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4467        fx.write("records/contacts/a.md", &body);
4468        let issues = fx.store_all();
4469        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4470        assert!(issue.message.contains("not a safe store-relative path"));
4471        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4472    }
4473
4474    #[test]
4475    fn valid_full_path_wiki_link_passes() {
4476        let fx = Fixture::new();
4477        fx.write("records/contacts/target.md", &valid_contact("target"));
4478        let mut body = valid_contact("links to target");
4479        body.push_str("\nSee [[records/contacts/target]].\n");
4480        fx.write("records/contacts/a.md", &body);
4481        let issues = fx.store_all();
4482        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4483        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4484    }
4485
4486    #[test]
4487    fn md_extension_wiki_link_warns_and_resolves() {
4488        let fx = Fixture::new();
4489        fx.write("records/contacts/target.md", &valid_contact("target"));
4490        let mut body = valid_contact("links with extension");
4491        body.push_str("\nSee [[records/contacts/target.md]].\n");
4492        fx.write("records/contacts/a.md", &body);
4493        let issues = fx.store_all();
4494        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4495        assert_eq!(issue.severity, Severity::Warning);
4496        assert_eq!(
4497            issue.suggestion.as_deref(),
4498            Some("drop the extension: [[records/contacts/target]]")
4499        );
4500        // The target exists once `.md` is stripped → not broken.
4501        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4502    }
4503
4504    #[test]
4505    fn wiki_links_in_code_fences_are_ignored() {
4506        let fx = Fixture::new();
4507        let mut body = valid_contact("has a fenced example");
4508        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4509        fx.write("records/contacts/a.md", &body);
4510        let issues = fx.store_all();
4511        assert!(
4512            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4513            "fenced wiki-links must be ignored: {issues:#?}"
4514        );
4515    }
4516
4517    #[test]
4518    fn flow_form_link_list_in_frontmatter_is_error() {
4519        let fx = Fixture::new();
4520        fx.write(
4521            "records/meetings/m.md",
4522            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4523        );
4524        let issues = fx.store_all();
4525        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4526        assert!(issue.is_error());
4527        assert_eq!(issue.key.as_deref(), Some("attendees"));
4528    }
4529
4530    #[test]
4531    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4532        let fx = Fixture::new();
4533        fx.write("records/contacts/a.md", &valid_contact("a"));
4534        fx.write("records/contacts/b.md", &valid_contact("b"));
4535        fx.write(
4536            "records/meetings/m.md",
4537            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4538        );
4539        let issues = fx.store_all();
4540        assert!(
4541            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4542            "{issues:#?}"
4543        );
4544        // Block-form link targets are still integrity-checked (both exist here).
4545        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4546    }
4547
4548    #[test]
4549    fn frontmatter_short_form_link_field_is_error() {
4550        let fx = Fixture::new();
4551        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4552        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4553        fx.write(
4554            "records/synthesis/a.md",
4555            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4556        );
4557        let issues = fx.store_all();
4558        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4559        assert!(issue.is_error());
4560        assert_eq!(issue.key.as_deref(), Some("related"));
4561    }
4562
4563    #[test]
4564    fn unquoted_frontmatter_link_is_recognized() {
4565        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4566        // string. The validator must still see it as a wiki-link (text-based
4567        // extraction). A short-form custom field must report SHORT_FORM, and a
4568        // full-path one with a missing target must report BROKEN.
4569        let fx = Fixture::new();
4570        fx.write(
4571            "records/synthesis/short.md",
4572            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4573        );
4574        fx.write(
4575            "records/synthesis/broken.md",
4576            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4577        );
4578        let issues = fx.store_all();
4579        assert!(
4580            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4581                && i.file == Path::new("records/synthesis/short.md")
4582                && i.key.as_deref() == Some("related")),
4583            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4584        );
4585        assert!(
4586            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4587                && i.file == Path::new("records/synthesis/broken.md")),
4588            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4589        );
4590    }
4591
4592    #[test]
4593    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4594        // A short-form value in a *declared* link field (a `### contact` schema
4595        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4596        // (the target isn't under the prefix), and must NOT also be reported as a
4597        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4598        let mut fx = Fixture::new();
4599        fx.config.schemas.insert(
4600            "contact".into(),
4601            Schema {
4602                fields: vec![FieldSpec {
4603                    name: "company".into(),
4604                    link_prefix: Some(PathBuf::from("records/companies")),
4605                    ..Default::default()
4606                }],
4607                ..Default::default()
4608            },
4609        );
4610        fx.write(
4611            "records/contacts/a.md",
4612            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4613        );
4614        let issues = fx.store_all();
4615        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4616        assert_eq!(issue.key.as_deref(), Some("company"));
4617        // The same link must NOT also be double-reported via the generic path.
4618        assert!(
4619            !issues
4620                .iter()
4621                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4622                    && i.key.as_deref() == Some("company")),
4623            "schema link fields are checked once, by the schema path: {issues:#?}"
4624        );
4625    }
4626
4627    #[test]
4628    fn schema_link_field_with_md_extension_still_warns() {
4629        let mut fx = Fixture::new();
4630        fx.config.schemas.insert(
4631            "contact".into(),
4632            Schema {
4633                fields: vec![FieldSpec {
4634                    name: "company".into(),
4635                    link_prefix: Some(PathBuf::from("records/companies")),
4636                    ..Default::default()
4637                }],
4638                ..Default::default()
4639            },
4640        );
4641        fx.write(
4642            "records/companies/acme.md",
4643            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4644        );
4645        fx.write(
4646            "records/contacts/a.md",
4647            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4648        );
4649        let issues = fx.store_all();
4650        let issue = issues
4651            .iter()
4652            .find(|i| {
4653                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4654            })
4655            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4656        assert_eq!(issue.severity, Severity::Warning);
4657        assert!(
4658            !issues
4659                .iter()
4660                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4661            "extensionless existence check should still find acme.md: {issues:#?}"
4662        );
4663    }
4664
4665    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4666
4667    #[test]
4668    fn explicit_schema_required_shape_enum() {
4669        let fx = {
4670            let mut fx = Fixture::new();
4671            // contact schema: name required, email required+email shape,
4672            // status enum: active|inactive
4673            let schema = Schema {
4674                fields: vec![
4675                    FieldSpec {
4676                        name: "name".into(),
4677                        required: true,
4678                        ..Default::default()
4679                    },
4680                    FieldSpec {
4681                        name: "email".into(),
4682                        required: true,
4683                        shape: Some(Shape::Email),
4684                        ..Default::default()
4685                    },
4686                    FieldSpec {
4687                        name: "status".into(),
4688                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4689                        ..Default::default()
4690                    },
4691                ],
4692                ..Default::default()
4693            };
4694            fx.config.schemas.insert("contact".into(), schema);
4695            fx
4696        };
4697        fx.write(
4698            "records/contacts/a.md",
4699            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4700        );
4701        let issues = fx.store_all();
4702        // name absent → MISSING_REQUIRED
4703        assert!(
4704            issues
4705                .iter()
4706                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4707                    && i.key.as_deref() == Some("name")),
4708            "{issues:#?}"
4709        );
4710        // email malformed → SHAPE_MISMATCH
4711        assert!(
4712            issues.iter().any(
4713                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4714            ),
4715            "{issues:#?}"
4716        );
4717        // status archived not in enum → ENUM_VIOLATION
4718        assert!(
4719            issues
4720                .iter()
4721                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4722                    && i.key.as_deref() == Some("status")),
4723            "{issues:#?}"
4724        );
4725    }
4726
4727    #[test]
4728    fn schema_without_link_field_allows_plain_value() {
4729        // A `contact` schema with no `company` link field means a plain `company`
4730        // string is fine — schema enforcement is exactly what the store declares,
4731        // nothing implicit.
4732        let mut fx = Fixture::new();
4733        fx.config.schemas.insert(
4734            "contact".into(),
4735            Schema {
4736                fields: vec![FieldSpec {
4737                    name: "name".into(),
4738                    required: true,
4739                    ..Default::default()
4740                }],
4741                ..Default::default()
4742            },
4743        );
4744        fx.write(
4745            "records/contacts/a.md",
4746            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4747        );
4748        let issues = fx.store_all();
4749        assert!(
4750            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4751            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4752        );
4753    }
4754
4755    #[test]
4756    fn schema_link_field_plain_value_is_prefix_mismatch() {
4757        // The surviving link-enforcement path: a declared `link to <prefix>/`
4758        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4759        let mut fx = Fixture::new();
4760        fx.config.schemas.insert(
4761            "contact".into(),
4762            Schema {
4763                fields: vec![FieldSpec {
4764                    name: "company".into(),
4765                    link_prefix: Some(PathBuf::from("records/companies")),
4766                    ..Default::default()
4767                }],
4768                ..Default::default()
4769            },
4770        );
4771        fx.write(
4772            "records/contacts/a.md",
4773            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4774        );
4775        let issues = fx.store_all();
4776        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4777        assert_eq!(issue.key.as_deref(), Some("company"));
4778        assert!(issue
4779            .suggestion
4780            .as_deref()
4781            .unwrap()
4782            .contains("records/companies/"));
4783    }
4784
4785    #[test]
4786    fn schema_shape_int_and_url_and_currency() {
4787        let mut fx = Fixture::new();
4788        fx.config.schemas.insert(
4789            "widget".into(),
4790            Schema {
4791                fields: vec![
4792                    FieldSpec {
4793                        name: "qty".into(),
4794                        shape: Some(Shape::Int),
4795                        ..Default::default()
4796                    },
4797                    FieldSpec {
4798                        name: "site".into(),
4799                        shape: Some(Shape::Url),
4800                        ..Default::default()
4801                    },
4802                    FieldSpec {
4803                        name: "price".into(),
4804                        shape: Some(Shape::Currency),
4805                        ..Default::default()
4806                    },
4807                ],
4808                ..Default::default()
4809            },
4810        );
4811        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4812        // ISO code + amount). It must pass — it used to spuriously fail.
4813        fx.write(
4814            "records/widgets/ok.md",
4815            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4816        );
4817        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4818        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4819        // the under-rejection half of the finding.
4820        fx.write(
4821            "records/widgets/bad.md",
4822            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4823        );
4824        let issues = fx.store_all();
4825        let bad_shape: Vec<_> = issues
4826            .iter()
4827            .filter(|i| {
4828                i.code == codes::SCHEMA_SHAPE_MISMATCH
4829                    && i.file == Path::new("records/widgets/bad.md")
4830            })
4831            .map(|i| i.key.clone().unwrap_or_default())
4832            .collect();
4833        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4834        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4835        assert!(
4836            bad_shape.contains(&"price".to_string()),
4837            "inf must be rejected as currency: {issues:#?}"
4838        );
4839        assert!(
4840            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4841                && i.file == Path::new("records/widgets/ok.md")),
4842            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4843        );
4844    }
4845
4846    #[test]
4847    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4848        let mut fx = Fixture::new();
4849        fx.config.schemas.insert(
4850            "contact".into(),
4851            Schema {
4852                fields: vec![
4853                    FieldSpec {
4854                        name: "email".into(),
4855                        required: true,
4856                        shape: Some(Shape::Email),
4857                        ..Default::default()
4858                    },
4859                    FieldSpec {
4860                        name: "status".into(),
4861                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4862                        ..Default::default()
4863                    },
4864                ],
4865                ..Default::default()
4866            },
4867        );
4868        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4869        // used to slip through entirely (`scalar_string` → None → the shape and
4870        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4871        fx.write(
4872            "records/contacts/bad.md",
4873            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4874        );
4875        let issues = fx.store_all();
4876        let mismatched: Vec<_> = issues
4877            .iter()
4878            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4879            .map(|i| i.key.clone().unwrap_or_default())
4880            .collect();
4881        assert!(
4882            mismatched.contains(&"email".to_string()),
4883            "list-valued required email must flag: {issues:#?}"
4884        );
4885        assert!(
4886            mismatched.contains(&"status".to_string()),
4887            "list-valued enum must flag: {issues:#?}"
4888        );
4889    }
4890
4891    #[test]
4892    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4893        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4894        for ok in [
4895            "100",
4896            "1234.56",
4897            "$1,234.50",
4898            "USD 100", // the finding's headline probe — used to be false
4899            "usd 100", // case-insensitive code
4900            "EUR 9.50",
4901            "£12",
4902            "¥1000",
4903            "-5.00", // signed amounts are real (refunds)
4904            "+5",
4905            "1,000,000",
4906        ] {
4907            assert!(is_currency(ok), "expected currency: {ok:?}");
4908        }
4909        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4910        // bare-code / exponent cases the docstring forbids.
4911        for bad in [
4912            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4913            "12.999", // 3 decimals
4914            "1.2345", // 4 decimals
4915            "USD",    // bare code, no amount
4916            "$",      // bare symbol
4917            "free", "", " ", "1e3",      // exponent form
4918            "1.",       // trailing dot, no fractional digits
4919            ".5",       // leading dot, no integer digits
4920            "1 000",    // space as separator is not a thousands separator
4921            "USDD 100", // 4-letter "code" must not strip
4922        ] {
4923            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4924        }
4925    }
4926
4927    // ── policies ───────────────────────────────────────────────────────────
4928
4929    #[test]
4930    fn ignored_type_present_is_info() {
4931        let mut fx = Fixture::new();
4932        fx.config.ignored_types.push("temp".into());
4933        fx.write(
4934            "records/temps/x.md",
4935            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4936        );
4937        let issues = fx.store_all();
4938        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4939        assert_eq!(issue.severity, Severity::Info);
4940        assert!(!issue.is_error());
4941        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4942    }
4943
4944    #[test]
4945    fn conclusion_record_derived_from_ignored_type_warns() {
4946        let mut fx = Fixture::new();
4947        fx.config.ignored_types.push("temp".into());
4948        fx.write(
4949            "records/temps/x.md",
4950            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4951        );
4952        // The policy now gates on `meta-type: conclusion` (not the retired
4953        // `type: wiki-page`): a conclusion record that derives from an
4954        // ignored-type record warns.
4955        fx.write(
4956            "records/synthesis/t.md",
4957            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4958        );
4959        let issues = fx.store_all();
4960        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4961        assert_eq!(issue.severity, Severity::Warning);
4962        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4963        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4964    }
4965
4966    /// The shared `derived_from_ignored_type` entry point — the single
4967    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
4968    /// warning) now route through, so they cannot diverge. This pins its
4969    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
4970    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
4971    /// match carrying the resolved target type, and a non-ignored target
4972    /// rejected.
4973    #[test]
4974    fn derived_from_ignored_type_is_the_shared_policy_decision() {
4975        let mut fx = Fixture::new();
4976        fx.config.ignored_types.push("secret".into());
4977        // An ignored-type record …
4978        fx.write(
4979            "records/secrets/s.md",
4980            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4981        );
4982        // … and a non-ignored record.
4983        fx.write(
4984            "records/contacts/c.md",
4985            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4986        );
4987        let store = fx.store();
4988
4989        // Positive: a conclusion record deriving from the ignored-type record
4990        // matches, and the hit carries both the target (as written) and its
4991        // resolved type.
4992        let hit =
4993            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
4994                .expect("conclusion → ignored-type record must match");
4995        assert_eq!(hit.target, "records/secrets/s");
4996        assert_eq!(hit.target_type, "secret");
4997
4998        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
4999        // the same ignored-type target.
5000        assert_eq!(
5001            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
5002            None,
5003            "only conclusion derivation is policed"
5004        );
5005
5006        // Target gate: a conclusion deriving from a non-ignored record is fine.
5007        assert_eq!(
5008            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
5009            None,
5010            "deriving from a non-ignored type is allowed"
5011        );
5012
5013        // First match wins across multiple targets (here the second is the hit).
5014        let hit = derived_from_ignored_type(
5015            &store,
5016            "conclusion",
5017            ["records/contacts/c", "records/secrets/s"],
5018        )
5019        .expect("a later ignored-type target must still be found");
5020        assert_eq!(hit.target, "records/secrets/s");
5021
5022        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
5023        fx.config.ignored_types.clear();
5024        let store = fx.store();
5025        assert_eq!(
5026            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
5027            None,
5028            "an empty ignored-types policy short-circuits"
5029        );
5030    }
5031
5032    // ── duplicates ───────────────────────────────────────────────────────────
5033
5034    #[test]
5035    fn dup_id_is_hard_error_with_related() {
5036        let fx = Fixture::new();
5037        fx.write(
5038            "records/contacts/a.md",
5039            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5040        );
5041        fx.write(
5042            "records/contacts/b.md",
5043            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5044        );
5045        let issues = fx.store_all();
5046        // Reporting rule #1: ONE issue per collision group, keyed on the
5047        // lexicographically smallest path (`a.md`), partner in `related`.
5048        assert_eq!(
5049            count(&issues, codes::DUP_ID),
5050            1,
5051            "one issue per group: {issues:#?}"
5052        );
5053        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
5054        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
5055        assert!(a.is_error());
5056        assert_eq!(a.key.as_deref(), Some("id"));
5057        assert_eq!(
5058            a.line,
5059            Some(3),
5060            "anchors to the `id` line on the reported file"
5061        );
5062        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
5063    }
5064
5065    #[test]
5066    fn dup_id_not_fired_in_working_set() {
5067        // DUP_* is an --all-only cross-file check; the working set must not run it.
5068        let fx = Fixture::new();
5069        fx.write(
5070            "records/contacts/a.md",
5071            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5072        );
5073        fx.write(
5074            "records/contacts/b.md",
5075            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5076        );
5077        // Log says both changed since epoch, so they're in the working set.
5078        fx.write(
5079            "log.md",
5080            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
5081        );
5082        let issues = validate_working_set(&fx.store(), None).unwrap();
5083        assert!(
5084            !has(&issues, codes::DUP_ID),
5085            "DUP_ID is --all only: {issues:#?}"
5086        );
5087    }
5088
5089    #[test]
5090    fn dup_unique_key_single_field_is_warning() {
5091        let mut fx = Fixture::new();
5092        // contact declares `- unique: email`.
5093        fx.config.schemas.insert(
5094            "contact".into(),
5095            Schema {
5096                unique_keys: vec![vec!["email".into()]],
5097                ..Default::default()
5098            },
5099        );
5100        for (f, name) in [("a", "A"), ("b", "B")] {
5101            fx.write(
5102                &format!("records/contacts/{f}.md"),
5103                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
5104            );
5105        }
5106        let issues = fx.store_all();
5107        // One issue per group (rule #1), keyed on the smallest path, anchored to
5108        // the single `email` field.
5109        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
5110        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5111        assert_eq!(dup.severity, Severity::Warning);
5112        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
5113        assert_eq!(dup.key.as_deref(), Some("email"));
5114        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
5115    }
5116
5117    #[test]
5118    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
5119        let mut fx = Fixture::new();
5120        // expense declares `- unique: date, amount, vendor` (a compound key).
5121        fx.config.schemas.insert(
5122            "expense".into(),
5123            Schema {
5124                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
5125                ..Default::default()
5126            },
5127        );
5128        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
5129        let exp = |f: &str, amount: &str| {
5130            format!(
5131            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
5132        )
5133        };
5134        fx.write("records/expenses/e1.md", &exp("e1", "100"));
5135        fx.write("records/expenses/e2.md", &exp("e2", "100"));
5136        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
5137        let issues = fx.store_all();
5138        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
5139        // (e1) with e2 in `related`; e3 differs on amount and never appears.
5140        assert_eq!(
5141            count(&issues, codes::DUP_UNIQUE_KEY),
5142            1,
5143            "only e1+e2 collide, one issue: {issues:#?}"
5144        );
5145        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5146        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5147        assert_eq!(
5148            dup.line,
5149            Some(1),
5150            "compound-key collision anchors to line 1"
5151        );
5152        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5153        assert!(
5154            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5155                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5156            "e3 differs on amount and must not collide: {issues:#?}"
5157        );
5158    }
5159
5160    #[test]
5161    fn dup_unique_key_list_field_is_order_independent() {
5162        let mut fx = Fixture::new();
5163        // meeting declares `- unique: date, attendees`; the list field is a set.
5164        fx.config.schemas.insert(
5165            "meeting".into(),
5166            Schema {
5167                unique_keys: vec![vec!["date".into(), "attendees".into()]],
5168                ..Default::default()
5169            },
5170        );
5171        fx.write("records/contacts/a.md", &valid_contact("a"));
5172        fx.write("records/contacts/b.md", &valid_contact("b"));
5173        let m = |f: &str, order: &str| {
5174            let attendees = if order == "ab" {
5175                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
5176            } else {
5177                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
5178            };
5179            format!(
5180                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5181            )
5182        };
5183        fx.write("records/meetings/m1.md", &m("m1", "ab"));
5184        fx.write("records/meetings/m2.md", &m("m2", "ba"));
5185        let issues = fx.store_all();
5186        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
5187        // → a single issue on the smaller path.
5188        assert_eq!(
5189            count(&issues, codes::DUP_UNIQUE_KEY),
5190            1,
5191            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5192        );
5193        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5194        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5195        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5196    }
5197
5198    // ── indexes ───────────────────────────────────────────────────────────────
5199
5200    #[test]
5201    fn missing_indexes_at_all_three_levels() {
5202        let fx = Fixture::new();
5203        fx.write("records/contacts/a.md", &valid_contact("a"));
5204        let issues = fx.store_all();
5205        // root, layer (records), and type-folder (records/contacts) all missing.
5206        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
5207        // would-be index.md), per the field convention `EXPECTED` pins.
5208        let missing_files: BTreeSet<PathBuf> = issues
5209            .iter()
5210            .filter(|i| i.code == codes::INDEX_MISSING)
5211            .map(|i| i.file.clone())
5212            .collect();
5213        assert!(
5214            missing_files.contains(&PathBuf::from("index.md")),
5215            "{issues:#?}"
5216        );
5217        assert!(
5218            missing_files.contains(&PathBuf::from("records/index.md")),
5219            "{issues:#?}"
5220        );
5221        assert!(
5222            missing_files.contains(&PathBuf::from("records/contacts")),
5223            "{issues:#?}"
5224        );
5225        // When the index.md is entirely absent we do NOT additionally fire
5226        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5227        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5228    }
5229
5230    #[test]
5231    fn index_stale_entry_and_missing_entry() {
5232        let fx = Fixture::new();
5233        fx.write(
5234            "records/contacts/present.md",
5235            &valid_contact("present contact"),
5236        );
5237        // Indexes for the parents (root/layer) present so we isolate type-folder.
5238        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5239        fx.write(
5240            "records/index.md",
5241            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5242        );
5243        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5244        fx.write(
5245            "records/contacts/index.md",
5246            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5247        );
5248        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5249        let issues = fx.store_all();
5250        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5251        assert!(stale.message.contains("ghost"));
5252        assert!(stale.is_error());
5253        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5254        assert!(
5255            missing.message.contains("present.md"),
5256            "{}",
5257            missing.message
5258        );
5259    }
5260
5261    #[test]
5262    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5263        let fx = Fixture::new();
5264        fx.write("records/contacts/a.md", &valid_contact("a"));
5265        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5266        fx.write(
5267            "records/index.md",
5268            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5269        );
5270        fx.write(
5271            "records/contacts/index.md",
5272            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5273        );
5274        fx.write(
5275            "records/contacts/index.jsonl",
5276            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5277        );
5278        let issues = fx.store_all();
5279        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5280        assert!(stale.message.contains("not a safe store-relative path"));
5281    }
5282
5283    #[test]
5284    fn index_summary_mismatch() {
5285        let fx = Fixture::new();
5286        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5287        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5288        fx.write(
5289            "records/index.md",
5290            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5291        );
5292        fx.write(
5293            "records/contacts/index.md",
5294            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5295        );
5296        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5297        let issues = fx.store_all();
5298        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5299        assert!(issue.is_error());
5300        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5301    }
5302
5303    #[test]
5304    fn index_summary_match_passes() {
5305        let fx = Fixture::new();
5306        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5307        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5308        fx.write(
5309            "records/index.md",
5310            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5311        );
5312        fx.write(
5313            "records/contacts/index.md",
5314            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5315        );
5316        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5317        let issues = fx.store_all();
5318        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5319    }
5320
5321    #[test]
5322    fn index_entry_with_tag_suffix_matches_summary() {
5323        let fx = Fixture::new();
5324        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5325        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5326        fx.write(
5327            "records/index.md",
5328            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5329        );
5330        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5331        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5332        // which must be stripped before comparing against the file's summary.
5333        fx.write(
5334            "records/contacts/index.md",
5335            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5336        );
5337        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5338        let issues = fx.store_all();
5339        assert!(
5340            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5341            "tag suffix should be stripped: {issues:#?}"
5342        );
5343    }
5344
5345    #[test]
5346    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5347        // Regression (the finding): a tagless file whose `summary` legitimately
5348        // ends in a single-spaced ` · #word` tail round-trips through `index
5349        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5350        // file has no tags). The validator must NOT mistake that single-spaced
5351        // tail for the renderer's tag suffix, or it reports a spurious — and
5352        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5353        let fx = Fixture::new();
5354        fx.write(
5355            "records/contacts/a.md",
5356            &valid_contact("Standup notes · #standup"),
5357        );
5358        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5359        fx.write(
5360            "records/index.md",
5361            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5362        );
5363        fx.write(
5364            "records/contacts/index.md",
5365            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5366        );
5367        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5368        let issues = fx.store_all();
5369        assert!(
5370            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5371            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5372        );
5373    }
5374
5375    #[test]
5376    fn index_jsonl_desync_missing_file_in_jsonl() {
5377        let fx = Fixture::new();
5378        fx.write("records/contacts/a.md", &valid_contact("a"));
5379        fx.write("records/contacts/b.md", &valid_contact("b"));
5380        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5381        fx.write(
5382            "records/index.md",
5383            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5384        );
5385        fx.write(
5386            "records/contacts/index.md",
5387            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5388        );
5389        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5390        fx.write(
5391            "records/contacts/index.jsonl",
5392            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5393        );
5394        let issues = fx.store_all();
5395        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5396        assert!(desync.message.contains("b.md"), "{}", desync.message);
5397    }
5398
5399    #[test]
5400    fn index_jsonl_desync_record_points_at_missing_file() {
5401        let fx = Fixture::new();
5402        fx.write("records/contacts/a.md", &valid_contact("a"));
5403        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5404        fx.write(
5405            "records/index.md",
5406            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5407        );
5408        fx.write(
5409            "records/contacts/index.md",
5410            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5411        );
5412        fx.write(
5413            "records/contacts/index.jsonl",
5414            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5415        );
5416        let issues = fx.store_all();
5417        assert!(
5418            issues
5419                .iter()
5420                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5421            "{issues:#?}"
5422        );
5423    }
5424
5425    #[test]
5426    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5427        let fx = Fixture::new();
5428        fx.write("records/contacts/a.md", &valid_contact("a"));
5429        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5430        fx.write(
5431            "records/index.md",
5432            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5433        );
5434        fx.write(
5435            "records/contacts/index.md",
5436            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5437        );
5438        fx.write(
5439            "records/contacts/index.jsonl",
5440            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5441        );
5442        let issues = fx.store_all();
5443        assert!(
5444            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5445                && i.message.contains("not a safe store-relative path")),
5446            "{issues:#?}"
5447        );
5448    }
5449
5450    #[test]
5451    fn index_jsonl_stale_summary() {
5452        let fx = Fixture::new();
5453        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5454        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5455        fx.write(
5456            "records/index.md",
5457            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5458        );
5459        fx.write(
5460            "records/contacts/index.md",
5461            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5462        );
5463        // jsonl summary disagrees with the file frontmatter.
5464        fx.write(
5465            "records/contacts/index.jsonl",
5466            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5467        );
5468        let issues = fx.store_all();
5469        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5470        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5471        assert!(stale.key.as_deref().unwrap().contains("summary"));
5472    }
5473
5474    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5475    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5476    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5477    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5478    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5479    /// `email` validated clean and answered `--where email=…` with a phantom
5480    /// value present in no file. This is the direct regression guard.
5481    #[test]
5482    fn index_jsonl_stale_queryable_field_email() {
5483        let fx = Fixture::new();
5484        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5485        fx.write("records/contacts/a.md", contact);
5486        // Start from the canonical, fully-correct sidecar set …
5487        fx.rebuild_indexes();
5488        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5489        let good = fs::read_to_string(&jsonl_path).unwrap();
5490        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5491        assert!(
5492            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5493            "freshly-rebuilt sidecar must not be stale"
5494        );
5495        // … then desync ONLY the email so it's the single differing field.
5496        assert!(
5497            good.contains("real@correct.com"),
5498            "sidecar projects email: {good}"
5499        );
5500        fx.write(
5501            "records/contacts/index.jsonl",
5502            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5503        );
5504
5505        let issues = fx.store_all();
5506        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5507        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5508        // The mismatch is reported precisely on `email`, and summary/type — which
5509        // still match — are NOT named.
5510        let key = stale.key.as_deref().unwrap();
5511        assert!(
5512            key.contains("email"),
5513            "expected `email` in stale key, got {key:?}"
5514        );
5515        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5516        assert!(!key.contains("type"), "type still matches: {key:?}");
5517    }
5518
5519    /// Broaden the guard across the typed/list/timestamp projections at once:
5520    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5521    /// caught, with all three named in one issue.
5522    #[test]
5523    fn index_jsonl_stale_typed_and_list_fields() {
5524        let fx = Fixture::new();
5525        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5526        fx.write("records/expenses/e.md", expense);
5527        fx.rebuild_indexes();
5528        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5529        let good = fs::read_to_string(&jsonl_path).unwrap();
5530        assert!(
5531            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5532            "freshly-rebuilt sidecar must not be stale"
5533        );
5534        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5535        let stale_line = good
5536            .replace("\"q2\"", "\"WRONG-TAG\"")
5537            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5538            .replace("1299", "9999");
5539        fx.write("records/expenses/index.jsonl", &stale_line);
5540
5541        let issues = fx.store_all();
5542        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5543        let key = stale.key.as_deref().unwrap();
5544        for expected in ["amount", "tags", "updated"] {
5545            assert!(
5546                key.contains(expected),
5547                "expected `{expected}` in stale key, got {key:?}"
5548            );
5549        }
5550    }
5551
5552    #[test]
5553    fn index_orphan_in_noncanonical_folder() {
5554        let fx = Fixture::new();
5555        fx.write("records/contacts/a.md", &valid_contact("a"));
5556        // Build the canonical indexes so they aren't reported as orphans.
5557        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5558        fx.write(
5559            "records/index.md",
5560            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5561        );
5562        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5563        fx.write(
5564            "records/contacts/index.jsonl",
5565            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5566        );
5567        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5568        fx.write(
5569            "records/contacts/subfolder/index.md",
5570            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5571        );
5572        let issues = fx.store_all();
5573        let orphan = find(&issues, codes::INDEX_ORPHAN);
5574        assert_eq!(orphan.severity, Severity::Warning);
5575        assert_eq!(
5576            orphan.file,
5577            PathBuf::from("records/contacts/subfolder/index.md")
5578        );
5579    }
5580
5581    #[test]
5582    fn index_wrong_scope() {
5583        let fx = Fixture::new();
5584        fx.write("records/contacts/a.md", &valid_contact("a"));
5585        // Root index declares the wrong scope.
5586        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5587        fx.write(
5588            "records/index.md",
5589            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5590        );
5591        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5592        fx.write(
5593            "records/contacts/index.jsonl",
5594            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5595        );
5596        let issues = fx.store_all();
5597        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5598        assert_eq!(issue.severity, Severity::Warning);
5599        assert_eq!(issue.file, PathBuf::from("index.md"));
5600    }
5601
5602    #[test]
5603    fn capped_type_folder_index_does_not_flag_missing_entries() {
5604        // Over the 500-entry cap, omitted entries are expected, not an error.
5605        let fx = Fixture::new();
5606        for i in 0..501 {
5607            fx.write(
5608                &format!("records/contacts/c{i:04}.md"),
5609                &valid_contact(&format!("contact {i}")),
5610            );
5611        }
5612        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5613        fx.write(
5614            "records/index.md",
5615            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5616        );
5617        // Type-folder index lists only ONE entry + a More footer.
5618        fx.write(
5619            "records/contacts/index.md",
5620            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5621        );
5622        // jsonl must still be complete — write all 501 lines.
5623        let mut jsonl = String::new();
5624        for i in 0..501 {
5625            jsonl.push_str(&format!(
5626                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5627            ));
5628        }
5629        fx.write("records/contacts/index.jsonl", &jsonl);
5630        let issues = fx.store_all();
5631        assert!(
5632            !has(&issues, codes::INDEX_MISSING_ENTRY),
5633            "over the cap, missing browse entries are expected: {issues:#?}"
5634        );
5635        // But the jsonl is complete → no desync.
5636        assert!(
5637            !has(&issues, codes::INDEX_JSONL_DESYNC),
5638            "{:#?}",
5639            issues
5640                .iter()
5641                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5642                .collect::<Vec<_>>()
5643        );
5644    }
5645
5646    // ── log ────────────────────────────────────────────────────────────────
5647
5648    #[test]
5649    fn log_bad_timestamp_unknown_kind_out_of_order() {
5650        let fx = Fixture::new();
5651        fx.write(
5652            "log.md",
5653            concat!(
5654                "---\ntype: log\n---\n\n# Log\n\n",
5655                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5656                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5657                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5658                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5659            ),
5660        );
5661        let issues = fx.store_all();
5662        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5663        assert_eq!(
5664            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5665            Severity::Warning
5666        );
5667        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5668        assert_eq!(unknown.severity, Severity::Warning);
5669        assert!(unknown.message.contains("frobnicate"));
5670        assert!(unknown
5671            .suggestion
5672            .as_deref()
5673            .is_some_and(|s| s.contains("create")));
5674        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5675        assert!(bad.is_error());
5676    }
5677
5678    #[test]
5679    fn log_validate_entry_without_object_is_well_formed() {
5680        let fx = Fixture::new();
5681        fx.write(
5682            "log.md",
5683            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5684        );
5685        let issues = fx.store_all();
5686        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5687        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5688    }
5689
5690    #[test]
5691    fn log_in_order_is_clean() {
5692        let fx = Fixture::new();
5693        fx.write(
5694            "log.md",
5695            concat!(
5696                "---\ntype: log\n---\n\n",
5697                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5698                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5699            ),
5700        );
5701        let issues = fx.store_all();
5702        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5703    }
5704
5705    #[test]
5706    fn log_not_checked_in_working_set() {
5707        // log.md ordering is an --all-only check.
5708        let fx = Fixture::new();
5709        fx.write(
5710            "log.md",
5711            concat!(
5712                "---\ntype: log\n---\n\n",
5713                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5714                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5715            ),
5716        );
5717        let issues = validate_working_set(&fx.store(), None).unwrap();
5718        assert!(
5719            !has(&issues, codes::LOG_OUT_OF_ORDER),
5720            "log ordering is --all only: {issues:#?}"
5721        );
5722    }
5723
5724    // ── working-set scoping ───────────────────────────────────────────────────
5725
5726    #[test]
5727    fn working_set_validates_only_changed_files() {
5728        let fx = Fixture::new();
5729        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5730        // in the log → working set must skip it.
5731        fx.write(
5732            "records/contacts/dirty.md",
5733            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5734        );
5735        fx.write(
5736            "records/contacts/unlogged.md",
5737            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5738        );
5739        fx.write(
5740            "log.md",
5741            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5742        );
5743        let issues = validate_working_set(&fx.store(), None).unwrap();
5744        assert!(
5745            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5746                && i.file == Path::new("records/contacts/dirty.md")),
5747            "{issues:#?}"
5748        );
5749        assert!(
5750            !issues
5751                .iter()
5752                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5753            "unlogged file must not be in the working set: {issues:#?}"
5754        );
5755    }
5756
5757    #[test]
5758    fn working_set_includes_incoming_linkers_to_changed_path() {
5759        let fx = Fixture::new();
5760        // `changed` was renamed/removed (logged). `linker` points at it with a
5761        // now-broken link and was NOT itself logged — but must be pulled in.
5762        fx.write(
5763            "records/profiles/linker.md",
5764            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5765        );
5766        // `changed.md` does NOT exist on disk (removed).
5767        fx.write(
5768            "log.md",
5769            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5770        );
5771        let issues = validate_working_set(&fx.store(), None).unwrap();
5772        assert!(
5773            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5774                && i.file == Path::new("records/profiles/linker.md")),
5775            "incoming linker to a removed path must be validated: {issues:#?}"
5776        );
5777    }
5778
5779    #[test]
5780    fn working_set_respects_explicit_since_cutoff() {
5781        let fx = Fixture::new();
5782        fx.write(
5783            "records/contacts/old.md",
5784            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5785        );
5786        fx.write(
5787            "records/contacts/new.md",
5788            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5789        );
5790        fx.write(
5791            "log.md",
5792            concat!(
5793                "---\ntype: log\n---\n\n",
5794                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5795                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5796            ),
5797        );
5798        // Cutoff after `old` but before `new`.
5799        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5800        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5801        assert!(
5802            issues
5803                .iter()
5804                .any(|i| i.file == Path::new("records/contacts/new.md")),
5805            "{issues:#?}"
5806        );
5807        assert!(
5808            !issues
5809                .iter()
5810                .any(|i| i.file == Path::new("records/contacts/old.md")),
5811            "old change is before the cutoff: {issues:#?}"
5812        );
5813    }
5814
5815    #[test]
5816    fn working_set_default_since_is_last_validate_entry() {
5817        let fx = Fixture::new();
5818        // `before` changed before the last validate; `after` changed after.
5819        fx.write(
5820            "records/contacts/before.md",
5821            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5822        );
5823        fx.write(
5824            "records/contacts/after.md",
5825            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5826        );
5827        fx.write(
5828            "log.md",
5829            concat!(
5830                "---\ntype: log\n---\n\n",
5831                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5832                "## [2026-05-21 10:00] validate\nPASS\n\n",
5833                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5834            ),
5835        );
5836        let issues = validate_working_set(&fx.store(), None).unwrap();
5837        assert!(
5838            issues
5839                .iter()
5840                .any(|i| i.file == Path::new("records/contacts/after.md")),
5841            "{issues:#?}"
5842        );
5843        assert!(
5844            !issues
5845                .iter()
5846                .any(|i| i.file == Path::new("records/contacts/before.md")),
5847            "change before the last validate entry is outside the default window: {issues:#?}"
5848        );
5849    }
5850
5851    // ── ordering / determinism ────────────────────────────────────────────────
5852
5853    #[test]
5854    fn issues_are_sorted_by_file_then_line() {
5855        let fx = Fixture::new();
5856        fx.write("records/profiles/z.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5857        fx.write("records/profiles/a.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5858        let issues = fx.store_all();
5859        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5860        let mut sorted = files.clone();
5861        sorted.sort();
5862        assert_eq!(
5863            files, sorted,
5864            "issues must be emitted in a stable file order"
5865        );
5866    }
5867
5868    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5869
5870    #[test]
5871    fn frozen_page_is_not_a_validate_error() {
5872        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5873        // A clean file listed in `### Frozen pages` must validate clean.
5874        let mut fx = Fixture::new();
5875        fx.config
5876            .frozen_pages
5877            .push(PathBuf::from("records/decisions/d.md"));
5878        fx.write(
5879            "records/decisions/d.md",
5880            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5881        );
5882        let issues = fx.store_all();
5883        assert!(
5884            !has(&issues, codes::POLICY_FROZEN_PAGE),
5885            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5886        );
5887    }
5888
5889    #[test]
5890    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5891        // The full-path doctrine makes ambiguity impossible; the defensive code
5892        // must never fire on a normal store.
5893        let fx = Fixture::new();
5894        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5895        let mut body = valid_contact("links to sarah");
5896        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5897        fx.write("records/contacts/p.md", &body);
5898        let issues = fx.store_all();
5899        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5900    }
5901
5902    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5903
5904    #[test]
5905    fn unknown_type_passes_through() {
5906        // A custom type is ambient context: it has a `type`, so no
5907        // FM_MISSING_TYPE, and with no matching schema there are no schema
5908        // errors. Only the universal contract (summary, timestamps) applies.
5909        let fx = Fixture::new();
5910        fx.write(
5911            "records/proposals/x.md",
5912            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5913        );
5914        let issues = fx.store_all();
5915        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5916        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5917        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5918        // The unknown fields don't trip anything.
5919        assert!(
5920            !issues
5921                .iter()
5922                .any(|i| i.key.as_deref() == Some("custom_field")
5923                    || i.key.as_deref() == Some("budget")),
5924            "unknown fields are ambient context: {issues:#?}"
5925        );
5926    }
5927
5928    // ── find_links_to prefix-collision safety (working set) ───────────────────
5929
5930    #[test]
5931    fn incoming_linker_scan_does_not_prefix_match() {
5932        // A changed `records/contacts/sarah` must NOT pull in a file that only
5933        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5934        let fx = Fixture::new();
5935        fx.write(
5936            "records/profiles/only-sarah-chen.md",
5937            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5938        );
5939        // The log says `records/contacts/sarah` (the shorter path) changed.
5940        fx.write(
5941            "log.md",
5942            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5943        );
5944        let issues = validate_working_set(&fx.store(), None).unwrap();
5945        assert!(
5946            !issues
5947                .iter()
5948                .any(|i| i.file == Path::new("records/profiles/only-sarah-chen.md")),
5949            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5950        );
5951    }
5952
5953    #[test]
5954    fn working_set_does_not_flag_stale_catalog_index_as_wiki_link_broken() {
5955        // The working-set incoming-linker scan rides embedded-ripgrep
5956        // `Store::find_links_to`, which scans EVERY `.md` — so a type-folder
5957        // `index.md` listing a now-deleted target IS pulled into the working set.
5958        // But its entries are GENERATED catalog entries, not authored body links:
5959        // a dangling one is an `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"),
5960        // the job of `check_indexes` under `--all` — NOT a `WIKI_LINK_BROKEN`
5961        // ("create the target"), whose remedy would steer an agent to recreate
5962        // the very data it just deleted. The loop default must therefore NOT
5963        // body-link-check the derived catalog (index integrity is an O(store)
5964        // sweep concern, not an O(changed) loop concern). Adversarial review #11:
5965        // the prior behavior gave WIKI_LINK_BROKEN here while `--all` gave
5966        // INDEX_STALE_ENTRY for the identical condition — two codes, opposite
5967        // remedies, across the loop default vs the sweep.
5968        let fx = Fixture::new();
5969        // A catalog that still lists the deleted contact (a real, common stale
5970        // state after an out-of-band `delete`).
5971        fx.write(
5972            "records/contacts/index.md",
5973            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5974        );
5975        // The log says `records/contacts/sarah-chen` was deleted.
5976        fx.write(
5977            "log.md",
5978            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5979        );
5980        let issues = validate_working_set(&fx.store(), None).unwrap();
5981        assert!(
5982            !issues
5983                .iter()
5984                .any(|i| i.file == Path::new("records/contacts/index.md")
5985                    && i.code == codes::WIKI_LINK_BROKEN),
5986            "a stale catalog `index.md` entry must NOT be WIKI_LINK_BROKEN in the \
5987             working set (it is an INDEX_STALE_ENTRY under `--all`): {issues:#?}"
5988        );
5989    }
5990
5991    #[test]
5992    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
5993        // CONTRACT (the O(changed × store) fix): the working-set scan finds
5994        // incoming linkers for EVERY changed object, and does so via the single
5995        // batch pass `Store::find_links_to_any` — not one full store read per
5996        // changed object. This test pins the behavior that makes the single-pass
5997        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
5998        // into the working set and flagged. A regression that scanned for only the
5999        // first/last changed object, or that dropped the batch union, would leave
6000        // one of the two broken links unreported and fail here.
6001        let fx = Fixture::new();
6002        // Linker A → deleted target #1 (in the body).
6003        fx.write(
6004            "records/profiles/refers-sarah.md",
6005            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6006        );
6007        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
6008        // sidecar `links` projection would miss, which is why this must be a
6009        // content scan, not a sidecar read).
6010        fx.write(
6011            "records/meetings/2026/05/kickoff.md",
6012            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
6013        );
6014        // The log says BOTH targets were deleted in this window.
6015        fx.write(
6016            "log.md",
6017            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
6018        );
6019
6020        let issues = validate_working_set(&fx.store(), None).unwrap();
6021        assert!(
6022            issues
6023                .iter()
6024                .any(|i| i.file == Path::new("records/profiles/refers-sarah.md")
6025                    && i.code == codes::WIKI_LINK_BROKEN),
6026            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
6027        );
6028        assert!(
6029            issues.iter().any(
6030                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
6031                    && i.code == codes::WIKI_LINK_BROKEN
6032            ),
6033            "linker to the SECOND deleted target (typed-field edge) must also be \
6034             pulled in and flagged — proves the scan covers the whole changed set, \
6035             not just one object: {issues:#?}"
6036        );
6037    }
6038
6039    #[test]
6040    fn frontmatter_block_sequence_links_each_get_their_own_line() {
6041        // Each block-sequence wiki-link reports on its own source line.
6042        let fx = Fixture::new();
6043        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
6044        fx.write(
6045            "records/meetings/m.md",
6046            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
6047        );
6048        let issues = fx.store_all();
6049        let broken_lines: BTreeSet<Option<u32>> = issues
6050            .iter()
6051            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
6052            .map(|i| i.line)
6053            .collect();
6054        assert_eq!(
6055            broken_lines.len(),
6056            2,
6057            "two distinct broken-link lines: {issues:#?}"
6058        );
6059    }
6060
6061    // ── Regression: null / non-scalar created/updated ────────────────────────
6062
6063    #[test]
6064    fn null_created_is_missing_not_silently_passed() {
6065        // Regression: a present-but-`null` `created:` previously slipped past
6066        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
6067        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
6068        let fx = Fixture::new();
6069        fx.write(
6070            "records/contacts/a.md",
6071            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6072        );
6073        let issues = fx.store_all();
6074        assert!(
6075            has(&issues, codes::FM_MISSING_CREATED),
6076            "null `created:` must read as missing: {issues:#?}"
6077        );
6078    }
6079
6080    #[test]
6081    fn sequence_created_is_bad_timestamp() {
6082        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
6083        let fx = Fixture::new();
6084        fx.write(
6085            "records/contacts/a.md",
6086            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6087        );
6088        let issues = fx.store_all();
6089        assert!(
6090            issues
6091                .iter()
6092                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
6093            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
6094        );
6095    }
6096
6097    // ── Regression: schema required null / empty-collection ──────────────────
6098
6099    #[test]
6100    fn required_field_null_or_empty_collection_is_missing() {
6101        // Regression: a plain required field (no shape/enum) holding YAML null
6102        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
6103        // previously validated with 0 issues — `scalar_string` returned None and
6104        // `.unwrap_or(false)` treated the value as non-empty.
6105        for value in ["", " []", " {}"] {
6106            let mut fx = Fixture::new();
6107            fx.config.schemas.insert(
6108                "contact".into(),
6109                Schema {
6110                    fields: vec![FieldSpec {
6111                        name: "name".into(),
6112                        required: true,
6113                        ..Default::default()
6114                    }],
6115                    ..Default::default()
6116                },
6117            );
6118            fx.write(
6119                "records/contacts/a.md",
6120                &format!(
6121                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
6122                ),
6123            );
6124            let issues = fx.store_all();
6125            assert!(
6126                issues
6127                    .iter()
6128                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
6129                        && i.key.as_deref() == Some("name")),
6130                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
6131            );
6132        }
6133    }
6134
6135    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
6136
6137    #[test]
6138    fn wiki_link_to_raw_source_file_resolves() {
6139        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
6140        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
6141        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
6142        let fx = Fixture::new();
6143        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6144        fx.write(
6145            "records/contacts/a.md",
6146            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
6147        );
6148        let issues = fx.store_all();
6149        assert!(
6150            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
6151            "a link to an existing raw source file must not be broken: {issues:#?}"
6152        );
6153    }
6154
6155    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
6156
6157    #[test]
6158    fn non_utf8_content_file_is_reported() {
6159        // Regression: a content file with invalid UTF-8 bytes made
6160        // check_content_file return None silently, so the store passed with exit
6161        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
6162        let fx = Fixture::new();
6163        let abs = fx.dir.path().join("records/notes/corrupt.md");
6164        fs::create_dir_all(abs.parent().unwrap()).unwrap();
6165        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
6166        let issues = validate_working_set(&fx.store(), None).unwrap();
6167        assert!(
6168            has(&issues, codes::FM_UNREADABLE),
6169            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
6170        );
6171    }
6172
6173    // ── Regression: code-fence char/run tracking ─────────────────────────────
6174
6175    #[test]
6176    fn tilde_fence_containing_backtick_fence_does_not_invert() {
6177        // Regression: a `~~~` block legally contains ``` lines (documenting a
6178        // backtick fence); a naive toggle inverted `in_fence` and checked the
6179        // demo `[[fake]]` inside the code block as a live link. The link inside
6180        // BOTH fences must be skipped.
6181        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
6182        let links = extract_wiki_links(body);
6183        assert!(
6184            links.is_empty(),
6185            "wiki-link inside a nested code fence must be skipped: {links:?}"
6186        );
6187    }
6188
6189    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
6190
6191    #[test]
6192    fn all_sweep_visits_in_layer_log_folder() {
6193        // Regression: `validate --all` pruned every dir named `log`, so a real
6194        // content folder like `records/log/` was invisible to the full sweep —
6195        // reporting FEWER errors than the default scope. A frontmatter-less file
6196        // there must still surface FM_MISSING_TYPE under --all.
6197        let fx = Fixture::new();
6198        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
6199        let issues = fx.store_all();
6200        assert!(
6201            has(&issues, codes::FM_MISSING_TYPE),
6202            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
6203        );
6204    }
6205
6206    // ── Regression: flow-form list with whitespace ───────────────────────────
6207
6208    #[test]
6209    fn flow_form_link_list_with_spaces_is_flagged() {
6210        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
6211        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
6212        // text test. The value-based detector must catch the whitespace variant.
6213        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
6214        assert!(
6215            keys.iter().any(|k| k == "attendees"),
6216            "spaced flow-form list must be detected: {keys:?}"
6217        );
6218    }
6219
6220    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6221
6222    #[test]
6223    fn middot_hashtag_summary_tail_round_trips() {
6224        // Regression: a tagless summary that legitimately ends in a single-spaced
6225        // ` · #word` tail round-trips through the renderer verbatim, but the loose
6226        // ` · ` strip mistook it for the tag block and reported a spurious,
6227        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
6228        // exact double-spaced `  ·  ` delimiter.
6229        assert_eq!(
6230            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
6231            Some("Standup notes · #standup"),
6232            "a single-spaced middot tail is part of the summary, not a tag block"
6233        );
6234        // The renderer's real double-spaced tag suffix IS still stripped.
6235        assert_eq!(
6236            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
6237            Some("Renewal champion"),
6238            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
6239        );
6240    }
6241
6242    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6243
6244    #[test]
6245    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
6246        assert!(is_url("http://x"), "an 8-char http URL is valid");
6247        assert!(is_url("https://x"), "a 9-char https URL is valid");
6248        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
6249        assert!(!is_url("https://"), "a bare https scheme is rejected");
6250    }
6251
6252    #[test]
6253    fn email_shape_rejects_double_at() {
6254        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6255        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6256        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6257    }
6258
6259    // ── Regression: working-set vs --all agree on log.md links ───────────────
6260
6261    #[test]
6262    fn working_set_does_not_flag_log_md_body_links() {
6263        // Regression: the working-set incoming-linker scan runs root `log.md`
6264        // through the body wiki-link check, flagging a historical `[[deleted]]`
6265        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6266        // the append-only log can't have "fixed". The root meta files must be
6267        // excluded from the body link check, matching --all.
6268        let fx = Fixture::new();
6269        fx.write("records/contacts/a.md", &valid_contact("A"));
6270        fx.write(
6271            "log.md",
6272            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6273        );
6274        let issues = validate_working_set(&fx.store(), None).unwrap();
6275        assert!(
6276            !issues
6277                .iter()
6278                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6279                    && i.file == std::path::Path::new("log.md")),
6280            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6281        );
6282    }
6283
6284    // ── Regression: DB.md schema field lint ──────────────────────────────────
6285
6286    #[test]
6287    fn schema_duplicate_field_name_is_flagged() {
6288        let mut fx = Fixture::new();
6289        fx.config.schemas.insert(
6290            "contact".into(),
6291            Schema {
6292                fields: vec![
6293                    FieldSpec {
6294                        name: "name".into(),
6295                        required: true,
6296                        ..Default::default()
6297                    },
6298                    FieldSpec {
6299                        name: "name".into(),
6300                        ..Default::default()
6301                    },
6302                ],
6303                ..Default::default()
6304            },
6305        );
6306        let issues = fx.store_all();
6307        assert!(
6308            issues
6309                .iter()
6310                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6311            "a duplicate schema field name must be flagged: {issues:#?}"
6312        );
6313    }
6314
6315    #[test]
6316    fn schema_unknown_modifier_is_info() {
6317        let mut fx = Fixture::new();
6318        fx.config.schemas.insert(
6319            "contact".into(),
6320            Schema {
6321                fields: vec![FieldSpec {
6322                    name: "name".into(),
6323                    unknown_modifiers: vec!["requierd".into()],
6324                    ..Default::default()
6325                }],
6326                ..Default::default()
6327            },
6328        );
6329        let issues = fx.store_all();
6330        assert!(
6331            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6332                && i.severity == Severity::Info
6333                && i.key.as_deref() == Some("name")),
6334            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6335        );
6336    }
6337
6338    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6339    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6340    /// and the module doc-comment promises this code implements "exactly those
6341    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6342    /// new validation code is added to the engine but never documented.
6343    #[test]
6344    fn every_code_constant_is_documented_in_spec() {
6345        // Parse the canonical constant *values* straight out of this module's
6346        // source, so a future `pub const X: &str = "X";` is covered with no test
6347        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6348        let this_src = include_str!("validate.rs");
6349        let mut codes_in_module: Vec<String> = Vec::new();
6350        let mut in_codes_mod = false;
6351        for line in this_src.lines() {
6352            let t = line.trim();
6353            if t.starts_with("pub mod codes") {
6354                in_codes_mod = true;
6355                continue;
6356            }
6357            // The `mod codes` block ends at its closing brace at column 0.
6358            if in_codes_mod && line == "}" {
6359                break;
6360            }
6361            if in_codes_mod {
6362                if let Some(rest) = t.strip_prefix("pub const ") {
6363                    // rest = `NAME: &str = "VALUE";`
6364                    let value = rest
6365                        .split_once('=')
6366                        .map(|(_, v)| v.trim())
6367                        .and_then(|v| v.strip_prefix('"'))
6368                        .and_then(|v| v.strip_suffix("\";"))
6369                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6370                    codes_in_module.push(value.to_string());
6371                }
6372            }
6373        }
6374        assert!(
6375            codes_in_module.len() >= 36,
6376            "parsed only {} code constants from `mod codes`; the parser likely \
6377             broke against a source-format change",
6378            codes_in_module.len()
6379        );
6380
6381        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6382        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6383        let spec = fs::read_to_string(&spec_path)
6384            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6385
6386        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6387        let missing: Vec<&String> = codes_in_module
6388            .iter()
6389            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6390            .collect();
6391        assert!(
6392            missing.is_empty(),
6393            "validation codes emitted by the engine but absent from SPEC.md \
6394             § Validation (the declared complete vocabulary): {missing:?}"
6395        );
6396    }
6397
6398    // ── loose files (directly at a layer root, no type-folder) ───────────────
6399
6400    const LOOSE_ALICE: &str = "---\ntype: contact\nid: alice\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Alice\n---\nbody\n";
6401    const LOOSE_BOB: &str = "---\ntype: contact\nid: bob\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Bob loose\n---\nbody\n";
6402
6403    #[test]
6404    fn loose_file_catalogued_in_layer_jsonl_validates_clean() {
6405        let fx = Fixture::new();
6406        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6407        fx.write("records/bob.md", LOOSE_BOB); // loose, directly under records/
6408        fx.rebuild_indexes();
6409        let issues = fx.store_all();
6410        assert!(
6411            issues.is_empty(),
6412            "a rebuilt store with a catalogued loose file must validate clean, got: {issues:?}"
6413        );
6414    }
6415
6416    #[test]
6417    fn loose_file_with_missing_layer_jsonl_is_index_jsonl_missing() {
6418        let fx = Fixture::new();
6419        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6420        fx.write("records/bob.md", LOOSE_BOB);
6421        fx.rebuild_indexes();
6422        // Simulate the layer sidecar going missing (a hand-deletion / bad sync).
6423        fs::remove_file(fx.dir.path().join("records/index.jsonl")).unwrap();
6424        let issues = fx.store_all();
6425        assert!(
6426            has(&issues, codes::INDEX_JSONL_MISSING),
6427            "a loose file with no layer index.jsonl must raise INDEX_JSONL_MISSING, got: {issues:?}"
6428        );
6429    }
6430}