Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
31//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
32//! log-header parse, and file walk here, reading only the two public,
33//! caller-populated fields of a [`Store`]: [`Store::root`] and
34//! [`Store::config`] — rather than routing through the sibling modules
35//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
36//! Keeping the checks local lets the validator report precise, per-issue
37//! diagnostics (exact codes, file, and context) without coupling its output to
38//! incidental behavior of the shared readers; the public surface and the
39//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is.
///
/// Only [`Severity::Error`] fails validation (non-zero exit); warnings and
/// info never do.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point the agent resolves at its discretion.
    Warning,
    /// Visibility only; never affects exit status.
    Info,
}
61
62/// A single structured validation finding. Agent-primary and machine-parseable
63/// via `--json`; `suggestion` is a deterministic remediation hint the agent
64/// applies without guessing.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Issue {
67    /// The severity; only [`Severity::Error`] fails validation.
68    pub severity: Severity,
69    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
70    pub code: &'static str,
71    /// The file the issue is about.
72    pub file: PathBuf,
73    /// The 1-based line, when applicable.
74    pub line: Option<u32>,
75    /// The frontmatter key, when the issue is about a specific field.
76    pub key: Option<String>,
77    /// A human-readable message.
78    pub message: String,
79    /// A deterministic remediation hint, when one exists.
80    pub suggestion: Option<String>,
81    /// Other files involved (e.g. the duplicate partner in a collision).
82    pub related: Vec<PathBuf>,
83}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes: one constant per row of the SPEC.md
/// § Validation table.
///
/// Call sites name these constants rather than spelling a code as a bare
/// string literal, so the code and the SPEC table cannot silently drift.
pub mod codes {
    // ── store identity: `DB.md` ─────────────────────────────────────────────

    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter lacks `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` has a `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A `DB.md ## Schemas` field declaration is malformed (empty or
    /// duplicate field name) or carries an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";

    // ── frontmatter ─────────────────────────────────────────────────────────

    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// A content file can't be read (not valid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// The frontmatter block isn't valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` isn't ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";

    // ── summary ─────────────────────────────────────────────────────────────

    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";

    // ── wiki-links ──────────────────────────────────────────────────────────

    /// A wiki-link target isn't a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file doesn't exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension — drop it.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses inline `[[[a]], [[b]]]` — use block form.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";

    // ── duplicates ──────────────────────────────────────────────────────────

    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of a type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";

    // ── schema enforcement ──────────────────────────────────────────────────

    /// A `DB.md` schema requires a field that's absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value doesn't match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field has a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value isn't in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";

    // ── policies ────────────────────────────────────────────────────────────

    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file with an `### Ignored types` type exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `wiki-page` derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";

    // ── log.md ──────────────────────────────────────────────────────────────

    /// A `log.md` entry header timestamp is unparseable.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind isn't recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries aren't in non-decreasing time order (possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";

    // ── indexes (index.md + index.jsonl) ────────────────────────────────────

    /// A non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file isn't listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` doesn't match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text doesn't match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file isn't in the `index.jsonl`, or a jsonl record points at a
    /// missing file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields don't match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";

    // ── tags ────────────────────────────────────────────────────────────────

    /// `tags` isn't a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
}
184
/// Upper bound, in chars, that the SPEC places on a `summary`. A longer
/// summary is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
187
/// The `log.md` entry kinds the SPEC (§ `log.md`) recognizes. An entry with
/// any other kind is reported as `LOG_UNKNOWN_KIND` — a warning, not an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
201
202// ─────────────────────────────────────────────────────────────────────────────
203//  Public entrypoints
204// ─────────────────────────────────────────────────────────────────────────────
205
206/// **Loop default.** Validate the working set: content files changed since
207/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
208/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
209/// none of the cross-file global passes (entity-dedup, every-index sync,
210/// `log.md` ordering) that `--all` adds. If the default call finds no logged
211/// changed objects, it falls back to a per-file content sweep so an externally
212/// edited or freshly copied store cannot pass vacuously.
213///
214/// **Cost.** The changed set is read from `log.md` — O(changed): every
215/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
216/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
217/// over that set plus its incoming linkers — also O(changed). The one part that
218/// is *not* O(changed) is discovering those incoming linkers: a link to a
219/// changed path can live in the body or a typed frontmatter field of any file,
220/// so it is found by a **single** embedded-ripgrep pass over the store
221/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
222/// scan, flat in the changed-set size. (It was previously a full store read
223/// *per* changed object — `O(changed × store)`; that is the blow-up this path
224/// no longer pays.) The unavoidable single content scan is the same shape as
225/// free-text `dbmd search`; the sidecar `links` projection can't replace it
226/// because it omits body/typed-field edges.
227pub fn validate_working_set(
228    store: &Store,
229    since: Option<DateTime<FixedOffset>>,
230) -> crate::Result<Vec<Issue>> {
231    if !store_marker_present(store) {
232        return Ok(vec![not_a_store_issue(store)]);
233    }
234
235    let cutoff = match since {
236        Some(ts) => Some(ts),
237        None => last_validate_at(store),
238    };
239
240    // 1. Changed objects, straight from the log (O(changed) — never a walk).
241    let changed = changed_objects_since(store, cutoff);
242    if changed.is_empty() && since.is_none() {
243        return validate_content_sweep(store);
244    }
245
246    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
247    //    path (the linker may now be stale even though it didn't change). The
248    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
249    //    pass over the store for the WHOLE changed set (one `.md` walk, one
250    //    presence-only/early-exit scan per file), not one walk per object. This
251    //    is the fix for the `O(changed × store)` blow-up that calling
252    //    `find_links_to` in a loop produced (a full store read per changed
253    //    object); the cost is now a single store scan regardless of how many
254    //    objects changed. A returned self-link is harmlessly deduped by the set
255    //    (the object is already inserted below).
256    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
257    let mut working: BTreeSet<PathBuf> = changed;
258    for linker in store.find_links_to_any(&changed_targets)? {
259        working.insert(linker);
260    }
261
262    let mut issues = Vec::new();
263    for rel in &working {
264        let abs = store.root.join(rel);
265        // A changed path can be a *deletion* — skip files that no longer exist;
266        // the incoming-linker scan above already flagged links into them.
267        if !abs.is_file() {
268            continue;
269        }
270        // `None` basename index: the working-set pass does not build the
271        // store-wide basename map (that is a `--all`-only structure), so a bare
272        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
273        // `--all` sweep does the ambiguity upgrade.
274        check_content_file(store, rel, &abs, None, &mut issues);
275    }
276    issues.sort_by(issue_order);
277    Ok(issues)
278}
279
280fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
281    let mut issues = Vec::new();
282    for rel in store.walk()? {
283        let abs = store.root.join(&rel);
284        check_content_file(store, &rel, &abs, None, &mut issues);
285    }
286    issues.sort_by(issue_order);
287    Ok(issues)
288}
289
290/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
291/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
292/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
293/// loop.
294pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
295    if !store_marker_present(store) {
296        return Ok(vec![not_a_store_issue(store)]);
297    }
298
299    let mut issues = Vec::new();
300
301    // Store-identity file: `DB.md` shape (type / required fields / section
302    // headers). A single root file, checked once in the sweep — not a content
303    // file (it carries no `summary`), so it is not part of `walk_content_files`.
304    check_db_md(store, &mut issues);
305
306    let files = walk_content_files(&store.root);
307
308    // The basename index makes the short-form wiki-link check able to upgrade a
309    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
310    // Built once from the already-gathered sweep list (no extra walk); only the
311    // `--all` path has it (the working-set path stays O(changed)).
312    let basenames = build_basename_index(&files);
313
314    // Per-file checks over the whole store.
315    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
316    for rel in &files {
317        let abs = store.root.join(rel);
318        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
319            parsed.push((rel.clone(), p));
320        }
321    }
322
323    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
324    check_duplicates(store, &parsed, &mut issues);
325
326    // Cross-file: hierarchical index.md + index.jsonl sync.
327    check_indexes(store, &files, &mut issues);
328
329    // Cross-file: log.md well-formedness + ordering.
330    check_log(store, &mut issues);
331
332    issues.sort_by(issue_order);
333    Ok(issues)
334}
335
336// ─────────────────────────────────────────────────────────────────────────────
337//  Per-file content checks (shared by both scopes)
338// ─────────────────────────────────────────────────────────────────────────────
339
340/// What `validate_all`'s cross-file pass needs from a per-file parse: the
341/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
342/// text-based wiki-link extraction). The body and fence-line are consumed
343/// inline during the per-file pass and not carried here.
344struct Parsed {
345    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
346    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
347    fm: Option<BTreeMap<String, Value>>,
348    /// The raw frontmatter YAML text (between the fences) — the source for
349    /// text-based wiki-link extraction in dedup.
350    fm_yaml: String,
351}
352
353/// Run every per-file check on one content file, pushing issues. Returns the
354/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
355/// `None` only when the file is unreadable or has no frontmatter block at all
356/// (which for a content file is itself reported).
357fn check_content_file(
358    store: &Store,
359    rel: &Path,
360    abs: &Path,
361    basenames: Option<&BasenameIndex>,
362    issues: &mut Vec<Issue>,
363) -> Option<Parsed> {
364    let text = match std::fs::read_to_string(abs) {
365        Ok(t) => t,
366        Err(e) => {
367            // The file exists in the walk but can't be read as UTF-8 text
368            // (invalid bytes) or hit an I/O error. Returning `None` silently
369            // here let a store whose only content file was binary garbage pass
370            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
371            // sweep exists to prevent. Report it so the agent gets an actionable
372            // diagnostic naming the unreadable file (and `index rebuild`, which
373            // hard-fails on the same file, isn't the only signal).
374            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
375                "file is not valid UTF-8 text".to_string()
376            } else {
377                format!("file could not be read: {e}")
378            };
379            push(
380                issues,
381                Severity::Error,
382                codes::FM_UNREADABLE,
383                rel,
384                None,
385                None,
386                format!("content file is unreadable: {detail}"),
387                Some(
388                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
389                        .into(),
390                ),
391                vec![],
392            );
393            return None;
394        }
395    };
396
397    let is_content = is_content_file(rel);
398
399    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
400        Some(split) => split,
401        None => {
402            // No frontmatter at all. For a content file that means there's no
403            // `type:` and no `summary:` — report both the way a parsed-but-empty
404            // file would, so the agent gets the same actionable codes.
405            if is_content {
406                push(
407                    issues,
408                    Severity::Error,
409                    codes::FM_MISSING_TYPE,
410                    rel,
411                    None,
412                    Some("type".into()),
413                    "content file has no frontmatter `type:`".into(),
414                    Some("add a YAML frontmatter block with `type:`".into()),
415                    vec![],
416                );
417                push(
418                    issues,
419                    Severity::Error,
420                    codes::SUMMARY_MISSING,
421                    rel,
422                    None,
423                    Some("summary".into()),
424                    "content file has no `summary`".into(),
425                    Some("run `dbmd fm init`".into()),
426                    vec![],
427                );
428            }
429            return None;
430        }
431    };
432
433    // Parse the YAML block.
434    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
435        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
436        // An empty frontmatter block parses as Null; treat as an empty mapping.
437        Ok(Value::Null) => Some(BTreeMap::new()),
438        Ok(_) => {
439            // A scalar / sequence at the top level isn't a frontmatter mapping.
440            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
441            // block is opaque, so there is no single offending field line.
442            push(
443                issues,
444                Severity::Error,
445                codes::FM_MALFORMED_YAML,
446                rel,
447                Some(1),
448                None,
449                "frontmatter is not a YAML mapping".into(),
450                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
451                vec![],
452            );
453            None
454        }
455        Err(e) => {
456            // Anchor to line 1 (the opening `---`): an unparseable block has no
457            // single offending field line; the agent re-reads the whole block.
458            push(
459                issues,
460                Severity::Error,
461                codes::FM_MALFORMED_YAML,
462                rel,
463                Some(1),
464                None,
465                format!("frontmatter block isn't valid YAML: {e}"),
466                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
467                vec![],
468            );
469            None
470        }
471    };
472
473    if let Some(map) = &fm {
474        // The detailed frontmatter checks only run when the YAML parsed.
475        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
476    }
477
478    // Wiki-link doctrine checks run on the body of content files (and on
479    // `index.md` files, whose entries are wiki-links too). They are NOT run on
480    // the root append-only meta files `log.md`/`DB.md`: those reach this
481    // function only via the working-set incoming-linker scan (`walk_all_md`
482    // includes them), and `validate --all` never link-checks their bodies
483    // (`walk_content_files` skips them; `check_log`/`check_db_md` do no body
484    // link checks). Without this guard the two scopes disagree — a historical
485    // `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]` in DB.md's
486    // `## Agent instructions`, is flagged `WIKI_LINK_BROKEN` by the default
487    // working set but is clean under `--all`. The log is append-only by spec, so
488    // the suggested "fix the link" remedy can't even be applied.
489    if !is_root_meta_file(rel) {
490        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
491    }
492
493    Some(Parsed { fm, fm_yaml })
494}
495
496/// All frontmatter-level checks for a content file with valid YAML.
497fn check_frontmatter(
498    store: &Store,
499    rel: &Path,
500    fm: &BTreeMap<String, Value>,
501    fm_yaml: &str,
502    basenames: Option<&BasenameIndex>,
503    issues: &mut Vec<Issue>,
504    is_content: bool,
505) {
506    let type_ = fm.get("type").and_then(scalar_string);
507
508    // ── type ────────────────────────────────────────────────────────────────
509    if is_content && type_.is_none() {
510        push(
511            issues,
512            Severity::Error,
513            codes::FM_MISSING_TYPE,
514            rel,
515            fm_key_line_or_top(fm_yaml, "type"),
516            Some("type".into()),
517            "content file has no `type:`".into(),
518            Some("add a `type:` field (e.g. `type: contact`)".into()),
519            vec![],
520        );
521    }
522
523    // ── summary (universal on content files) ──────────────────────────────────
524    if is_content {
525        check_summary(rel, fm, fm_yaml, issues);
526    }
527
528    // ── timestamps: created / updated ─────────────────────────────────────────
529    // The `created`/`updated` contract is content-file-only; meta files
530    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
531    if is_content {
532        for (key, missing_code) in [
533            ("created", codes::FM_MISSING_CREATED),
534            ("updated", codes::FM_MISSING_UPDATED),
535        ] {
536            // A key that is absent, or present-but-`null`, has *no* timestamp →
537            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
538            // "no timestamp", so a null `created:` must read as missing, not
539            // silently pass.
540            let value = fm.get(key);
541            let missing = value.is_none() || value.is_some_and(Value::is_null);
542            if missing {
543                push(
544                    issues,
545                    Severity::Error,
546                    missing_code,
547                    rel,
548                    fm_key_line_or_top(fm_yaml, key),
549                    Some(key.into()),
550                    format!("content file has no `{key}:` timestamp"),
551                    Some(format!(
552                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
553                    )),
554                    vec![],
555                );
556            } else if let Some(v) = value {
557                // Present and non-null. A scalar is checked for ISO-8601; a
558                // sequence/mapping is not a timestamp string at all and so
559                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
560                // through the way it did when `scalar_string` returned `None`
561                // and the branch silently no-oped).
562                match scalar_string(v) {
563                    Some(s) if is_iso8601(&s) => {}
564                    Some(s) => push(
565                        issues,
566                        Severity::Error,
567                        codes::FM_BAD_TIMESTAMP,
568                        rel,
569                        fm_key_line(fm_yaml, key),
570                        Some(key.into()),
571                        format!("`{key}` is not ISO-8601: {s:?}"),
572                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
573                        vec![],
574                    ),
575                    None => push(
576                        issues,
577                        Severity::Error,
578                        codes::FM_BAD_TIMESTAMP,
579                        rel,
580                        fm_key_line(fm_yaml, key),
581                        Some(key.into()),
582                        format!(
583                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
584                        ),
585                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
586                        vec![],
587                    ),
588                }
589            }
590        }
591    }
592    // ── tags shape ────────────────────────────────────────────────────────────
593    if let Some(tags) = fm.get("tags") {
594        if !is_flat_scalar_list(tags) {
595            push(
596                issues,
597                Severity::Warning,
598                codes::TAGS_MALFORMED,
599                rel,
600                fm_key_line(fm_yaml, "tags"),
601                Some("tags".into()),
602                "`tags` must be a flat YAML list of short scalar labels".into(),
603                Some("use block form: one `- <tag>` per line".into()),
604                vec![],
605            );
606        }
607    }
608
609    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
610    for key in detect_flow_form_link_lists(fm_yaml) {
611        push(
612            issues,
613            Severity::Error,
614            codes::WIKI_LINK_FLOW_FORM_LIST,
615            rel,
616            fm_key_line(fm_yaml, &key),
617            Some(key.clone()),
618            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
619            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
620            vec![],
621        );
622    }
623
624    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
625    // Skip keys that have an explicit `link to` schema spec — those are checked
626    // (with prefix enforcement) in `check_schema`, and double-reporting the same
627    // link via two paths would be noise.
628    let schema_link_keys: BTreeSet<String> =
629        effective_schema(store, type_.as_deref().unwrap_or(""))
630            .map(|s| {
631                s.fields
632                    .iter()
633                    .filter(|f| f.link_prefix.is_some())
634                    .map(|f| f.name.clone())
635                    .collect()
636            })
637            .unwrap_or_default();
638    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
639        if schema_link_keys.contains(&key) {
640            continue;
641        }
642        check_wiki_link(
643            store,
644            rel,
645            &link,
646            Some(link.line),
647            Some(&key),
648            basenames,
649            issues,
650        );
651    }
652
653    // ── policies: ignored types ──────────────────────────────────────────────
654    if let Some(t) = &type_ {
655        if store.config.ignored_types.iter().any(|it| it == t) {
656            push(
657                issues,
658                Severity::Info,
659                codes::POLICY_IGNORED_TYPE_PRESENT,
660                rel,
661                fm_key_line(fm_yaml, "type"),
662                Some("type".into()),
663                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
664                Some(
665                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
666                        .into(),
667                ),
668                // The policy source: `DB.md` declares the ignored type.
669                vec![PathBuf::from("DB.md")],
670            );
671        }
672        // A wiki-page deriving from an ignored-type record → warning. The
673        // decision lives in the shared `derived_from_ignored_type` entry point;
674        // this side only supplies the `derived_from` targets (with their line,
675        // which the issue carries) and renders the finding.
676        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
677            if let Some(hit) =
678                derived_from_ignored_type(store, t, std::iter::once(link.target.as_str()))
679            {
680                push(
681                    issues,
682                    Severity::Warning,
683                    codes::POLICY_IGNORED_TYPE_DERIVED,
684                    rel,
685                    Some(link.line),
686                    Some("derived_from".into()),
687                    format!(
688                        "wiki-page derives from ignored-type record `{}` (type `{}`)",
689                        hit.target, hit.target_type
690                    ),
691                    Some(
692                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
693                            .into(),
694                    ),
695                    // The ignored-type source record, plus `DB.md` (the policy
696                    // source that lists the ignored type).
697                    vec![
698                        PathBuf::from(format!("{}.md", hit.target)),
699                        PathBuf::from("DB.md"),
700                    ],
701                );
702            }
703        }
704    }
705
706    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
707    if let Some(t) = &type_ {
708        if let Some(schema) = effective_schema(store, t) {
709            check_schema(store, rel, fm, fm_yaml, &schema, issues);
710        }
711    }
712}
713
714/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
715fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
716    let line = fm_key_line(fm_yaml, "summary");
717    match fm.get("summary") {
718        None => push(
719            issues,
720            Severity::Error,
721            codes::SUMMARY_MISSING,
722            rel,
723            // A missing `summary` key has no line of its own → anchor to the
724            // frontmatter block top (line 1), the EXPECTED field-absence rule.
725            fm_key_line_or_top(fm_yaml, "summary"),
726            Some("summary".into()),
727            "content file has no `summary`".into(),
728            Some("run `dbmd fm init`".into()),
729            vec![],
730        ),
731        Some(v) => {
732            let s = scalar_string(v).unwrap_or_default();
733            if s.trim().is_empty() {
734                push(
735                    issues,
736                    Severity::Error,
737                    codes::SUMMARY_EMPTY,
738                    rel,
739                    line,
740                    Some("summary".into()),
741                    "`summary` is present but empty".into(),
742                    Some("write a one-line summary, or run `dbmd fm init`".into()),
743                    vec![],
744                );
745            } else if s.contains('\n') {
746                push(
747                    issues,
748                    Severity::Error,
749                    codes::SUMMARY_MULTILINE,
750                    rel,
751                    line,
752                    Some("summary".into()),
753                    "`summary` must be one line (contains a newline)".into(),
754                    Some("collapse the summary to a single line".into()),
755                    vec![],
756                );
757            } else if s.chars().count() > MAX_SUMMARY_LEN {
758                push(
759                    issues,
760                    Severity::Warning,
761                    codes::SUMMARY_TOO_LONG,
762                    rel,
763                    line,
764                    Some("summary".into()),
765                    format!(
766                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
767                        s.chars().count()
768                    ),
769                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
770                    vec![],
771                );
772            }
773        }
774    }
775}
776
777/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
778fn check_body_wiki_links(
779    store: &Store,
780    rel: &Path,
781    body: &str,
782    fm_end_line: u32,
783    basenames: Option<&BasenameIndex>,
784    issues: &mut Vec<Issue>,
785) {
786    for link in extract_wiki_links(body) {
787        // Body lines are offset past the frontmatter block. `link.line` is
788        // 1-based within `body`; the body starts at `fm_end_line + 1`.
789        let abs_line = fm_end_line + link.line;
790        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
791    }
792}
793
/// Maps a file's bare basename (its file stem) to every store-relative path
/// that carries that basename.
///
/// Built once per `validate --all` sweep so the short-form wiki-link check can
/// tell a merely short-form target (`WIKI_LINK_SHORT_FORM`) from an *ambiguous*
/// one whose bare basename matches two or more files (`WIKI_LINK_AMBIGUOUS`,
/// the defensive code). The working-set path passes `None` instead — that loop
/// is O(changed) and never walks the store, so it reports the plain short-form
/// error without the scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Fold the already-swept file list (gathered by `validate_all`; no extra
/// walk) into a [`BasenameIndex`].
///
/// Paths with no file stem, or whose stem is not valid UTF-8, are left out.
/// Paths sharing a stem keep the order in which they appear in `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files.iter().fold(BasenameIndex::new(), |mut index, path| {
        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
            index.entry(stem.to_owned()).or_default().push(path.clone());
        }
        index
    })
}
814
815/// The shared per-wiki-link doctrine + integrity check used by both body links
816/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
817/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
818/// when its bare basename matches ≥2 files.
819fn check_wiki_link(
820    store: &Store,
821    rel: &Path,
822    link: &Link,
823    line: Option<u32>,
824    key: Option<&str>,
825    basenames: Option<&BasenameIndex>,
826    issues: &mut Vec<Issue>,
827) {
828    let bare = link.target.trim_end_matches(".md");
829
830    // Short-form: not a full store-relative path (no `/`, or first segment isn't
831    // a known layer).
832    if !is_full_store_path(bare) {
833        // Ambiguous (defensive) takes precedence over plain short-form when the
834        // target is a bare basename (no `/`) that matches ≥2 files in the store.
835        // Only computable in the sweep (where `basenames` is populated); the
836        // working-set path falls through to the plain short-form error.
837        if !bare.contains('/') {
838            if let Some(idx) = basenames {
839                if let Some(matches) = idx.get(bare) {
840                    if matches.len() >= 2 {
841                        let mut related = matches.clone();
842                        related.sort();
843                        push(
844                            issues,
845                            Severity::Error,
846                            codes::WIKI_LINK_AMBIGUOUS,
847                            rel,
848                            line,
849                            key.map(str::to_string),
850                            format!(
851                                "short-form wiki-link `[[{}]]` matches multiple files",
852                                link.target
853                            ),
854                            Some("use the full store-relative path to disambiguate".into()),
855                            related,
856                        );
857                        return;
858                    }
859                }
860            }
861        }
862        push(
863            issues,
864            Severity::Error,
865            codes::WIKI_LINK_SHORT_FORM,
866            rel,
867            line,
868            key.map(str::to_string),
869            format!(
870                "wiki-link `[[{}]]` is not a full store-relative path",
871                link.target
872            ),
873            short_form_suggestion(bare),
874            vec![],
875        );
876        // Don't also report broken; the agent must fix the form first.
877        return;
878    }
879
880    // `.md` extension → warning, then still check existence.
881    if link.target.ends_with(".md") {
882        push(
883            issues,
884            Severity::Warning,
885            codes::WIKI_LINK_HAS_EXTENSION,
886            rel,
887            line,
888            key.map(str::to_string),
889            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
890            Some(format!("drop the extension: [[{bare}]]")),
891            vec![],
892        );
893    }
894
895    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
896    // same way the graph engine does — the literal path first (so a link to a
897    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
898    // the `.md`-appended path.
899    match resolve_wiki_target(store, bare) {
900        TargetResolution::Exists => {}
901        TargetResolution::Missing => push(
902            issues,
903            Severity::Error,
904            codes::WIKI_LINK_BROKEN,
905            rel,
906            line,
907            key.map(str::to_string),
908            format!("wiki-link target `{bare}` doesn't exist"),
909            Some(format!(
910                "create `{bare}.md`, or point the link at an existing file"
911            )),
912            vec![],
913        ),
914        TargetResolution::Unsafe => push(
915            issues,
916            Severity::Error,
917            codes::WIKI_LINK_BROKEN,
918            rel,
919            line,
920            key.map(str::to_string),
921            format!("wiki-link target `{bare}` is not a safe store-relative path"),
922            Some("use a full store-relative path under sources/, records/, or wiki/".into()),
923            vec![],
924        ),
925    }
926}
927
928// ─────────────────────────────────────────────────────────────────────────────
929//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
930// ─────────────────────────────────────────────────────────────────────────────
931
932/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
933/// block, or `None`. This is the **only** source of schema enforcement — the
934/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
935/// store that wants its `contact` / `expense` / etc. fields enforced declares
936/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
937/// copy-in starting point.
938fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
939    store.config.schemas.get(type_).cloned()
940}
941
942/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
943fn check_schema(
944    store: &Store,
945    rel: &Path,
946    fm: &BTreeMap<String, Value>,
947    fm_yaml: &str,
948    schema: &Schema,
949    issues: &mut Vec<Issue>,
950) {
951    for spec in &schema.fields {
952        let present = fm.get(&spec.name);
953        let line = fm_key_line(fm_yaml, &spec.name);
954
955        // Required. "Empty" means: the key is absent, or its value carries no
956        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
957        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
958        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
959        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
960        // a required field with a null or empty-collection value pass silently;
961        // route them through `is_empty_value` instead.
962        let is_empty = match present {
963            None => true,
964            Some(v) => is_empty_value(v),
965        };
966        if spec.required && is_empty {
967            push(
968                issues,
969                Severity::Error,
970                codes::SCHEMA_MISSING_REQUIRED,
971                rel,
972                // Absent key → anchor to the frontmatter top (line 1); a
973                // present-but-empty value keeps its own line.
974                fm_key_line_or_top(fm_yaml, &spec.name),
975                Some(spec.name.clone()),
976                format!("required field `{}` is absent or empty", spec.name),
977                Some(format!("set `{}` to a non-empty value", spec.name)),
978                vec![],
979            );
980            continue;
981        }
982        let Some(value) = present else { continue };
983
984        // An OPTIONAL field that is `null` or empty is simply unset — there is
985        // no value to shape/enum/link-check. (The required+empty case already
986        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
987        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
988        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
989        let value_empty = value.is_null()
990            || scalar_string(value)
991                .map(|s| s.trim().is_empty())
992                .unwrap_or(false);
993        if !spec.required && value_empty {
994            continue;
995        }
996
997        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
998        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
999        if let Some(prefix) = &spec.link_prefix {
1000            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1001            continue; // a link field is never also shape/enum-checked
1002        }
1003
1004        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1005        // or mapping satisfies neither, and would otherwise slip through both
1006        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1007        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1008        // than let a structurally-wrong value validate clean. (Link fields,
1009        // which legitimately take block-form sequences, already `continue`d.)
1010        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1011            push(
1012                issues,
1013                Severity::Error,
1014                codes::SCHEMA_SHAPE_MISMATCH,
1015                rel,
1016                line,
1017                Some(spec.name.clone()),
1018                format!(
1019                    "`{}` must be a scalar value, found a list or mapping",
1020                    spec.name
1021                ),
1022                Some(format!("set `{}` to a single scalar value", spec.name)),
1023                vec![],
1024            );
1025            continue;
1026        }
1027
1028        // enum
1029        if let Some(allowed) = &spec.enum_values {
1030            if let Some(s) = scalar_string(value) {
1031                if !allowed.iter().any(|a| a == &s) {
1032                    push(
1033                        issues,
1034                        Severity::Error,
1035                        codes::SCHEMA_ENUM_VIOLATION,
1036                        rel,
1037                        line,
1038                        Some(spec.name.clone()),
1039                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1040                        Some(format!("use one of: {}", allowed.join(", "))),
1041                        vec![],
1042                    );
1043                }
1044            }
1045            continue;
1046        }
1047
1048        // shape
1049        if let Some(shape) = spec.shape {
1050            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1051        }
1052    }
1053}
1054
1055/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1056/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1057/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1058/// recognized exactly like the quoted form.
1059fn check_schema_link(
1060    store: &Store,
1061    rel: &Path,
1062    field: &str,
1063    fm_yaml: &str,
1064    prefix: &Path,
1065    line: Option<u32>,
1066    issues: &mut Vec<Issue>,
1067) {
1068    let prefix_str = prefix.to_string_lossy();
1069    let prefix_str = prefix_str.trim_end_matches('/');
1070    let suggestion = |target_leaf: &str| {
1071        Some(format!(
1072            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1073        ))
1074    };
1075
1076    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1077    if links.is_empty() {
1078        // No wiki-link in the field's value → it's a plain string.
1079        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1080        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1081        let leaf = slugish(raw);
1082        push(
1083            issues,
1084            Severity::Error,
1085            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1086            rel,
1087            line,
1088            Some(field.to_string()),
1089            format!(
1090                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1091            ),
1092            suggestion(&leaf),
1093            vec![],
1094        );
1095        return;
1096    }
1097
1098    for link in links {
1099        if link.target.ends_with(".md") {
1100            let bare = link.target.trim_end_matches(".md");
1101            push(
1102                issues,
1103                Severity::Warning,
1104                codes::WIKI_LINK_HAS_EXTENSION,
1105                rel,
1106                Some(link.line),
1107                Some(field.to_string()),
1108                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1109                Some(format!("drop the extension: [[{bare}]]")),
1110                vec![],
1111            );
1112        }
1113        let bare = link.target.trim_end_matches(".md");
1114        if !path_under_prefix(bare, prefix_str) {
1115            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1116            push(
1117                issues,
1118                Severity::Error,
1119                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1120                rel,
1121                line,
1122                Some(field.to_string()),
1123                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1124                suggestion(leaf),
1125                vec![],
1126            );
1127        } else {
1128            // Correct prefix — still surface a broken target so the agent sees
1129            // one consistent vocabulary. Resolve like the graph engine (literal
1130            // path first, then `.md`) so a `link to sources/` field pointing at a
1131            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1132            match resolve_wiki_target(store, bare) {
1133                TargetResolution::Exists => {}
1134                TargetResolution::Missing => push(
1135                    issues,
1136                    Severity::Error,
1137                    codes::WIKI_LINK_BROKEN,
1138                    rel,
1139                    line,
1140                    Some(field.to_string()),
1141                    format!("wiki-link target `{bare}` doesn't exist"),
1142                    Some(format!(
1143                        "create `{bare}.md`, or point the link at an existing file"
1144                    )),
1145                    vec![],
1146                ),
1147                TargetResolution::Unsafe => push(
1148                    issues,
1149                    Severity::Error,
1150                    codes::WIKI_LINK_BROKEN,
1151                    rel,
1152                    line,
1153                    Some(field.to_string()),
1154                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1155                    Some(
1156                        "use a full store-relative path under sources/, records/, or wiki/".into(),
1157                    ),
1158                    vec![],
1159                ),
1160            }
1161        }
1162    }
1163}
1164
1165/// Shape enforcement for a non-link, non-enum schema field.
1166fn check_schema_shape(
1167    rel: &Path,
1168    field: &str,
1169    value: &Value,
1170    shape: Shape,
1171    line: Option<u32>,
1172    issues: &mut Vec<Issue>,
1173) {
1174    let s = scalar_string(value).unwrap_or_default();
1175    let ok = match shape {
1176        Shape::String => true, // any scalar string
1177        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1178        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1179        Shape::Date => is_iso8601_date_or_datetime(&s),
1180        Shape::Email => is_email(&s),
1181        Shape::Currency => is_currency(&s),
1182        Shape::Url => is_url(&s),
1183    };
1184    if !ok {
1185        push(
1186            issues,
1187            Severity::Error,
1188            codes::SCHEMA_SHAPE_MISMATCH,
1189            rel,
1190            line,
1191            Some(field.to_string()),
1192            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1193            Some(shape_suggestion(shape)),
1194            vec![],
1195        );
1196    }
1197}
1198
1199// ─────────────────────────────────────────────────────────────────────────────
1200//  Cross-file: entity-dedup collisions (validate_all only)
1201// ─────────────────────────────────────────────────────────────────────────────
1202
1203/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1204///
1205/// `DUP_ID` is universal (two files with the same explicit `id`).
1206/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1207/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1208/// uniqueness constraint, and two records of that type whose declared values
1209/// collide warn. No type carries a built-in dedup key — the store opts in.
1210///
1211/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1212/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1213/// lexicographically smallest store-relative path in the group (a total order →
1214/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1215/// that field's line on the reported file and carries it as `key`; a multi-field
1216/// key anchors to line 1 with a null key.
1217fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1218    // Path → frontmatter YAML, for resolving the anchor field's line on the
1219    // reported (smallest-path) member.
1220    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1221        .iter()
1222        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1223        .collect();
1224
1225    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1226    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1227    for (rel, p) in parsed {
1228        if let Some(map) = &p.fm {
1229            if let Some(id) = map.get("id").and_then(scalar_string) {
1230                if !id.trim().is_empty() {
1231                    by_id.entry(id).or_default().push(rel.clone());
1232                }
1233            }
1234        }
1235    }
1236    for (id, files) in &by_id {
1237        if files.len() > 1 {
1238            let (reported, related) = canonical_and_related(files);
1239            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1240            push(
1241                issues,
1242                Severity::Error,
1243                codes::DUP_ID,
1244                &reported,
1245                line,
1246                Some("id".into()),
1247                format!("id {id:?} is declared by more than one file"),
1248                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1249                related,
1250            );
1251        }
1252    }
1253
1254    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1255    // Every constraint comes from the store's `## Schemas`; a type with no
1256    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1257    // key-ordered, so emitted issues are deterministic across runs.
1258    for (type_name, schema) in &store.config.schemas {
1259        for key_fields in &schema.unique_keys {
1260            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1261        }
1262    }
1263}
1264
1265/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1266/// declared `key_fields` render to the same token tuple. Files missing any key
1267/// field are skipped — an incomplete key is never a collision.
1268///
1269/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1270/// store-relative path; `related` is the rest. A single-field key anchors to
1271/// that field's line on the reported file and carries it as `key`; a multi-field
1272/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1273fn soft_dup(
1274    parsed: &[(PathBuf, Parsed)],
1275    issues: &mut Vec<Issue>,
1276    type_: &str,
1277    key_fields: &[String],
1278    fm_yaml_of: &HashMap<&PathBuf, &str>,
1279) {
1280    if key_fields.is_empty() {
1281        return;
1282    }
1283    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1284    for (rel, p) in parsed {
1285        let is_type =
1286            p.fm.as_ref()
1287                .and_then(|m| m.get("type"))
1288                .and_then(scalar_string)
1289                .map(|t| t == type_)
1290                .unwrap_or(false);
1291        if !is_type {
1292            continue;
1293        }
1294        if let Some(key) = dedup_key(p, key_fields) {
1295            groups.entry(key).or_default().push(rel.clone());
1296        }
1297    }
1298    // HashMap iteration is nondeterministic; sort by reported member so the
1299    // emitted issue order is stable across runs.
1300    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1301        .values()
1302        .filter(|files| files.len() > 1)
1303        .map(|files| canonical_and_related(files))
1304        .collect();
1305    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1306
1307    let fields_disp = key_fields.join(", ");
1308    for (reported, related) in collisions {
1309        // Single-field keys anchor to the field's line + carry the key; multi-
1310        // field keys anchor to line 1 with a null key.
1311        let (line, key) = if key_fields.len() == 1 {
1312            (
1313                fm_yaml_of
1314                    .get(&reported)
1315                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1316                Some(key_fields[0].clone()),
1317            )
1318        } else {
1319            (Some(1), None)
1320        };
1321        let n = related.len();
1322        push(
1323            issues,
1324            Severity::Warning,
1325            codes::DUP_UNIQUE_KEY,
1326            &reported,
1327            line,
1328            key,
1329            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1330            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1331            related,
1332        );
1333    }
1334}
1335
1336/// Render a type's `unique:` key for one file: each field's dedup token in
1337/// order, or `None` if any field is absent/empty (an incomplete key never
1338/// collides).
1339fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1340    let mut out = Vec::with_capacity(key_fields.len());
1341    for f in key_fields {
1342        out.push(dedup_token(p, f)?);
1343    }
1344    Some(out)
1345}
1346
1347/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1348/// values (single or block-sequence list) reduce to their lower-cased target
1349/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1350/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1351fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1352    // Wiki-links first — read from the raw frontmatter text so the unquoted
1353    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1354    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1355    if !links.is_empty() {
1356        let set: BTreeSet<String> = links
1357            .into_iter()
1358            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1359            .filter(|t| !t.is_empty())
1360            .collect();
1361        return if set.is_empty() {
1362            None
1363        } else {
1364            Some(set.into_iter().collect::<Vec<_>>().join(","))
1365        };
1366    }
1367    match p.fm.as_ref()?.get(field) {
1368        Some(Value::Sequence(items)) => {
1369            let set: BTreeSet<String> = items
1370                .iter()
1371                .filter_map(scalar_string)
1372                .map(|s| s.trim().to_lowercase())
1373                .filter(|t| !t.is_empty())
1374                .collect();
1375            if set.is_empty() {
1376                None
1377            } else {
1378                Some(set.into_iter().collect::<Vec<_>>().join(","))
1379            }
1380        }
1381        Some(v) => {
1382            let s = scalar_string(v)?.trim().to_lowercase();
1383            if s.is_empty() {
1384                None
1385            } else {
1386                Some(s)
1387            }
1388        }
1389        None => None,
1390    }
1391}
1392
/// Split a collision group into `(reported, related)`.
///
/// The lexicographically smallest store-relative path becomes the reported
/// member; every other path, in ascending order, goes into `related`. Because
/// store-relative path is a total order the split is deterministic — the
/// property reporting rule #1 relies on.
///
/// # Panics
///
/// Panics if `files` is empty; callers only pass groups of two or more.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.to_vec();
    ordered.sort();

    let mut members = ordered.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1404
1405// ─────────────────────────────────────────────────────────────────────────────
1406//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1407// ─────────────────────────────────────────────────────────────────────────────
1408
1409/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1410fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1411    // Group content files by their immediate parent folder (the type-folder,
1412    // *across date shards* — a sharded file's "type folder" is the folder right
1413    // under the layer). We key on the type-folder so shards roll up correctly.
1414    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1415    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1416    for rel in files {
1417        // The layer is the first path component — recorded independently of the
1418        // type-folder so a layer containing only loose files still requires an
1419        // `index.md`.
1420        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1421            match layer {
1422                "sources" => layers_present.insert("sources"),
1423                "records" => layers_present.insert("records"),
1424                "wiki" => layers_present.insert("wiki"),
1425                _ => false,
1426            };
1427        }
1428        if let Some(tf) = type_folder_of(rel) {
1429            type_folders.entry(tf).or_default().push(rel.clone());
1430        }
1431    }
1432
1433    // ── Root index.md ─────────────────────────────────────────────────────────
1434    if !files.is_empty() {
1435        let root_index = store.root.join("index.md");
1436        if !root_index.is_file() {
1437            push(
1438                issues,
1439                Severity::Error,
1440                codes::INDEX_MISSING,
1441                Path::new("index.md"),
1442                None,
1443                None,
1444                "store has files but no root `index.md`".into(),
1445                Some("run `dbmd index rebuild`".into()),
1446                vec![],
1447            );
1448        } else {
1449            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1450        }
1451    }
1452
1453    // ── Layer index.md ────────────────────────────────────────────────────────
1454    for layer in &layers_present {
1455        let layer_index_rel = PathBuf::from(layer).join("index.md");
1456        let abs = store.root.join(&layer_index_rel);
1457        if !abs.is_file() {
1458            push(
1459                issues,
1460                Severity::Error,
1461                codes::INDEX_MISSING,
1462                &layer_index_rel,
1463                None,
1464                None,
1465                format!("layer `{layer}/` has files but no `index.md`"),
1466                Some("run `dbmd index rebuild`".into()),
1467                vec![],
1468            );
1469        } else {
1470            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1471        }
1472    }
1473
1474    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1475    for (tf, members) in &type_folders {
1476        let index_md_rel = tf.join("index.md");
1477        let index_md_abs = store.root.join(&index_md_rel);
1478        let index_md_present = index_md_abs.is_file();
1479        if !index_md_present {
1480            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1481            // on the FOLDER (not the would-be `index.md` path). When the index is
1482            // entirely missing we do NOT additionally evaluate per-entry
1483            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1484            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1485            push(
1486                issues,
1487                Severity::Error,
1488                codes::INDEX_MISSING,
1489                tf,
1490                None,
1491                None,
1492                format!("non-empty folder `{}` has no index.md", tf.display()),
1493                Some(format!(
1494                    "run `dbmd index rebuild --folder {}`",
1495                    tf.display()
1496                )),
1497                vec![],
1498            );
1499            continue;
1500        }
1501
1502        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1503        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1504
1505        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1506        // when the `index.md` is present (above): a folder whose entire index is
1507        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1508        let jsonl_rel = tf.join("index.jsonl");
1509        let jsonl_abs = store.root.join(&jsonl_rel);
1510        if !jsonl_abs.is_file() {
1511            push(
1512                issues,
1513                Severity::Error,
1514                codes::INDEX_JSONL_MISSING,
1515                &jsonl_rel,
1516                None,
1517                None,
1518                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1519                Some("run `dbmd index rebuild`".into()),
1520                vec![],
1521            );
1522        } else {
1523            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1524        }
1525    }
1526
1527    // ── Orphan index.md: an index file in a folder with no content. ──────────
1528    for rel in walk_index_files(&store.root) {
1529        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1530        let parent_str = parent.to_string_lossy().to_string();
1531        let is_canonical = parent_str.is_empty() // root
1532            || matches!(parent_str.as_str(), "sources" | "records" | "wiki")
1533            || type_folders.contains_key(&parent);
1534        if !is_canonical {
1535            push(
1536                issues,
1537                Severity::Warning,
1538                codes::INDEX_ORPHAN,
1539                &rel,
1540                None,
1541                None,
1542                format!(
1543                    "`{}` sits in an empty or non-canonical folder",
1544                    rel.display()
1545                ),
1546                Some("remove it, or run `dbmd index rebuild`".into()),
1547                vec![],
1548            );
1549        }
1550    }
1551}
1552
1553/// Check a type-folder `index.md`'s entries against the folder's actual files:
1554/// stale entries (target gone), missing entries (file not listed), and
1555/// summary mismatches.
1556fn check_type_folder_index_md(
1557    store: &Store,
1558    tf: &Path,
1559    index_rel: &Path,
1560    members: &[PathBuf],
1561    issues: &mut Vec<Issue>,
1562) {
1563    let abs = store.root.join(index_rel);
1564    let Ok(text) = std::fs::read_to_string(&abs) else {
1565        return;
1566    };
1567    let entries = parse_index_entries(&text);
1568
1569    let listed: BTreeSet<PathBuf> = entries
1570        .iter()
1571        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1572        .collect();
1573
1574    // Stale entries + summary mismatch.
1575    for entry in &entries {
1576        let bare = entry.target.trim_end_matches(".md");
1577        // Resolve like the graph engine (literal path first, then `.md`) so an
1578        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1579        let target_abs = match resolved_target_abs(store, bare) {
1580            Some(abs) => abs,
1581            None => {
1582                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1583                    push(
1584                        issues,
1585                        Severity::Error,
1586                        codes::INDEX_STALE_ENTRY,
1587                        index_rel,
1588                        Some(entry.line),
1589                        None,
1590                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1591                        Some("run `dbmd index rebuild`".into()),
1592                        vec![],
1593                    );
1594                } else {
1595                    push(
1596                        issues,
1597                        Severity::Error,
1598                        codes::INDEX_STALE_ENTRY,
1599                        index_rel,
1600                        Some(entry.line),
1601                        None,
1602                        format!("index entry `[[{bare}]]` points at a missing file"),
1603                        Some("run `dbmd index rebuild`".into()),
1604                        // The stale target the entry names (the file that no
1605                        // longer exists) — so the agent can locate the dangling
1606                        // reference.
1607                        vec![PathBuf::from(format!("{bare}.md"))],
1608                    );
1609                }
1610                continue;
1611            }
1612        };
1613        // Summary mismatch: the entry text must equal the file's `summary`. A
1614        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1615        // summary is also a mismatch — the SPEC requires every type-folder index
1616        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1617        // missing quote can't validate clean just because there's nothing to
1618        // compare.
1619        if let Some(expected) = read_summary(&target_abs) {
1620            match &entry.summary_text {
1621                Some(text_part) if text_part.trim() != expected.trim() => {
1622                    push(
1623                        issues,
1624                        Severity::Error,
1625                        codes::INDEX_SUMMARY_MISMATCH,
1626                        index_rel,
1627                        Some(entry.line),
1628                        None,
1629                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1630                        Some("run `dbmd index rebuild`".into()),
1631                        vec![PathBuf::from(format!("{bare}.md"))],
1632                    );
1633                }
1634                None if !expected.trim().is_empty() => {
1635                    push(
1636                        issues,
1637                        Severity::Error,
1638                        codes::INDEX_SUMMARY_MISMATCH,
1639                        index_rel,
1640                        Some(entry.line),
1641                        None,
1642                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1643                        Some("run `dbmd index rebuild`".into()),
1644                        vec![PathBuf::from(format!("{bare}.md"))],
1645                    );
1646                }
1647                _ => {}
1648            }
1649        }
1650    }
1651
1652    // Missing entries: a member file not listed. Skip the index/log meta files.
1653    // The browse view caps at 500; only flag a missing entry when the folder is
1654    // under the cap (a capped folder legitimately omits older files).
1655    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1656    if content_members.len() <= 500 {
1657        for m in content_members {
1658            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1659            if !listed.contains(&bare) {
1660                push(
1661                    issues,
1662                    Severity::Error,
1663                    codes::INDEX_MISSING_ENTRY,
1664                    index_rel,
1665                    None,
1666                    None,
1667                    format!(
1668                        "file `{}` is not listed in its folder's `index.md`",
1669                        m.display()
1670                    ),
1671                    Some("run `dbmd index rebuild`".into()),
1672                    vec![(*m).clone()],
1673                );
1674            }
1675        }
1676    }
1677    let _ = tf;
1678}
1679
1680/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1681/// folder (uncapped), every record must point at a real file, and each record's
1682/// fields must match the file's frontmatter.
1683fn check_type_folder_index_jsonl(
1684    store: &Store,
1685    tf: &Path,
1686    jsonl_rel: &Path,
1687    members: &[PathBuf],
1688    issues: &mut Vec<Issue>,
1689) {
1690    let abs = store.root.join(jsonl_rel);
1691    let Ok(text) = std::fs::read_to_string(&abs) else {
1692        return;
1693    };
1694
1695    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1696    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1697    for (i, line) in text.lines().enumerate() {
1698        let line = line.trim();
1699        if line.is_empty() {
1700            continue;
1701        }
1702        let rec: serde_json::Value = match serde_json::from_str(line) {
1703            Ok(v) => v,
1704            Err(e) => {
1705                push(
1706                    issues,
1707                    Severity::Error,
1708                    codes::INDEX_JSONL_DESYNC,
1709                    jsonl_rel,
1710                    Some((i + 1) as u32),
1711                    None,
1712                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1713                    Some("run `dbmd index rebuild`".into()),
1714                    vec![],
1715                );
1716                continue;
1717            }
1718        };
1719        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1720            if !is_safe_store_relative_path(Path::new(path)) {
1721                push(
1722                    issues,
1723                    Severity::Error,
1724                    codes::INDEX_JSONL_DESYNC,
1725                    jsonl_rel,
1726                    Some((i + 1) as u32),
1727                    None,
1728                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1729                    Some("run `dbmd index rebuild`".into()),
1730                    vec![],
1731                );
1732                continue;
1733            }
1734            records.insert(PathBuf::from(path), rec);
1735        }
1736    }
1737
1738    let member_set: BTreeSet<PathBuf> = members
1739        .iter()
1740        .filter(|m| is_content_file(m))
1741        .cloned()
1742        .collect();
1743
1744    // jsonl record → missing file = desync.
1745    for path in records.keys() {
1746        let target_abs = store.root.join(path);
1747        if !target_abs.is_file() {
1748            push(
1749                issues,
1750                Severity::Error,
1751                codes::INDEX_JSONL_DESYNC,
1752                jsonl_rel,
1753                None,
1754                None,
1755                format!(
1756                    "`index.jsonl` record points at missing file `{}`",
1757                    path.display()
1758                ),
1759                Some("run `dbmd index rebuild`".into()),
1760                vec![],
1761            );
1762        }
1763    }
1764
1765    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1766    for m in &member_set {
1767        if !records.contains_key(m) {
1768            push(
1769                issues,
1770                Severity::Error,
1771                codes::INDEX_JSONL_DESYNC,
1772                jsonl_rel,
1773                None,
1774                None,
1775                format!(
1776                    "file `{}` is missing from the complete `index.jsonl`",
1777                    m.display()
1778                ),
1779                Some("run `dbmd index rebuild`".into()),
1780                vec![m.clone()],
1781            );
1782        }
1783    }
1784
1785    // Record fields stale vs. frontmatter. SPEC § Validation defines
1786    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1787    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1788    // search paths read every field straight from these sidecars (`tags`,
1789    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1790    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1791    // a stale value answer queries with data that exists in no `.md` file.
1792    //
1793    // Rather than re-list (and drift from) every projected key, rebuild the
1794    // record the canonical projection would write for this file
1795    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1796    // and diff the two as flat JSON maps. Every key the projection emits is
1797    // covered automatically; `path` is the join key and is skipped.
1798    for (path, rec) in &records {
1799        let target_abs = store.root.join(path);
1800        if !target_abs.is_file() {
1801            continue;
1802        }
1803        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1804        else {
1805            continue; // unreadable / unparseable frontmatter is reported elsewhere
1806        };
1807        let Ok(expected_json) = serde_json::to_value(&expected) else {
1808            continue;
1809        };
1810        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1811            continue;
1812        };
1813
1814        // Compare the union of keys present on either side; a key the file
1815        // projects but the sidecar omits is just as stale as a wrong value.
1816        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1817        for key in have.keys().chain(want.keys()) {
1818            if key == "path" {
1819                continue;
1820            }
1821            if have.get(key) != want.get(key) {
1822                mismatched_keys.insert(key);
1823            }
1824        }
1825
1826        if !mismatched_keys.is_empty() {
1827            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1828            push(
1829                issues,
1830                Severity::Error,
1831                codes::INDEX_JSONL_STALE,
1832                jsonl_rel,
1833                None,
1834                Some(keys.join(",")),
1835                format!(
1836                    "`index.jsonl` record for `{}` is stale ({})",
1837                    path.display(),
1838                    keys.join(", ")
1839                ),
1840                Some("run `dbmd index rebuild`".into()),
1841                vec![path.clone()],
1842            );
1843        }
1844    }
1845    let _ = tf;
1846}
1847
1848/// Check an index's `scope:` frontmatter against its filesystem location.
1849fn check_index_scope(
1850    store: &Store,
1851    index_rel: &Path,
1852    expected_scope: &str,
1853    expected_folder: Option<&str>,
1854    issues: &mut Vec<Issue>,
1855) {
1856    let abs = store.root.join(index_rel);
1857    let Ok(text) = std::fs::read_to_string(&abs) else {
1858        return;
1859    };
1860    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1861        return;
1862    };
1863    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1864        return;
1865    };
1866    let fm = yaml_map_to_btree(&map);
1867
1868    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1869        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1870        let scope_ok =
1871            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1872        if !scope_ok {
1873            push(
1874                issues,
1875                Severity::Warning,
1876                codes::INDEX_WRONG_SCOPE,
1877                index_rel,
1878                fm_key_line(&yaml, "scope"),
1879                Some("scope".into()),
1880                format!(
1881                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1882                ),
1883                Some(format!("set `scope: {expected_scope}`")),
1884                vec![],
1885            );
1886        }
1887    }
1888    // folder: must match for layer/type-folder indexes.
1889    if let Some(expected) = expected_folder {
1890        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1891            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1892                push(
1893                    issues,
1894                    Severity::Warning,
1895                    codes::INDEX_WRONG_SCOPE,
1896                    index_rel,
1897                    fm_key_line(&yaml, "folder"),
1898                    Some("folder".into()),
1899                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1900                    Some(format!("set `folder: {expected}`")),
1901                    vec![],
1902                );
1903            }
1904        }
1905    }
1906}
1907
1908// ─────────────────────────────────────────────────────────────────────────────
1909//  Cross-file: log.md well-formedness + ordering (validate_all only)
1910// ─────────────────────────────────────────────────────────────────────────────
1911
1912/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
1913/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
1914///
1915/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
1916/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
1917/// LOG_* checks read only the active file, an entry `validate --all` flagged
1918/// while it lived in `log.md` would stop being flagged the moment a newer-month
1919/// append rotated it into an archive — even though the log readers still surface
1920/// that exact entry to the curator. Scanning the archives too keeps validate and
1921/// the readers in agreement after a rotation.
1922///
1923/// Order: archives oldest-month first, then the active `log.md` last — the true
1924/// chronological timeline — so the out-of-order check threads `prev` across the
1925/// rotation boundary the same way it does within a single file.
1926fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1927    let mut prev: Option<DateTime<FixedOffset>> = None;
1928    for rel in log_files_chronological(store) {
1929        check_log_file(store, &rel, &mut prev, issues);
1930    }
1931}
1932
1933/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
1934/// archive oldest-month first, then the active `log.md` last. Missing files are
1935/// simply absent from the list.
1936fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
1937    let mut files: Vec<PathBuf> = Vec::new();
1938    let archive_dir = store.root.join("log");
1939    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
1940        let mut archives: Vec<PathBuf> = entries
1941            .flatten()
1942            .map(|e| e.path())
1943            .filter(|p| {
1944                p.is_file()
1945                    && p.file_name()
1946                        .and_then(|s| s.to_str())
1947                        .and_then(|n| n.strip_suffix(".md"))
1948                        .is_some_and(is_year_month_archive)
1949            })
1950            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
1951            .collect();
1952        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
1953        archives.sort();
1954        files.extend(archives);
1955    }
1956    // The active file holds the current month — newest, so it comes last.
1957    if store.root.join("log.md").is_file() {
1958        files.push(PathBuf::from("log.md"));
1959    }
1960    files
1961}
1962
1963/// Scan one log file's entry headers, threading the running `prev` timestamp so
1964/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
1965/// given store-relative path so an archived entry points at its archive file.
1966fn check_log_file(
1967    store: &Store,
1968    log_rel: &Path,
1969    prev: &mut Option<DateTime<FixedOffset>>,
1970    issues: &mut Vec<Issue>,
1971) {
1972    let abs = store.root.join(log_rel);
1973    let Ok(text) = std::fs::read_to_string(&abs) else {
1974        return;
1975    };
1976
1977    for (i, line) in text.lines().enumerate() {
1978        if !line.starts_with("## [") {
1979            continue;
1980        }
1981        let line_no = (i + 1) as u32;
1982        match parse_log_header(line) {
1983            None => push(
1984                issues,
1985                Severity::Error,
1986                codes::LOG_BAD_TIMESTAMP,
1987                log_rel,
1988                Some(line_no),
1989                None,
1990                format!("log entry header has an unparseable timestamp: {line:?}"),
1991                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
1992                vec![],
1993            ),
1994            Some((ts, kind, _object)) => {
1995                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
1996                    push(
1997                        issues,
1998                        Severity::Warning,
1999                        codes::LOG_UNKNOWN_KIND,
2000                        log_rel,
2001                        Some(line_no),
2002                        None,
2003                        format!("log entry kind `{kind}` is not recognized"),
2004                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2005                        vec![],
2006                    );
2007                }
2008                if let Some(p) = *prev {
2009                    if ts < p {
2010                        push(
2011                            issues,
2012                            Severity::Warning,
2013                            codes::LOG_OUT_OF_ORDER,
2014                            log_rel,
2015                            Some(line_no),
2016                            None,
2017                            "log entry is older than the entry above it (possible rewrite)".into(),
2018                            Some("append corrective entries; never reorder past ones".into()),
2019                            vec![],
2020                        );
2021                    }
2022                }
2023                *prev = Some(ts);
2024            }
2025        }
2026    }
2027}
2028
2029// ─────────────────────────────────────────────────────────────────────────────
2030//  Self-contained primitives (collapse onto sibling modules once they land)
2031// ─────────────────────────────────────────────────────────────────────────────
2032
/// A minimal wiki-link found in a body: its target and the line it was found
/// on.
///
/// NOTE(review): an earlier version of this comment also listed an "optional
/// display", but the struct carries no such field — only `target` and `line`.
#[derive(Debug)]
struct Link {
    // The link's target text.
    target: String,
    // Line the link was found on (documented as 1-based — confirm against the
    // scanner that builds this struct).
    line: u32,
}
2039
2040/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2041/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2042/// exact-cased directory entry to be present.
2043fn store_marker_present(store: &Store) -> bool {
2044    let want = store.root.join("DB.md");
2045    if !want.is_file() {
2046        return false;
2047    }
2048    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2049    match std::fs::read_dir(&store.root) {
2050        Ok(entries) => entries
2051            .flatten()
2052            .any(|e| e.file_name().to_str() == Some("DB.md")),
2053        Err(_) => true, // can't enumerate; trust the is_file() above
2054    }
2055}
2056
2057/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2058/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2059/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2060/// `Schemas`).
2061///
2062/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2063/// than through `check_content_file`. The marker presence is established by the
2064/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2065/// as a store (the marker is the filename), so we report its shape rather than
2066/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2067fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2068    let rel = Path::new("DB.md");
2069    let abs = store.root.join("DB.md");
2070    let Ok(text) = std::fs::read_to_string(&abs) else {
2071        return; // marker present but unreadable: nothing more to say.
2072    };
2073
2074    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2075        // No frontmatter block at all → it cannot declare `type: db-md` and has
2076        // neither required field. Report the type and both missing fields,
2077        // anchored to line 1 (the would-be opening fence).
2078        push(
2079            issues,
2080            Severity::Error,
2081            codes::DB_MD_BAD_TYPE,
2082            rel,
2083            Some(1),
2084            Some("type".into()),
2085            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2086            Some("add a `---` frontmatter block with `type: db-md`".into()),
2087            vec![],
2088        );
2089        for field in ["scope", "owner"] {
2090            push(
2091                issues,
2092                Severity::Error,
2093                codes::DB_MD_MISSING_FIELD,
2094                rel,
2095                Some(1),
2096                Some(field.into()),
2097                format!("DB.md frontmatter is missing required field `{field}`"),
2098                Some(format!("add `{field}:` to the DB.md frontmatter")),
2099                vec![],
2100            );
2101        }
2102        return;
2103    };
2104
2105    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2106    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2107    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2108        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2109        Ok(Value::Null) => Some(BTreeMap::new()),
2110        _ => None,
2111    };
2112
2113    match &fm {
2114        Some(map) => {
2115            // ── type: db-md ──────────────────────────────────────────────────
2116            let type_ = map.get("type").and_then(scalar_string);
2117            if type_.as_deref() != Some("db-md") {
2118                let (line, msg) = match &type_ {
2119                    Some(t) => (
2120                        fm_key_line(&fm_yaml, "type"),
2121                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2122                    ),
2123                    None => (
2124                        Some(1),
2125                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2126                    ),
2127                };
2128                push(
2129                    issues,
2130                    Severity::Error,
2131                    codes::DB_MD_BAD_TYPE,
2132                    rel,
2133                    line,
2134                    Some("type".into()),
2135                    msg,
2136                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2137                    vec![],
2138                );
2139            }
2140
2141            // ── required fields: scope + owner ───────────────────────────────
2142            for field in ["scope", "owner"] {
2143                let present = map
2144                    .get(field)
2145                    .and_then(scalar_string)
2146                    .map(|s| !s.trim().is_empty())
2147                    .unwrap_or(false);
2148                if !present {
2149                    push(
2150                        issues,
2151                        Severity::Error,
2152                        codes::DB_MD_MISSING_FIELD,
2153                        rel,
2154                        // A present-but-empty field anchors to its line; a fully
2155                        // absent one to the block top.
2156                        fm_key_line_or_top(&fm_yaml, field),
2157                        Some(field.into()),
2158                        format!("DB.md frontmatter is missing required field `{field}`"),
2159                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2160                        vec![],
2161                    );
2162                }
2163            }
2164        }
2165        None => {
2166            // Unparseable frontmatter: the identity contract is unprovable. Emit
2167            // the type error and both field errors, anchored to the block top.
2168            push(
2169                issues,
2170                Severity::Error,
2171                codes::DB_MD_BAD_TYPE,
2172                rel,
2173                Some(1),
2174                Some("type".into()),
2175                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2176                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2177                vec![],
2178            );
2179            for field in ["scope", "owner"] {
2180                push(
2181                    issues,
2182                    Severity::Error,
2183                    codes::DB_MD_MISSING_FIELD,
2184                    rel,
2185                    Some(1),
2186                    Some(field.into()),
2187                    format!("DB.md frontmatter is missing required field `{field}`"),
2188                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2189                    vec![],
2190                );
2191            }
2192        }
2193    }
2194
2195    // ── recognized `##` section headers only ─────────────────────────────────
2196    // The body's H2 headings must be one of the three the toolkit reads; any
2197    // other is a likely typo / misplacement (warning — the parser ignores it,
2198    // so the config is not corrupted, but the operator wrote a section that will
2199    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2200    // schema blocks) live under their H2 and are not flagged here.
2201    for section in crate::parser::extract_sections(&body) {
2202        if section.level != 2 {
2203            continue;
2204        }
2205        let name = section.heading.trim().to_ascii_lowercase();
2206        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2207            continue;
2208        }
2209        // `Section::line` is 1-based within the body; the body begins at file
2210        // line `fm_end_line + 1`.
2211        let file_line = fm_end_line + section.line;
2212        push(
2213            issues,
2214            Severity::Warning,
2215            codes::DB_MD_UNKNOWN_SECTION,
2216            rel,
2217            Some(file_line),
2218            None,
2219            format!(
2220                "DB.md has an unrecognized `## {}` section",
2221                section.heading.trim()
2222            ),
2223            Some(
2224                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2225                 remove or rename this heading"
2226                    .into(),
2227            ),
2228            vec![],
2229        );
2230    }
2231
2232    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2233    // Without this, every schema misparse is silent: the operator/agent gets no
2234    // signal that DB.md is interpreting their schema differently from what they
2235    // wrote, and downstream records are validated against the degraded schema.
2236    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2237}
2238
2239/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2240/// duplicate field name within a type, or an unrecognized modifier all parse
2241/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2242/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2243/// already separated out); this pass reports the suspicious *field* shapes,
2244/// anchored to the `### <type>` heading line so the agent can find the block.
2245fn check_db_md_schemas(
2246    store: &Store,
2247    rel: &Path,
2248    body: &str,
2249    fm_end_line: u32,
2250    issues: &mut Vec<Issue>,
2251) {
2252    if store.config.schemas.is_empty() {
2253        return;
2254    }
2255
2256    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2257    // per-type issue can anchor to the declaration block. `extract_sections`
2258    // returns a flat list with 1-based body lines; the body starts at file line
2259    // `fm_end_line + 1`.
2260    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2261    let mut current_h2: Option<String> = None;
2262    for section in crate::parser::extract_sections(body) {
2263        match section.level {
2264            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2265            3 if current_h2.as_deref() == Some("schemas") => {
2266                // The H3 heading text (as written) is the type name — the same
2267                // key `parse_db_md` inserts into `config.schemas`.
2268                type_line
2269                    .entry(section.heading.trim().to_string())
2270                    .or_insert(fm_end_line + section.line);
2271            }
2272            _ => {}
2273        }
2274    }
2275
2276    for (type_name, schema) in &store.config.schemas {
2277        let line = type_line.get(type_name).copied();
2278        let mut seen: BTreeSet<String> = BTreeSet::new();
2279        for field in &schema.fields {
2280            let name = field.name.trim();
2281
2282            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2283            // nameless field that can never match a frontmatter key, so its
2284            // required/shape/enum constraints silently never apply.
2285            if name.is_empty() {
2286                push(
2287                    issues,
2288                    Severity::Warning,
2289                    codes::DB_MD_SCHEMA_FIELD,
2290                    rel,
2291                    line,
2292                    None,
2293                    format!("`### {type_name}` has a schema field bullet with no field name"),
2294                    Some(
2295                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2296                            .into(),
2297                    ),
2298                    vec![],
2299                );
2300                continue;
2301            }
2302
2303            // Duplicate field name within a type: the second declaration's
2304            // constraints are interpreted independently of the first, so the
2305            // author's intent is ambiguous and likely wrong.
2306            if !seen.insert(name.to_string()) {
2307                push(
2308                    issues,
2309                    Severity::Warning,
2310                    codes::DB_MD_SCHEMA_FIELD,
2311                    rel,
2312                    line,
2313                    Some(name.to_string()),
2314                    format!("`### {type_name}` declares field `{name}` more than once"),
2315                    Some(
2316                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2317                    ),
2318                    vec![],
2319                );
2320            }
2321
2322            // Unrecognized modifiers: the parser stashes anything outside the
2323            // known vocabulary (`required` / a shape / `link to …` / `default …`
2324            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2325            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2326            for modifier in &field.unknown_modifiers {
2327                let modifier = modifier.trim();
2328                if modifier.is_empty() {
2329                    continue;
2330                }
2331                push(
2332                    issues,
2333                    Severity::Info,
2334                    codes::DB_MD_SCHEMA_FIELD,
2335                    rel,
2336                    line,
2337                    Some(name.to_string()),
2338                    format!(
2339                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2340                    ),
2341                    Some(
2342                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2343                            .into(),
2344                    ),
2345                    vec![],
2346                );
2347            }
2348        }
2349    }
2350}
2351
2352/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2353fn not_a_store_issue(store: &Store) -> Issue {
2354    Issue {
2355        severity: Severity::Error,
2356        code: codes::NOT_A_STORE,
2357        file: store.root.clone(),
2358        line: None,
2359        key: None,
2360        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2361        suggestion: Some("create a `DB.md` at the store root".into()),
2362        related: vec![],
2363    }
2364}
2365
/// True if a store-relative path names a content file: its first component is
/// one of the three layers (`sources/`, `records/`, `wiki/`), its file name ends
/// in `.md`, and it is not a derived catalog twin (`index.md` / `index.jsonl`).
///
/// A file named `log.md` or `DB.md` *inside* a layer (e.g.
/// `records/docs/log.md`) counts as content; only the layer check keeps the
/// root-level meta files out.
fn is_content_file(rel: &Path) -> bool {
    let layer = rel.iter().next().and_then(|s| s.to_str());
    if !matches!(layer, Some("sources" | "records" | "wiki")) {
        return false;
    }
    match rel.file_name().and_then(|s| s.to_str()) {
        // The catalog twins are the only meta files inside a layer.
        Some("index.md") | Some("index.jsonl") => false,
        Some(name) => name.ends_with(".md"),
        // No file name, or one that is not valid UTF-8.
        None => false,
    }
}
2386
/// True only for a single-component store-relative path named `DB.md` or
/// `log.md` — the store's root meta files.
///
/// Anything with a parent directory (e.g. `records/docs/log.md`) is not a root
/// file, and a path whose first component is not a plain name (such as a
/// leading `./`) is rejected as well.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut comps = rel.components();
    match (comps.next(), comps.next()) {
        // Exactly one normal component, and nothing after it.
        (Some(Component::Normal(only)), None) => {
            matches!(only.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2403
/// Split a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open with `---` on the very first line and runs
/// to the next line that is `---` (trailing whitespace on either fence is
/// ignored). Returns `None` when the first line is not a fence or the block is
/// never closed.
///
/// * `frontmatter_yaml` — the lines between the fences, each terminated by `\n`.
/// * `body` — the lines after the closing fence, joined with `\n`.
/// * `closing_fence_line` — 1-based file line of the closing fence.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // A single leading UTF-8 BOM is tolerated so a BOM-prefixed file is still
    // recognized as having frontmatter.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut lines = text.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    // The opening fence is line 1; the counter ends on the closing fence.
    let mut fence_line = 1u32;
    let mut yaml = String::new();
    let mut closed = false;
    for line in lines.by_ref() {
        fence_line += 1;
        if line.trim_end() == "---" {
            closed = true;
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
    if !closed {
        return None;
    }

    // Whatever the iterator still holds is everything after the closing fence.
    let body = lines.collect::<Vec<_>>().join("\n");
    Some((yaml, body, fence_line))
}
2440
2441/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2442fn read_summary(abs: &Path) -> Option<String> {
2443    let text = std::fs::read_to_string(abs).ok()?;
2444    let (yaml, _, _) = split_frontmatter(&text)?;
2445    let value: Value = serde_norway::from_str(&yaml).ok()?;
2446    if let Value::Mapping(m) = value {
2447        m.get(Value::String("summary".into()))
2448            .and_then(scalar_string)
2449    } else {
2450        None
2451    }
2452}
2453
2454/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2455/// non-string keys (frontmatter keys are always strings).
2456fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2457    let mut out = BTreeMap::new();
2458    for (k, v) in map {
2459        if let Value::String(s) = k {
2460            out.insert(s.clone(), v.clone());
2461        }
2462    }
2463    out
2464}
2465
2466/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2467/// sequences/mappings/null.
2468fn scalar_string(v: &Value) -> Option<String> {
2469    match v {
2470        Value::String(s) => Some(s.clone()),
2471        Value::Number(n) => Some(n.to_string()),
2472        Value::Bool(b) => Some(b.to_string()),
2473        _ => None,
2474    }
2475}
2476
2477/// True if a frontmatter value carries no content for a *required*-field check:
2478/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2479/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2480/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2481/// a shape/enum field is caught by the later non-scalar shape check, not by the
2482/// required-presence check.
2483fn is_empty_value(v: &Value) -> bool {
2484    match v {
2485        Value::Null => true,
2486        Value::Sequence(items) => items.is_empty(),
2487        Value::Mapping(map) => map.is_empty(),
2488        other => scalar_string(other)
2489            .map(|s| s.trim().is_empty())
2490            .unwrap_or(true),
2491    }
2492}
2493
2494/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2495/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2496fn is_flat_scalar_list(v: &Value) -> bool {
2497    match v {
2498        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2499        _ => false,
2500    }
2501}
2502
2503/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2504/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2505/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2506/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2507/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2508/// both forms uniformly, the way the link textually appears — the doctrine view.
2509///
2510/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2511/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2512fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2513    let mut out = Vec::new();
2514    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2515        for link in links {
2516            out.push((key.clone(), link));
2517        }
2518    }
2519    out
2520}
2521
2522/// The wiki-link targets declared under a single top-level frontmatter key
2523/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2524/// carries no `[[...]]`.
2525fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2526    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2527        if k == key {
2528            return links;
2529        }
2530    }
2531    Vec::new()
2532}
2533
2534/// The raw value text under a single top-level frontmatter key (the remainder of
2535/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2536/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2537fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2538    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2539        if k == key {
2540            return Some(value_text);
2541        }
2542    }
2543    None
2544}
2545
2546/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2547/// each top-level key. A top-level key is a line with no leading indentation in
2548/// `name:` form; its value spans the rest of that line plus any deeper-indented
2549/// continuation lines (block scalars, block sequences) until the next top-level
2550/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2551/// absolute file line.
2552fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2553    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2554    let mut current: Option<(String, String, Vec<Link>)> = None;
2555
2556    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2557        let file_line = fm_start_line + idx as u32;
2558        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2559        let trimmed = raw_line.trim();
2560
2561        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2562        // comment. (Indented or dash lines belong to the current key's value.)
2563        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2564            top_level_key(raw_line)
2565        } else {
2566            None
2567        };
2568
2569        if let Some((key, after)) = new_key {
2570            if let Some(done) = current.take() {
2571                blocks.push(done);
2572            }
2573            let mut links = Vec::new();
2574            collect_line_links(after, file_line, &mut links);
2575            current = Some((key, after.trim().to_string(), links));
2576        } else if let Some((_k, value_text, links)) = current.as_mut() {
2577            // Continuation of the current key's value (indented or dash line).
2578            if !value_text.is_empty() {
2579                value_text.push('\n');
2580            }
2581            value_text.push_str(trimmed);
2582            collect_line_links(raw_line, file_line, links);
2583        }
2584    }
2585    if let Some(done) = current.take() {
2586        blocks.push(done);
2587    }
2588    blocks
2589}
2590
/// Parse a frontmatter key line into `(key, value_after_colon)`.
///
/// The line is split at its first `:`. The part before it, trimmed, is the key
/// and must be non-empty and made only of alphanumerics, `_`, or `-`. The part
/// after the colon is returned untouched (not trimmed). Returns `None` when the
/// line has no colon or the key does not have that shape.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let (raw_key, rest) = line.split_once(':')?;
    let key = raw_key.trim();
    let is_key_char = |c: char| c.is_alphanumeric() || matches!(c, '_' | '-');
    let well_formed = !key.is_empty() && key.chars().all(is_key_char);
    well_formed.then(|| (key.to_string(), rest))
}
2605
2606/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2607/// each tagged with `file_line`.
2608fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2609    let bytes = s.as_bytes();
2610    let mut i = 0;
2611    while i + 1 < bytes.len() {
2612        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2613            if let Some(close) = s[i + 2..].find("]]") {
2614                let inner = &s[i + 2..i + 2 + close];
2615                // Guard against `[[[` (nested) double-counting: the inner must
2616                // not itself open another `[[`.
2617                let target = inner
2618                    .trim_start_matches('[')
2619                    .split('|')
2620                    .next()
2621                    .unwrap_or(inner)
2622                    .trim()
2623                    .to_string();
2624                if !target.is_empty() {
2625                    links.push(Link {
2626                        target,
2627                        line: file_line,
2628                    });
2629                }
2630                i = i + 2 + close + 2;
2631                continue;
2632            }
2633        }
2634        i += 1;
2635    }
2636}
2637
2638/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2639/// Skips fenced code blocks, so example links in docs don't trip the validator.
2640///
2641/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2642/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2643/// only on a line that is the **same** fence character with a run **at least as
2644/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2645/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2646/// document a backtick fence) — the inner backtick line would flip `in_fence`
2647/// off and the demo `[[…]]` inside the code block would be checked as a live
2648/// link, falsely flagging a legal store.
2649fn extract_wiki_links(body: &str) -> Vec<Link> {
2650    let mut out = Vec::new();
2651    let mut fence: Option<(u8, usize)> = None;
2652    for (idx, line) in body.lines().enumerate() {
2653        let content = line.trim_end_matches('\r');
2654        if let Some(f) = fence {
2655            // Inside a fence: the only thing that matters is whether THIS line
2656            // closes it (matching char, run ≥ the opening run). Everything else
2657            // is opaque code — no link extraction.
2658            if fence_closes(content, f) {
2659                fence = None;
2660            }
2661            continue;
2662        }
2663        if let Some(opened) = fence_opens(content) {
2664            fence = Some(opened);
2665            continue;
2666        }
2667        let line_no = (idx + 1) as u32;
2668        let bytes = line.as_bytes();
2669        let mut i = 0;
2670        while i + 1 < bytes.len() {
2671            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2672                if let Some(close) = line[i + 2..].find("]]") {
2673                    let inner = &line[i + 2..i + 2 + close];
2674                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2675                    // Skip a triple-bracket `[[[…` opening: the inner content
2676                    // starts with `[`, so this is the rejected flow-form list
2677                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2678                    // legitimate target never starts with `[`. The frontmatter
2679                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2680                    // extracting a bogus body link here would double-report it as
2681                    // a spurious `WIKI_LINK_SHORT_FORM`.
2682                    if !target.is_empty() && !target.starts_with('[') {
2683                        out.push(Link {
2684                            target,
2685                            line: line_no,
2686                        });
2687                    }
2688                    i = i + 2 + close + 2;
2689                    continue;
2690                }
2691            }
2692            i += 1;
2693        }
2694    }
2695    out
2696}
2697
/// If `line` opens a fenced code block, return `(fence byte, run length)`.
///
/// A fence is a run of at least three backticks or three tildes, preceded by at
/// most three spaces of indentation. For a backtick fence the text after the
/// run (the info string) may not contain another backtick. Anything else yields
/// `None`.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }
    let byte = *rest.as_bytes().first()?;
    if !matches!(byte, b'`' | b'~') {
        return None;
    }
    let info = rest.trim_start_matches(byte as char);
    let run = rest.len() - info.len();
    // Too short a run, or a backtick fence with a backtick in its info string.
    if run < 3 || (byte == b'`' && info.contains('`')) {
        return None;
    }
    Some((byte, run))
}
2723
/// True if `line` closes the currently open `fence` (`(fence byte, run length)`).
///
/// The line must be indented by at most three spaces, start with a run of the
/// **same** fence character at least as long as the opening run, and have only
/// whitespace after it. A fence line of the *other* character therefore never
/// closes the open fence.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (byte, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return false;
    }
    let tail = rest.trim_start_matches(byte as char);
    let run = rest.len() - tail.len();
    run >= open_len && tail.trim().is_empty()
}
2741
2742/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
2743/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
2744///
2745/// **Scoped to the inline value on the key line.** The SPEC's canonical
2746/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
2747/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
2748/// flagged — even though, parsed whole, it nests the same way the rejected
2749/// inline flow form does. So this check looks only at the value written *inline*
2750/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
2751/// nested sequence (a list whose items are themselves lists — the wiki-link-list
2752/// mis-encoding), it is flagged. A key with no inline value (the block form,
2753/// whose items live on continuation lines) is never inspected here.
2754///
2755/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
2756/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
2757/// encodes the identical nested sequence but evaded the old prefix match.
2758fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2759    let mut out = Vec::new();
2760    for line in fm_yaml.lines() {
2761        // Top-level key lines only (no indentation, not a comment or list dash).
2762        if line.starts_with(' ') || line.starts_with('\t') {
2763            continue;
2764        }
2765        let Some((key, rest)) = line.split_once(':') else {
2766            continue;
2767        };
2768        let key = key.trim();
2769        if key.is_empty()
2770            || key.starts_with('#')
2771            || key.starts_with('-')
2772            || !key
2773                .chars()
2774                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2775        {
2776            continue;
2777        }
2778        let rest = rest.trim();
2779        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
2780        // the unquoted block form has an empty inline value and is never flagged.
2781        if !rest.starts_with('[') {
2782            continue;
2783        }
2784        // Parse just the inline value and test its shape: a list whose items are
2785        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
2786        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
2787        // `Seq[Seq[String]]` and is NOT flagged).
2788        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2789            let nested = items.iter().any(|item| match item {
2790                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2791                _ => false,
2792            });
2793            if nested {
2794                out.push(key.to_string());
2795            }
2796        }
2797    }
2798    out
2799}
2800
/// True if a bare target (no `.md`) is a full store-relative path: its first
/// `/`-separated segment is a known layer (`sources`, `records`, `wiki`) and
/// something non-empty follows the first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, rest)) => {
            !rest.is_empty() && matches!(layer, "sources" | "records" | "wiki")
        }
        // No `/` at all: a short-form target.
        None => false,
    }
}
2809
/// True if `path` stays inside the store: it has at least one normal component
/// and no `..`, root, or platform-prefix component. `.` components are allowed
/// but do not count as a normal component on their own.
///
/// Validator inputs come from user-authored markdown/JSON sidecars, so this
/// guard keeps a validation probe from turning into a filesystem escape.
fn is_safe_store_relative_path(path: &Path) -> bool {
    let mut has_normal = false;
    let only_safe = path.components().all(|component| match component {
        Component::Normal(_) => {
            has_normal = true;
            true
        }
        Component::CurDir => true,
        Component::ParentDir | Component::RootDir | Component::Prefix(_) => false,
    });
    only_safe && has_normal
}
2824
2825fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2826    let path = Path::new(bare);
2827    if !is_safe_store_relative_path(path) {
2828        return None;
2829    }
2830    Some(PathBuf::from(format!("{bare}.md")))
2831}
2832
/// How a wiki-link / index-entry target resolves on disk.
///
/// A plain field-less enum, so it derives the cheap standard traits: callers
/// and tests can compare outcomes with `==`, copy them freely, and print them
/// in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TargetResolution {
    /// The target exists (either as the literal path or with a `.md` suffix).
    Exists,
    /// The target is a safe store-relative path but no file exists for it.
    Missing,
    /// The target escapes the store (absolute, `..`, prefix) — never probe it.
    Unsafe,
}
2842
2843/// Resolve a bare wiki-link / index-entry target the way the graph engine does
2844/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
2845/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
2846/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
2847/// real file), then the `.md`-appended path (the common case for content
2848/// pages). Without trying the literal path first, a legal link to a raw source
2849/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
2850/// resolves it.
2851fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
2852    // The literal path and the `.md`-appended path share the same safety check
2853    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
2854    // target is unsafe in both forms.
2855    if !is_safe_store_relative_path(Path::new(bare)) {
2856        return TargetResolution::Unsafe;
2857    }
2858    match resolved_target_abs(store, bare) {
2859        Some(_) => TargetResolution::Exists,
2860        None => TargetResolution::Missing,
2861    }
2862}
2863
2864/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
2865/// trying the literal path first, then `.md`-appended — mirroring the graph
2866/// engine. `None` when neither exists, or when the bare target escapes the store
2867/// (callers that need to distinguish unsafe from merely-missing use
2868/// [`resolve_wiki_target`]).
2869fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
2870    if !is_safe_store_relative_path(Path::new(bare)) {
2871        return None;
2872    }
2873    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
2874    // verbatim under `sources/`).
2875    let literal = store.root.join(bare);
2876    if literal.is_file() {
2877        return Some(literal);
2878    }
2879    // The `.md`-appended path (a content page referenced without its extension).
2880    let with_md = store.root.join(format!("{bare}.md"));
2881    if with_md.is_file() {
2882        return Some(with_md);
2883    }
2884    None
2885}
2886
/// Whether the bare target `bare` is `prefix` itself or lies beneath it (both
/// given `.md`-stripped).
///
/// Trailing slashes on `prefix` are ignored. The match is made on whole path
/// segments: `records/contacts-old/a` is *not* under `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let folder = prefix.trim_end_matches('/');
    // After removing the folder, what is left must be nothing (the folder
    // itself) or start a new segment.
    bare.strip_prefix(folder)
        .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
}
2892
/// The type-folder of a store-relative content path, as
/// `<layer>/<type-folder>` — the folder directly under the layer, so files in
/// date-shard subfolders roll up to it.
///
/// Returns `None` when the path has fewer than three components (a file sitting
/// directly in a layer folder, or shallower) or when its first component is not
/// one of the three layers `sources`, `records`, `wiki`.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|part| part.to_str());
    // layer / type-folder / at least one more component (the file or a shard).
    let (layer, folder, _beneath) = (parts.next()?, parts.next()?, parts.next()?);
    matches!(layer, "sources" | "records" | "wiki").then(|| Path::new(layer).join(folder))
}
2906
2907/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`/`wiki/`,
2908/// returning store-relative paths to be parsed in full. Skips hidden dirs and
2909/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
2910/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
2911/// (a single presence-only pass), so the loop default never walks-and-*parses*
2912/// the whole content tree.
2913///
2914/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
2915/// is reserved (`Store::is_in_log_dir` checks only the first path component);
2916/// the walk roots are the three layers, so the root archive is already out of
2917/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
2918/// decision log) is real content (see `is_content_file`), so pruning every
2919/// `name == "log"` made `--all` silently skip those files — reporting fewer
2920/// errors than the default working-set scope on the same store.
2921fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2922    let mut out = Vec::new();
2923    for layer in ["sources", "records", "wiki"] {
2924        let base = root.join(layer);
2925        if !base.is_dir() {
2926            continue;
2927        }
2928        for entry in walkdir::WalkDir::new(&base)
2929            .into_iter()
2930            .filter_entry(|e| {
2931                let name = e.file_name().to_str().unwrap_or("");
2932                !name.starts_with('.')
2933            })
2934            .flatten()
2935        {
2936            if !entry.file_type().is_file() {
2937                continue;
2938            }
2939            let name = entry.file_name().to_str().unwrap_or("");
2940            if name.ends_with(".md") && name != "index.md" {
2941                if let Ok(rel) = entry.path().strip_prefix(root) {
2942                    out.push(rel.to_path_buf());
2943                }
2944            }
2945        }
2946    }
2947    out.sort();
2948    out
2949}
2950
2951/// Every `index.md` under the store (root + layers + type-folders), as
2952/// store-relative paths. Used to detect orphan indexes. Like
2953/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
2954/// and its `index.md` is not pruned (only the root-level `log/` archive is
2955/// reserved, and the walk roots are the three layers, so it is already
2956/// out of scope).
2957fn walk_index_files(root: &Path) -> Vec<PathBuf> {
2958    let mut out = Vec::new();
2959    if root.join("index.md").is_file() {
2960        out.push(PathBuf::from("index.md"));
2961    }
2962    for layer in ["sources", "records", "wiki"] {
2963        let base = root.join(layer);
2964        if !base.is_dir() {
2965            continue;
2966        }
2967        for entry in walkdir::WalkDir::new(&base)
2968            .into_iter()
2969            .filter_entry(|e| {
2970                let name = e.file_name().to_str().unwrap_or("");
2971                !name.starts_with('.')
2972            })
2973            .flatten()
2974        {
2975            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
2976                if let Ok(rel) = entry.path().strip_prefix(root) {
2977                    out.push(rel.to_path_buf());
2978                }
2979            }
2980        }
2981    }
2982    out.sort();
2983    out
2984}
2985
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number.
struct IndexEntry {
    /// The wiki-link target: the text inside the first `[[…]]` on the line, up
    /// to any `|display` segment, with surrounding whitespace trimmed.
    target: String,
    /// The summary text that follows the separator after the link, or `None`
    /// when the line carries no separator or the summary is empty.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md` text.
    line: u32,
}
2993
2994/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
2995/// `## More` footer (those lines aren't file entries). Root/layer entries with a
2996/// `|display` segment and a `(N)` count are parsed too — the target is the bare
2997/// path, the summary text is whatever follows the em dash.
2998fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
2999    let mut out = Vec::new();
3000    let mut in_more = false;
3001    for (idx, line) in text.lines().enumerate() {
3002        let trimmed = line.trim_start();
3003        if trimmed.starts_with("## More") {
3004            in_more = true;
3005            continue;
3006        }
3007        if in_more {
3008            continue;
3009        }
3010        if !trimmed.starts_with("- ") {
3011            continue;
3012        }
3013        // Find the first `[[...]]`.
3014        let Some(open) = trimmed.find("[[") else {
3015            continue;
3016        };
3017        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3018            continue;
3019        };
3020        let inner = &trimmed[open + 2..open + 2 + close_rel];
3021        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3022
3023        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3024        let after = &trimmed[open + 2 + close_rel + 2..];
3025        let summary_text = extract_index_entry_summary(after);
3026
3027        out.push(IndexEntry {
3028            target,
3029            summary_text,
3030            line: (idx + 1) as u32,
3031        });
3032    }
3033    out
3034}
3035
3036/// Pull the summary portion out of the text trailing an index entry's
3037/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3038/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3039/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3040/// renderer's tag separator).
3041fn extract_index_entry_summary(after: &str) -> Option<String> {
3042    let mut s = after.trim();
3043    // Drop a leading "(N ...)" count segment, if present.
3044    if s.starts_with('(') {
3045        if let Some(close) = s.find(')') {
3046            s = s[close + 1..].trim_start();
3047        }
3048    }
3049    // Require an em dash or hyphen separator before the summary.
3050    let s = if let Some(rest) = s.strip_prefix('—') {
3051        rest.trim()
3052    } else if let Some(rest) = s.strip_prefix('-') {
3053        rest.trim()
3054    } else {
3055        return None;
3056    };
3057    if s.is_empty() {
3058        return None;
3059    }
3060    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3061    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3062    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3063    // the file has no tags. The previous code also accepted a *single*-spaced
3064    // ` · ` separator, which collided with a legal summary whose own text ends
3065    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3066    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3067    // summary verbatim (no tag block, since there are no tags), but the loose
3068    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3069    // `"Standup notes"` against the file's full summary, and emitted a spurious
3070    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3071    // (rebuild regenerates the identical line). Matching the renderer's exact
3072    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3073    // matches from the right so only the real trailing tag block is considered.
3074    let s = match s.rsplit_once("  ·  ") {
3075        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3076        _ => s,
3077    };
3078    Some(s.to_string())
3079}
3080
/// Whether `s` is a non-empty tag block: one or more whitespace-separated
/// tokens, each a `#` followed by at least one more byte.
///
/// That is the shape the index renderer (`crate::index::format_md_entry`)
/// appends after its `·` separator, so this distinguishes the renderer's
/// `  ·  #tag` suffix from a literal `·` inside the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // An empty / all-whitespace string has no tokens and is not a tag block.
    tokens.peek().is_some() && tokens.all(|tok| tok.starts_with('#') && tok.len() >= 2)
}
3095
3096/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3097/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3098/// or the header isn't well-formed.
3099fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3100    let rest = line.strip_prefix("## [")?;
3101    let close = rest.find(']')?;
3102    let ts_str = &rest[..close];
3103    let tail = rest[close + 1..].trim();
3104
3105    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3106    // attach a zero offset — the log header carries minute precision, no zone.
3107    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3108    let offset = FixedOffset::east_opt(0)?;
3109    let ts = naive.and_local_timezone(offset).single()?;
3110
3111    // kind | object
3112    let (kind, object) = match tail.split_once('|') {
3113        Some((k, o)) => {
3114            let o = o.trim();
3115            (
3116                k.trim().to_string(),
3117                if o.is_empty() {
3118                    None
3119                } else {
3120                    Some(o.to_string())
3121                },
3122            )
3123        }
3124        None => (tail.to_string(), None),
3125    };
3126    if kind.is_empty() {
3127        return None;
3128    }
3129    Some((ts, kind, object))
3130}
3131
3132/// Every log file that holds entries for the working-set scan: the active
3133/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3134/// strictly-prior-month entries into the archives, so the active file alone is
3135/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3136/// unvalidated object can live in an archive after a month rollover. Reading the
3137/// archives here keeps the working-set readers in sync with the rest of the log
3138/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3139/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3140/// only log headers, never the content store, so the loop budget is preserved.
3141fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3142    let mut files = vec![store.root.join("log.md")];
3143    let archive_dir = store.root.join("log");
3144    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3145        let mut archives: Vec<PathBuf> = entries
3146            .flatten()
3147            .map(|e| e.path())
3148            .filter(|p| {
3149                p.is_file()
3150                    && p.file_name()
3151                        .and_then(|s| s.to_str())
3152                        .and_then(|n| n.strip_suffix(".md"))
3153                        .is_some_and(is_year_month_archive)
3154            })
3155            .collect();
3156        // Deterministic order (oldest month first); the callers fold across all
3157        // files so order doesn't affect the result, but a stable order keeps the
3158        // scan reproducible.
3159        archives.sort();
3160        files.extend(archives);
3161    }
3162    files
3163}
3164
/// Whether `s` has the `YYYY-MM` shape of an archive stem: exactly four ASCII
/// digits, a `-`, and two ASCII digits — the `log/<YYYY-MM>.md` naming the
/// rotation in [`crate::log`] emits. Only the shape is checked; the month is
/// not range-validated.
fn is_year_month_archive(s: &str) -> bool {
    // The slice pattern fixes both the length (7 bytes) and the dash position.
    let [y0, y1, y2, y3, b'-', m0, m1] = s.as_bytes() else {
        return false;
    };
    [y0, y1, y2, y3, m0, m1].iter().all(|b| b.is_ascii_digit())
}
3174
3175/// The timestamp of the most recent `validate` entry across the active `log.md`
3176/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3177/// Reads only headers; never the whole store. Archive-aware so a `validate`
3178/// entry that rotated into an archive after a month rollover still anchors the
3179/// cutoff (without this, the cutoff silently resets to `None`).
3180fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3181    let mut latest: Option<DateTime<FixedOffset>> = None;
3182    for file in log_files_for_working_set(store) {
3183        let Ok(text) = std::fs::read_to_string(&file) else {
3184            continue;
3185        };
3186        for line in text.lines() {
3187            if !line.starts_with("## [") {
3188                continue;
3189            }
3190            if let Some((ts, kind, _)) = parse_log_header(line) {
3191                if kind == "validate" {
3192                    latest = Some(match latest {
3193                        Some(p) if p >= ts => p,
3194                        _ => ts,
3195                    });
3196                }
3197            }
3198        }
3199    }
3200    latest
3201}
3202
3203/// The set of content objects changed since `cutoff`, read from log entries
3204/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3205/// counts (no prior validate window). Returns store-relative `.md` paths.
3206///
3207/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3208/// month rollover [`Log::append`] rotates prior-month entries out of the active
3209/// file, so an object changed-but-never-validated in a prior month lives only in
3210/// an archive. Reading the archives here is what keeps `dbmd validate` from
3211/// silently skipping those files. Reads only log headers, never the content
3212/// store.
3213fn changed_objects_since(
3214    store: &Store,
3215    cutoff: Option<DateTime<FixedOffset>>,
3216) -> BTreeSet<PathBuf> {
3217    let mut out = BTreeSet::new();
3218    for file in log_files_for_working_set(store) {
3219        let Ok(text) = std::fs::read_to_string(&file) else {
3220            continue;
3221        };
3222        for line in text.lines() {
3223            if !line.starts_with("## [") {
3224                continue;
3225            }
3226            let Some((ts, kind, object)) = parse_log_header(line) else {
3227                continue;
3228            };
3229            if let Some(c) = cutoff {
3230                if ts < c {
3231                    continue;
3232                }
3233            }
3234            if !matches!(
3235                kind.as_str(),
3236                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3237            ) {
3238                continue;
3239            }
3240            if let Some(obj) = object {
3241                // The object slot is a store-relative path (or a wiki-link target).
3242                let bare = obj
3243                    .trim()
3244                    .trim_start_matches("[[")
3245                    .trim_end_matches("]]")
3246                    .split('|')
3247                    .next()
3248                    .unwrap_or("")
3249                    .trim()
3250                    .trim_end_matches(".md")
3251                    .to_string();
3252                if bare.is_empty() {
3253                    continue;
3254                }
3255                out.insert(PathBuf::from(format!("{bare}.md")));
3256            }
3257        }
3258    }
3259    out
3260}
3261
/// The result of the [`derived_from_ignored_type`] policy check: the
/// `derived_from` target that resolves to an ignored-type record, plus that
/// record's type. Carries exactly what both the validate finding and the
/// write-time warning need to render their message.
///
/// Only the *first* offending target is reported: the check returns as soon as
/// one `derived_from` target resolves to an ignored type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target as written (bare store-relative path,
    /// no `.md`). Copied verbatim from the target string the caller supplied.
    pub target: String,
    /// The resolved `type` of that target, which is present in
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3275
3276/// **The single authoritative `### Ignored types` derivation check.** Decides
3277/// whether a `wiki-page` derives from an ignored-type record: the type must be
3278/// `wiki-page`, `### Ignored types` must be non-empty, and some `derived_from`
3279/// target must resolve to a record whose `type` is in `ignored_types`. Returns
3280/// the first such target (and its type), or `None`.
3281///
3282/// Both surfaces call this so the policy lives in exactly one place:
3283/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3284/// `derived_from` targets it scanned from the raw frontmatter, and the write
3285/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3286/// The link *extraction* differs per surface (text-scan with line numbers vs.
3287/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3288/// resolution, and `ignored_types` membership — does not.
3289pub fn derived_from_ignored_type<I, S>(
3290    store: &Store,
3291    type_: &str,
3292    derived_from_targets: I,
3293) -> Option<DerivedFromIgnored>
3294where
3295    I: IntoIterator<Item = S>,
3296    S: AsRef<str>,
3297{
3298    if type_ != "wiki-page" || store.config.ignored_types.is_empty() {
3299        return None;
3300    }
3301    for target in derived_from_targets {
3302        let target = target.as_ref();
3303        if let Some(target_type) = link_target_type(store, target) {
3304            if store.config.ignored_types.contains(&target_type) {
3305                return Some(DerivedFromIgnored {
3306                    target: target.to_string(),
3307                    target_type,
3308                });
3309            }
3310        }
3311    }
3312    None
3313}
3314
3315/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3316fn link_target_type(store: &Store, target: &str) -> Option<String> {
3317    let bare = target.trim_end_matches(".md");
3318    let abs = store.root.join(safe_md_target_rel(bare)?);
3319    let text = std::fs::read_to_string(&abs).ok()?;
3320    let (yaml, _, _) = split_frontmatter(&text)?;
3321    let value: Value = serde_norway::from_str(&yaml).ok()?;
3322    if let Value::Mapping(m) = value {
3323        m.get(Value::String("type".into())).and_then(scalar_string)
3324    } else {
3325        None
3326    }
3327}
3328
3329// ── Shape validators ─────────────────────────────────────────────────────────
3330
3331/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3332/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3333fn is_iso8601(s: &str) -> bool {
3334    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3335}
3336
3337/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3338/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3339/// accept the date-only form per the SPEC's worked example.
3340fn is_iso8601_date_or_datetime(s: &str) -> bool {
3341    let s = s.trim();
3342    if DateTime::parse_from_rfc3339(s).is_ok() {
3343        return true;
3344    }
3345    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3346}
3347
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Surrounding whitespace is ignored. The rules, all of which must hold:
///
/// - exactly one `@` — a domain that still contains an `@` after the split
///   (the double-`@` typo `sarah@@acme.com`, or `a@b@c.com`) is rejected;
/// - the local part is non-empty;
/// - neither part contains *any* whitespace (spaces, tabs, newlines — not just
///   the space character);
/// - the domain contains at least one `.` and every dot-separated label is
///   non-empty, which rules out a leading dot, a trailing dot, and consecutive
///   dots (`a@acme..com`).
///
/// This is a shape check, not a full address-syntax validator.
fn is_email(s: &str) -> bool {
    let Some((local, domain)) = s.trim().split_once('@') else {
        return false;
    };
    let has_whitespace =
        local.contains(char::is_whitespace) || domain.contains(char::is_whitespace);
    !local.is_empty()
        && !domain.contains('@')
        && !has_whitespace
        && domain.contains('.')
        // Empty labels cover `.com`, `acme.` and `acme..com` alike.
        && domain.split('.').all(|label| !label.is_empty())
}
3365
/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
/// plain decimal number with optional thousands separators and ≤ 2 decimals.
///
/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
/// rejected, and the ≤ 2-decimal rule is actually enforced.
///
/// Thousands separators must be *placed* like separators: each `,` has to sit
/// between two non-empty digit groups of the integer part. Stray commas
/// (`,5`, `5,`, `1,,000`) and commas in the fraction (`1.2,3`) are rejected
/// rather than silently deleted. Group *size* is deliberately not constrained,
/// so non-Western groupings such as `1,00,000` stay valid.
fn is_currency(s: &str) -> bool {
    let mut t = s.trim();
    // Strip a leading currency symbol …
    for sym in ["$", "€", "£", "¥"] {
        if let Some(rest) = t.strip_prefix(sym) {
            t = rest.trim_start();
            break;
        }
    }
    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
    // code must be exactly three ASCII letters and separated from the number by
    // whitespace, so a bare `USD` with no amount still fails.
    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
            t = rest.trim_start();
        }
    }

    // Check comma placement BEFORE dropping the commas; once they are removed
    // a malformed amount is indistinguishable from a well-formed one.
    if !has_well_formed_grouping(t) {
        return false;
    }
    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
    is_plain_amount(cleaned.trim())
}

/// True when every `,` in `amount` separates two non-empty runs of ASCII digits
/// in the integer part (the part before the first `.`, after an optional sign),
/// and the fractional part contains no `,` at all.
fn has_well_formed_grouping(amount: &str) -> bool {
    let (int_part, frac_part) = amount.split_once('.').unwrap_or((amount, ""));
    if frac_part.contains(',') {
        return false;
    }
    let unsigned = int_part.strip_prefix(['+', '-']).unwrap_or(int_part);
    unsigned
        .split(',')
        .all(|group| !group.is_empty() && group.bytes().all(|b| b.is_ascii_digit()))
}

/// True for a bare decimal amount: optional sign, ≥ 1 digit, an optional
/// fractional part of 1–2 digits. No exponents, no `inf`/`NaN`, no empty string.
fn is_plain_amount(s: &str) -> bool {
    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match unsigned.split_once('.') {
        Some((int_part, frac_part)) => (int_part, Some(frac_part)),
        None => (unsigned, None),
    };
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());
    if int_part.is_empty() || !all_digits(int_part) {
        return false;
    }
    // A `.` must be followed by one or two digits; `5.` and `5.123` both fail.
    frac_part.map_or(true, |frac| (1..=2).contains(&frac.len()) && all_digits(frac))
}
3410
/// Whether `s` (ignoring surrounding whitespace) is an http(s) URL: one of the
/// recognised scheme prefixes `http://` / `https://` followed by at least one
/// character.
///
/// The emptiness test is applied to what remains after the *matched* scheme,
/// so a single-character host on the shorter scheme (`http://x` — e.g. an
/// intranet or container hostname) is accepted, while a bare scheme with
/// nothing after it is rejected. The scheme match is case-sensitive.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    ["http://", "https://"]
        .iter()
        .find_map(|scheme| candidate.strip_prefix(*scheme))
        .is_some_and(|rest| !rest.is_empty())
}
3425
3426/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3427fn shape_suggestion(shape: Shape) -> String {
3428    match shape {
3429        Shape::String => "use a scalar string".into(),
3430        Shape::Int => "use an integer".into(),
3431        Shape::Bool => "use `true` or `false`".into(),
3432        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3433        Shape::Email => "use a `<local>@<domain>` address".into(),
3434        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3435        Shape::Url => "use an http(s) URL".into(),
3436    }
3437}
3438
3439/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3440/// can't know the folder, so the suggestion is generic but actionable.
3441fn short_form_suggestion(bare: &str) -> Option<String> {
3442    Some(format!(
3443        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3444        slugish(bare)
3445    ))
3446}
3447
/// Turn a plain string into a filesystem-ish leaf: trimmed, lowercased, each
/// whitespace character replaced by `-`, and every character that is not
/// alphanumeric, `-`, `/` or `_` dropped.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        if ch.is_whitespace() {
            slug.push('-');
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3457
3458/// Push a fully-formed [`Issue`].
3459#[allow(clippy::too_many_arguments)]
3460fn push(
3461    issues: &mut Vec<Issue>,
3462    severity: Severity,
3463    code: &'static str,
3464    file: &Path,
3465    line: Option<u32>,
3466    key: Option<String>,
3467    message: String,
3468    suggestion: Option<String>,
3469    related: Vec<PathBuf>,
3470) {
3471    issues.push(Issue {
3472        severity,
3473        code,
3474        file: file.to_path_buf(),
3475        line,
3476        key,
3477        message,
3478        suggestion,
3479        related,
3480    });
3481}
3482
/// File line (1-based) of a top-level frontmatter key, or `None` if the YAML
/// block has no such key.
///
/// A top-level key line starts at column 0 with `key` immediately followed by
/// `:` — indented (nested) keys and list items (`- key:`) do not match. The
/// returned number is relative to the file, not the YAML block: file line 1 is
/// the opening `---`, so YAML line 0 is file line 2.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    fm_yaml
        .lines()
        .position(|line| {
            line.starts_with(key)
                && line
                    .trim_start()
                    .strip_prefix(key)
                    .is_some_and(|rest| rest.starts_with(':'))
        })
        .map(|idx| (idx as u32) + 2)
}
3498
3499/// The line a *field-absence* issue (a required key that is missing entirely)
3500/// anchors to: the key's line when present, else line `1` — the frontmatter
3501/// block's opening `---`. A missing key has no line of its own; anchoring it to
3502/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3503/// line to point at instead of an unhelpful `null`.
3504fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3505    fm_key_line(fm_yaml, key).or(Some(1))
3506}
3507
3508/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3509/// output deterministic across runs.
3510fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3511    a.file
3512        .cmp(&b.file)
3513        .then(a.line.cmp(&b.line))
3514        .then(a.code.cmp(b.code))
3515        .then(a.key.cmp(&b.key))
3516}
3517
3518// ═════════════════════════════════════════════════════════════════════════════
3519//  Tests
3520// ═════════════════════════════════════════════════════════════════════════════
3521
3522#[cfg(test)]
3523mod tests {
3524    use super::*;
3525    use crate::parser::{Config, FieldSpec};
3526    use std::fs;
3527    use tempfile::TempDir;
3528
3529    #[test]
3530    fn split_frontmatter_tolerates_leading_bom() {
3531        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3532        // fence must not make validate treat the file as frontmatter-less while
3533        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3534        // for `\u{feff}---` and the function returned None.
3535        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3536        let parsed = split_frontmatter(text);
3537        assert!(
3538            parsed.is_some(),
3539            "a leading BOM must not hide frontmatter from validate"
3540        );
3541        let (yaml, body, close_line) = parsed.unwrap();
3542        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3543        assert_eq!(body, "body");
3544        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3545    }
3546
    /// A test store builder over a real tempdir. Every helper writes real files
    /// so the assertions exercise real behavior, not mocks.
    struct Fixture {
        /// The temporary directory that serves as the store root.
        dir: TempDir,
        /// The store configuration handed to every [`Store`] this fixture
        /// builds; the constructors start it at `Config::default()`.
        config: Config,
    }
3553
3554    impl Fixture {
3555        /// A fresh store with a **valid** `DB.md` (the identity contract:
3556        /// `type: db-md` + `scope` + `owner`) and the three layer dirs. A valid
3557        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3558        /// clean; tests that want a broken DB.md write their own via `write`.
3559        fn new() -> Self {
3560            let dir = TempDir::new().unwrap();
3561            fs::write(
3562                dir.path().join("DB.md"),
3563                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3564            )
3565            .unwrap();
3566            for layer in ["sources", "records", "wiki"] {
3567                fs::create_dir_all(dir.path().join(layer)).unwrap();
3568            }
3569            Fixture {
3570                dir,
3571                config: Config::default(),
3572            }
3573        }
3574
3575        /// A store with no `DB.md` marker.
3576        fn bare() -> Self {
3577            let dir = TempDir::new().unwrap();
3578            Fixture {
3579                dir,
3580                config: Config::default(),
3581            }
3582        }
3583
3584        /// Write a file at a store-relative path, creating parent dirs.
3585        fn write(&self, rel: &str, contents: &str) {
3586            let abs = self.dir.path().join(rel);
3587            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3588            fs::write(abs, contents).unwrap();
3589        }
3590
3591        fn store(&self) -> Store {
3592            Store {
3593                root: self.dir.path().to_path_buf(),
3594                config: self.config.clone(),
3595            }
3596        }
3597
3598        fn store_all(&self) -> Vec<Issue> {
3599            validate_all(&self.store()).unwrap()
3600        }
3601
3602        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3603        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3604        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3605        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3606        /// the sidecar carries the COMPLETE per-field projection and the fixture
3607        /// can't silently drift from what the index writer emits.
3608        fn rebuild_indexes(&self) {
3609            crate::index::Index::rebuild_all(&self.store()).unwrap();
3610        }
3611    }
3612
3613    /// True if any issue has this code.
3614    fn has(issues: &[Issue], code: &str) -> bool {
3615        issues.iter().any(|i| i.code == code)
3616    }
3617
3618    /// Count issues with a code.
3619    fn count(issues: &[Issue], code: &str) -> usize {
3620        issues.iter().filter(|i| i.code == code).count()
3621    }
3622
3623    /// The first issue with a code, or panic.
3624    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3625        issues
3626            .iter()
3627            .find(|i| i.code == code)
3628            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3629    }
3630
3631    /// A minimal valid `contact` body for reuse.
3632    fn valid_contact(summary: &str) -> String {
3633        format!(
3634            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3635        )
3636    }
3637
3638    // ── store marker ──────────────────────────────────────────────────────────
3639
3640    #[test]
3641    fn not_a_store_when_db_md_absent() {
3642        let fx = Fixture::bare();
3643        let issues = fx.store_all();
3644        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3645        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3646        assert!(issues[0].is_error());
3647    }
3648
3649    #[test]
3650    fn working_set_also_reports_not_a_store() {
3651        let fx = Fixture::bare();
3652        let issues = validate_working_set(&fx.store(), None).unwrap();
3653        assert!(has(&issues, codes::NOT_A_STORE));
3654    }
3655
3656    #[test]
3657    fn clean_store_has_no_issues() {
3658        let fx = Fixture::new();
3659        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3660        // Build the canonical indexes (complete per-field jsonl included) the
3661        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3662        // proven clean across every projected field, not just summary/type.
3663        fx.rebuild_indexes();
3664        let issues = fx.store_all();
3665        assert!(
3666            issues.is_empty(),
3667            "expected a clean store, got: {issues:#?}"
3668        );
3669    }
3670
3671    // ── DB.md structure ───────────────────────────────────────────────────────
3672
3673    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3674    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3675    /// would fail here).
3676    #[test]
3677    fn valid_db_md_emits_no_structure_issue() {
3678        let fx = Fixture::new();
3679        let issues = fx.store_all();
3680        assert!(
3681            !has(&issues, codes::DB_MD_BAD_TYPE)
3682                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3683                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3684            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3685        );
3686    }
3687
3688    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3689    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3690    /// accepting a non-`db-md` type, breaks this.
3691    #[test]
3692    fn db_md_wrong_type_is_error() {
3693        let fx = Fixture::new();
3694        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3695        let issues = fx.store_all();
3696        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3697        assert!(i.is_error());
3698        assert_eq!(i.file, PathBuf::from("DB.md"));
3699        assert_eq!(i.key.as_deref(), Some("type"));
3700        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3701    }
3702
3703    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3704    /// absent field, each keyed on its field name, anchored to the block top.
3705    #[test]
3706    fn db_md_missing_scope_and_owner_each_report() {
3707        let fx = Fixture::new();
3708        fx.write("DB.md", "---\ntype: db-md\n---\n");
3709        let issues = fx.store_all();
3710        assert_eq!(
3711            count(&issues, codes::DB_MD_MISSING_FIELD),
3712            2,
3713            "both scope and owner absent → two issues: {issues:#?}"
3714        );
3715        let keys: BTreeSet<Option<String>> = issues
3716            .iter()
3717            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3718            .map(|i| i.key.clone())
3719            .collect();
3720        assert_eq!(
3721            keys,
3722            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3723            "one issue keyed on each missing field"
3724        );
3725        for i in issues
3726            .iter()
3727            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3728        {
3729            assert!(i.is_error());
3730            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3731        }
3732    }
3733
3734    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3735    /// anchored to its own line — guarding against an "is the key textually
3736    /// present?" shortcut that would miss `owner:` with an empty value.
3737    #[test]
3738    fn db_md_blank_required_field_is_missing() {
3739        let fx = Fixture::new();
3740        fx.write(
3741            "DB.md",
3742            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3743        );
3744        let issues = fx.store_all();
3745        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3746        assert_eq!(i.key.as_deref(), Some("owner"));
3747        assert_eq!(
3748            i.line,
3749            Some(4),
3750            "a present-but-empty field anchors to its line"
3751        );
3752        assert!(
3753            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3754            "scope is present and non-empty → only owner reported"
3755        );
3756    }
3757
3758    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
3759    /// to the heading's file line; the three recognized sections stay silent.
3760    #[test]
3761    fn db_md_unknown_section_is_warning() {
3762        let fx = Fixture::new();
3763        fx.write(
3764            "DB.md",
3765            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
3766            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
3767            // 11 `## Glossary`.
3768            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
3769        );
3770        let issues = fx.store_all();
3771        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
3772        assert!(!i.is_error(), "unknown section is a warning, not an error");
3773        assert_eq!(i.severity, Severity::Warning);
3774        assert_eq!(
3775            i.line,
3776            Some(11),
3777            "anchors to the `## Glossary` heading line"
3778        );
3779        assert!(
3780            i.message.contains("Glossary"),
3781            "the message names the offending section: {}",
3782            i.message
3783        );
3784        // The recognized `## Agent instructions` section did NOT fire.
3785        assert_eq!(
3786            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
3787            1,
3788            "only the unrecognized section is flagged: {issues:#?}"
3789        );
3790    }
3791
3792    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
3793    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
3794    #[test]
3795    fn db_md_no_frontmatter_reports_type_and_both_fields() {
3796        let fx = Fixture::new();
3797        fx.write("DB.md", "# just a heading, no frontmatter\n");
3798        let issues = fx.store_all();
3799        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
3800        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
3801    }
3802
3803    // ── frontmatter ─────────────────────────────────────────────────────────
3804
3805    #[test]
3806    fn missing_type_is_error() {
3807        let fx = Fixture::new();
3808        fx.write(
3809            "records/contacts/a.md",
3810            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
3811        );
3812        let issues = fx.store_all();
3813        assert!(has(&issues, codes::FM_MISSING_TYPE));
3814        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
3815    }
3816
3817    #[test]
3818    fn missing_universal_timestamps_are_errors_on_content_files() {
3819        let fx = Fixture::new();
3820        fx.write(
3821            "records/contacts/a.md",
3822            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
3823        );
3824        let issues = fx.store_all();
3825
3826        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
3827        assert_eq!(missing_created.key.as_deref(), Some("created"));
3828        assert!(missing_created.is_error());
3829
3830        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
3831        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
3832        assert!(missing_updated.is_error());
3833    }
3834
3835    #[test]
3836    fn meta_files_do_not_require_universal_timestamps() {
3837        let fx = Fixture::new();
3838        let issues = fx.store_all();
3839
3840        assert!(
3841            !has(&issues, codes::FM_MISSING_CREATED),
3842            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3843        );
3844        assert!(
3845            !has(&issues, codes::FM_MISSING_UPDATED),
3846            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3847        );
3848    }
3849
3850    #[test]
3851    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
3852        let fx = Fixture::new();
3853        fx.write(
3854            "wiki/people/a.md",
3855            "# Just a heading\n\nNo frontmatter here.\n",
3856        );
3857        let issues = fx.store_all();
3858        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3859        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3860    }
3861
3862    #[test]
3863    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
3864        let fx = Fixture::new();
3865        fx.write("wiki/people/a.md", "---\n---\n\nbody\n");
3866        let issues = fx.store_all();
3867        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3868        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3869    }
3870
3871    #[test]
3872    fn malformed_yaml_is_error_and_suppresses_field_checks() {
3873        let fx = Fixture::new();
3874        // A tab inside a mapping value is invalid YAML.
3875        fx.write(
3876            "records/contacts/a.md",
3877            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
3878        );
3879        let issues = fx.store_all();
3880        let issue = find(&issues, codes::FM_MALFORMED_YAML);
3881        assert!(issue.is_error());
3882        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3883        // When YAML doesn't parse we don't *also* claim the summary is missing;
3884        // the agent fixes the YAML first.
3885        assert!(
3886            !has(&issues, codes::SUMMARY_MISSING),
3887            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
3888        );
3889    }
3890
3891    #[test]
3892    fn bad_created_timestamp_is_error() {
3893        let fx = Fixture::new();
3894        fx.write(
3895            "records/contacts/a.md",
3896            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
3897        );
3898        let issues = fx.store_all();
3899        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
3900        assert_eq!(issue.key.as_deref(), Some("created"));
3901        assert!(issue.is_error());
3902    }
3903
3904    #[test]
3905    fn date_only_created_is_rejected_but_type_date_field_accepted() {
3906        let fx = Fixture::new();
3907        // `created` must be a full RFC3339 datetime → a date-only value is bad.
3908        // `last_touch` is a type-specific date field → date-only is fine.
3909        fx.write(
3910            "records/contacts/a.md",
3911            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
3912        );
3913        let issues = fx.store_all();
3914        let created_issues: Vec<_> = issues
3915            .iter()
3916            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
3917            .collect();
3918        assert_eq!(
3919            created_issues.len(),
3920            1,
3921            "date-only `created` must fail: {issues:#?}"
3922        );
3923        assert!(
3924            !issues.iter().any(
3925                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
3926            ),
3927            "date-only `last_touch` is valid: {issues:#?}"
3928        );
3929    }
3930
3931    // ── summary ─────────────────────────────────────────────────────────────
3932
3933    #[test]
3934    fn summary_missing_empty_multiline_toolong() {
3935        let fx = Fixture::new();
3936        fx.write(
3937            "wiki/people/missing.md",
3938            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
3939        );
3940        fx.write(
3941            "wiki/people/empty.md",
3942            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
3943        );
3944        let long = "x".repeat(201);
3945        fx.write(
3946            "wiki/people/long.md",
3947            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
3948        );
3949        let issues = fx.store_all();
3950        assert!(has(&issues, codes::SUMMARY_MISSING));
3951        assert_eq!(
3952            find(&issues, codes::SUMMARY_MISSING).file,
3953            PathBuf::from("wiki/people/missing.md")
3954        );
3955        assert!(has(&issues, codes::SUMMARY_EMPTY));
3956        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
3957        assert_eq!(
3958            find(&issues, codes::SUMMARY_TOO_LONG).severity,
3959            Severity::Warning
3960        );
3961    }
3962
3963    #[test]
3964    fn summary_multiline_via_yaml_block_scalar() {
3965        let fx = Fixture::new();
3966        // A literal block scalar produces a value with a newline.
3967        fx.write(
3968            "wiki/people/a.md",
3969            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
3970        );
3971        let issues = fx.store_all();
3972        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
3973    }
3974
3975    #[test]
3976    fn summary_exactly_200_chars_is_ok() {
3977        let fx = Fixture::new();
3978        let s = "y".repeat(200);
3979        fx.write(
3980            "wiki/people/a.md",
3981            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
3982        );
3983        let issues = fx.store_all();
3984        assert!(
3985            !has(&issues, codes::SUMMARY_TOO_LONG),
3986            "200 is the bound, inclusive: {issues:#?}"
3987        );
3988    }
3989
3990    #[test]
3991    fn meta_files_need_no_summary() {
3992        let fx = Fixture::new();
3993        // The root/layer/type indexes + log carry no summary and must not be
3994        // flagged. (A lone DB.md store with one contact and full indexes.)
3995        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3996        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
3997        fx.write(
3998            "records/index.md",
3999            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4000        );
4001        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4002        fx.write(
4003            "records/contacts/index.jsonl",
4004            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4005        );
4006        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4007        let issues = fx.store_all();
4008        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4009    }
4010
4011    // ── tags ────────────────────────────────────────────────────────────────
4012
4013    #[test]
4014    fn nested_tags_warns_flat_tags_ok() {
4015        let fx = Fixture::new();
4016        fx.write(
4017            "records/contacts/nested.md",
4018            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4019        );
4020        fx.write(
4021            "records/contacts/flat.md",
4022            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4023        );
4024        let issues = fx.store_all();
4025        let tag_issues: Vec<_> = issues
4026            .iter()
4027            .filter(|i| i.code == codes::TAGS_MALFORMED)
4028            .collect();
4029        assert_eq!(
4030            tag_issues.len(),
4031            1,
4032            "only the nested-tags file should warn: {issues:#?}"
4033        );
4034        assert_eq!(
4035            tag_issues[0].file,
4036            PathBuf::from("records/contacts/nested.md")
4037        );
4038        assert_eq!(tag_issues[0].severity, Severity::Warning);
4039    }
4040
4041    // ── wiki-links ────────────────────────────────────────────────────────────
4042
4043    #[test]
4044    fn short_form_wiki_link_is_error() {
4045        let fx = Fixture::new();
4046        let mut body = valid_contact("links to a short form");
4047        body.push_str("\nSee [[sarah-chen]] for details.\n");
4048        fx.write("wiki/people/a.md", &body);
4049        let issues = fx.store_all();
4050        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4051        assert!(issue.is_error());
4052        assert!(issue.message.contains("sarah-chen"));
4053        // A short-form link must NOT also be reported broken — fix the form first.
4054        assert!(
4055            !issues
4056                .iter()
4057                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4058            "short-form should suppress broken: {issues:#?}"
4059        );
4060    }
4061
4062    #[test]
4063    fn broken_full_path_wiki_link_is_error() {
4064        let fx = Fixture::new();
4065        let mut body = valid_contact("links to a missing file");
4066        body.push_str("\nSee [[records/contacts/ghost]].\n");
4067        fx.write("wiki/people/a.md", &body);
4068        let issues = fx.store_all();
4069        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4070        assert!(issue.is_error());
4071        assert!(issue.message.contains("records/contacts/ghost"));
4072        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4073    }
4074
4075    #[test]
4076    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4077        let fx = Fixture::new();
4078        let mut body = valid_contact("links with traversal");
4079        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4080        fx.write("wiki/people/a.md", &body);
4081        let issues = fx.store_all();
4082        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4083        assert!(issue.message.contains("not a safe store-relative path"));
4084        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4085    }
4086
4087    #[test]
4088    fn valid_full_path_wiki_link_passes() {
4089        let fx = Fixture::new();
4090        fx.write("records/contacts/target.md", &valid_contact("target"));
4091        let mut body = valid_contact("links to target");
4092        body.push_str("\nSee [[records/contacts/target]].\n");
4093        fx.write("wiki/people/a.md", &body);
4094        let issues = fx.store_all();
4095        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4096        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4097    }
4098
4099    #[test]
4100    fn md_extension_wiki_link_warns_and_resolves() {
4101        let fx = Fixture::new();
4102        fx.write("records/contacts/target.md", &valid_contact("target"));
4103        let mut body = valid_contact("links with extension");
4104        body.push_str("\nSee [[records/contacts/target.md]].\n");
4105        fx.write("wiki/people/a.md", &body);
4106        let issues = fx.store_all();
4107        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4108        assert_eq!(issue.severity, Severity::Warning);
4109        assert_eq!(
4110            issue.suggestion.as_deref(),
4111            Some("drop the extension: [[records/contacts/target]]")
4112        );
4113        // The target exists once `.md` is stripped → not broken.
4114        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4115    }
4116
4117    #[test]
4118    fn wiki_links_in_code_fences_are_ignored() {
4119        let fx = Fixture::new();
4120        let mut body = valid_contact("has a fenced example");
4121        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4122        fx.write("wiki/people/a.md", &body);
4123        let issues = fx.store_all();
4124        assert!(
4125            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4126            "fenced wiki-links must be ignored: {issues:#?}"
4127        );
4128    }
4129
4130    #[test]
4131    fn flow_form_link_list_in_frontmatter_is_error() {
4132        let fx = Fixture::new();
4133        fx.write(
4134            "records/meetings/m.md",
4135            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4136        );
4137        let issues = fx.store_all();
4138        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4139        assert!(issue.is_error());
4140        assert_eq!(issue.key.as_deref(), Some("attendees"));
4141    }
4142
4143    #[test]
4144    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4145        let fx = Fixture::new();
4146        fx.write("records/contacts/a.md", &valid_contact("a"));
4147        fx.write("records/contacts/b.md", &valid_contact("b"));
4148        fx.write(
4149            "records/meetings/m.md",
4150            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4151        );
4152        let issues = fx.store_all();
4153        assert!(
4154            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4155            "{issues:#?}"
4156        );
4157        // Block-form link targets are still integrity-checked (both exist here).
4158        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4159    }
4160
4161    #[test]
4162    fn frontmatter_short_form_link_field_is_error() {
4163        let fx = Fixture::new();
4164        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4165        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4166        fx.write(
4167            "wiki/people/a.md",
4168            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4169        );
4170        let issues = fx.store_all();
4171        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4172        assert!(issue.is_error());
4173        assert_eq!(issue.key.as_deref(), Some("related"));
4174    }
4175
4176    #[test]
4177    fn unquoted_frontmatter_link_is_recognized() {
4178        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4179        // string. The validator must still see it as a wiki-link (text-based
4180        // extraction). A short-form custom field must report SHORT_FORM, and a
4181        // full-path one with a missing target must report BROKEN.
4182        let fx = Fixture::new();
4183        fx.write(
4184            "wiki/people/short.md",
4185            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4186        );
4187        fx.write(
4188            "wiki/people/broken.md",
4189            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4190        );
4191        let issues = fx.store_all();
4192        assert!(
4193            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4194                && i.file == Path::new("wiki/people/short.md")
4195                && i.key.as_deref() == Some("related")),
4196            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4197        );
4198        assert!(
4199            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4200                && i.file == Path::new("wiki/people/broken.md")),
4201            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4202        );
4203    }
4204
4205    #[test]
4206    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4207        // A short-form value in a *declared* link field (a `### contact` schema
4208        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4209        // (the target isn't under the prefix), and must NOT also be reported as a
4210        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4211        let mut fx = Fixture::new();
4212        fx.config.schemas.insert(
4213            "contact".into(),
4214            Schema {
4215                fields: vec![FieldSpec {
4216                    name: "company".into(),
4217                    link_prefix: Some(PathBuf::from("records/companies")),
4218                    ..Default::default()
4219                }],
4220                ..Default::default()
4221            },
4222        );
4223        fx.write(
4224            "records/contacts/a.md",
4225            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4226        );
4227        let issues = fx.store_all();
4228        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4229        assert_eq!(issue.key.as_deref(), Some("company"));
4230        // The same link must NOT also be double-reported via the generic path.
4231        assert!(
4232            !issues
4233                .iter()
4234                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4235                    && i.key.as_deref() == Some("company")),
4236            "schema link fields are checked once, by the schema path: {issues:#?}"
4237        );
4238    }
4239
4240    #[test]
4241    fn schema_link_field_with_md_extension_still_warns() {
4242        let mut fx = Fixture::new();
4243        fx.config.schemas.insert(
4244            "contact".into(),
4245            Schema {
4246                fields: vec![FieldSpec {
4247                    name: "company".into(),
4248                    link_prefix: Some(PathBuf::from("records/companies")),
4249                    ..Default::default()
4250                }],
4251                ..Default::default()
4252            },
4253        );
4254        fx.write(
4255            "records/companies/acme.md",
4256            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4257        );
4258        fx.write(
4259            "records/contacts/a.md",
4260            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4261        );
4262        let issues = fx.store_all();
4263        let issue = issues
4264            .iter()
4265            .find(|i| {
4266                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4267            })
4268            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4269        assert_eq!(issue.severity, Severity::Warning);
4270        assert!(
4271            !issues
4272                .iter()
4273                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4274            "extensionless existence check should still find acme.md: {issues:#?}"
4275        );
4276    }
4277
4278    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4279
4280    #[test]
4281    fn explicit_schema_required_shape_enum() {
4282        let fx = {
4283            let mut fx = Fixture::new();
4284            // contact schema: name required, email required+email shape,
4285            // status enum: active|inactive
4286            let schema = Schema {
4287                fields: vec![
4288                    FieldSpec {
4289                        name: "name".into(),
4290                        required: true,
4291                        ..Default::default()
4292                    },
4293                    FieldSpec {
4294                        name: "email".into(),
4295                        required: true,
4296                        shape: Some(Shape::Email),
4297                        ..Default::default()
4298                    },
4299                    FieldSpec {
4300                        name: "status".into(),
4301                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4302                        ..Default::default()
4303                    },
4304                ],
4305                ..Default::default()
4306            };
4307            fx.config.schemas.insert("contact".into(), schema);
4308            fx
4309        };
4310        fx.write(
4311            "records/contacts/a.md",
4312            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4313        );
4314        let issues = fx.store_all();
4315        // name absent → MISSING_REQUIRED
4316        assert!(
4317            issues
4318                .iter()
4319                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4320                    && i.key.as_deref() == Some("name")),
4321            "{issues:#?}"
4322        );
4323        // email malformed → SHAPE_MISMATCH
4324        assert!(
4325            issues.iter().any(
4326                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4327            ),
4328            "{issues:#?}"
4329        );
4330        // status archived not in enum → ENUM_VIOLATION
4331        assert!(
4332            issues
4333                .iter()
4334                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4335                    && i.key.as_deref() == Some("status")),
4336            "{issues:#?}"
4337        );
4338    }
4339
4340    #[test]
4341    fn schema_without_link_field_allows_plain_value() {
4342        // A `contact` schema with no `company` link field means a plain `company`
4343        // string is fine — schema enforcement is exactly what the store declares,
4344        // nothing implicit.
4345        let mut fx = Fixture::new();
4346        fx.config.schemas.insert(
4347            "contact".into(),
4348            Schema {
4349                fields: vec![FieldSpec {
4350                    name: "name".into(),
4351                    required: true,
4352                    ..Default::default()
4353                }],
4354                ..Default::default()
4355            },
4356        );
4357        fx.write(
4358            "records/contacts/a.md",
4359            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4360        );
4361        let issues = fx.store_all();
4362        assert!(
4363            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4364            "no declared link field for `company` → a plain value is fine: {issues:#?}"
4365        );
4366    }
4367
4368    #[test]
4369    fn schema_link_field_plain_value_is_prefix_mismatch() {
4370        // The surviving link-enforcement path: a declared `link to <prefix>/`
4371        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
4372        let mut fx = Fixture::new();
4373        fx.config.schemas.insert(
4374            "contact".into(),
4375            Schema {
4376                fields: vec![FieldSpec {
4377                    name: "company".into(),
4378                    link_prefix: Some(PathBuf::from("records/companies")),
4379                    ..Default::default()
4380                }],
4381                ..Default::default()
4382            },
4383        );
4384        fx.write(
4385            "records/contacts/a.md",
4386            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4387        );
4388        let issues = fx.store_all();
4389        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4390        assert_eq!(issue.key.as_deref(), Some("company"));
4391        assert!(issue
4392            .suggestion
4393            .as_deref()
4394            .unwrap()
4395            .contains("records/companies/"));
4396    }
4397
4398    #[test]
4399    fn schema_shape_int_and_url_and_currency() {
4400        let mut fx = Fixture::new();
4401        fx.config.schemas.insert(
4402            "widget".into(),
4403            Schema {
4404                fields: vec![
4405                    FieldSpec {
4406                        name: "qty".into(),
4407                        shape: Some(Shape::Int),
4408                        ..Default::default()
4409                    },
4410                    FieldSpec {
4411                        name: "site".into(),
4412                        shape: Some(Shape::Url),
4413                        ..Default::default()
4414                    },
4415                    FieldSpec {
4416                        name: "price".into(),
4417                        shape: Some(Shape::Currency),
4418                        ..Default::default()
4419                    },
4420                ],
4421                ..Default::default()
4422            },
4423        );
4424        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
4425        // ISO code + amount). It must pass — it used to spuriously fail.
4426        fx.write(
4427            "records/widgets/ok.md",
4428            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4429        );
4430        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
4431        // because the old impl leaned on `f64::parse`. `price: inf` here guards
4432        // the under-rejection half of the finding.
4433        fx.write(
4434            "records/widgets/bad.md",
4435            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4436        );
4437        let issues = fx.store_all();
4438        let bad_shape: Vec<_> = issues
4439            .iter()
4440            .filter(|i| {
4441                i.code == codes::SCHEMA_SHAPE_MISMATCH
4442                    && i.file == Path::new("records/widgets/bad.md")
4443            })
4444            .map(|i| i.key.clone().unwrap_or_default())
4445            .collect();
4446        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4447        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4448        assert!(
4449            bad_shape.contains(&"price".to_string()),
4450            "inf must be rejected as currency: {issues:#?}"
4451        );
4452        assert!(
4453            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4454                && i.file == Path::new("records/widgets/ok.md")),
4455            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4456        );
4457    }
4458
4459    #[test]
4460    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4461        let mut fx = Fixture::new();
4462        fx.config.schemas.insert(
4463            "contact".into(),
4464            Schema {
4465                fields: vec![
4466                    FieldSpec {
4467                        name: "email".into(),
4468                        required: true,
4469                        shape: Some(Shape::Email),
4470                        ..Default::default()
4471                    },
4472                    FieldSpec {
4473                        name: "status".into(),
4474                        enum_values: Some(vec!["active".into(), "inactive".into()]),
4475                        ..Default::default()
4476                    },
4477                ],
4478                ..Default::default()
4479            },
4480        );
4481        // A required EMAIL field and an ENUM field, each holding a LIST. Both
4482        // used to slip through entirely (`scalar_string` → None → the shape and
4483        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
4484        fx.write(
4485            "records/contacts/bad.md",
4486            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
4487        );
4488        let issues = fx.store_all();
4489        let mismatched: Vec<_> = issues
4490            .iter()
4491            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4492            .map(|i| i.key.clone().unwrap_or_default())
4493            .collect();
4494        assert!(
4495            mismatched.contains(&"email".to_string()),
4496            "list-valued required email must flag: {issues:#?}"
4497        );
4498        assert!(
4499            mismatched.contains(&"status".to_string()),
4500            "list-valued enum must flag: {issues:#?}"
4501        );
4502    }
4503
4504    #[test]
4505    fn is_currency_accepts_codes_and_rejects_non_numeric() {
4506        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
4507        for ok in [
4508            "100",
4509            "1234.56",
4510            "$1,234.50",
4511            "USD 100", // the finding's headline probe — used to be false
4512            "usd 100", // case-insensitive code
4513            "EUR 9.50",
4514            "£12",
4515            "¥1000",
4516            "-5.00", // signed amounts are real (refunds)
4517            "+5",
4518            "1,000,000",
4519        ] {
4520            assert!(is_currency(ok), "expected currency: {ok:?}");
4521        }
4522        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
4523        // bare-code / exponent cases the docstring forbids.
4524        for bad in [
4525            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
4526            "12.999", // 3 decimals
4527            "1.2345", // 4 decimals
4528            "USD",    // bare code, no amount
4529            "$",      // bare symbol
4530            "free", "", " ", "1e3",      // exponent form
4531            "1.",       // trailing dot, no fractional digits
4532            ".5",       // leading dot, no integer digits
4533            "1 000",    // space as separator is not a thousands separator
4534            "USDD 100", // 4-letter "code" must not strip
4535        ] {
4536            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4537        }
4538    }
4539
4540    // ── policies ───────────────────────────────────────────────────────────
4541
4542    #[test]
4543    fn ignored_type_present_is_info() {
4544        let mut fx = Fixture::new();
4545        fx.config.ignored_types.push("temp".into());
4546        fx.write(
4547            "records/temps/x.md",
4548            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4549        );
4550        let issues = fx.store_all();
4551        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4552        assert_eq!(issue.severity, Severity::Info);
4553        assert!(!issue.is_error());
4554        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4555    }
4556
4557    #[test]
4558    fn wiki_page_derived_from_ignored_type_warns() {
4559        let mut fx = Fixture::new();
4560        fx.config.ignored_types.push("temp".into());
4561        fx.write(
4562            "records/temps/x.md",
4563            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4564        );
4565        fx.write(
4566            "wiki/themes/t.md",
4567            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4568        );
4569        let issues = fx.store_all();
4570        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4571        assert_eq!(issue.severity, Severity::Warning);
4572        assert_eq!(issue.key.as_deref(), Some("derived_from"));
4573        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4574    }
4575
4576    /// The shared `derived_from_ignored_type` entry point — the single
4577    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
4578    /// warning) now route through, so they cannot diverge. This pins its
4579    /// contract directly: the type gate, the empty-ignored-types gate, a
4580    /// positive match carrying the resolved target type, and a non-ignored
4581    /// target rejected.
4582    #[test]
4583    fn derived_from_ignored_type_is_the_shared_policy_decision() {
4584        let mut fx = Fixture::new();
4585        fx.config.ignored_types.push("secret".into());
4586        // An ignored-type record …
4587        fx.write(
4588            "records/secrets/s.md",
4589            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4590        );
4591        // … and a non-ignored record.
4592        fx.write(
4593            "records/contacts/c.md",
4594            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4595        );
4596        let store = fx.store();
4597
4598        // Positive: a wiki-page deriving from the ignored-type record matches,
4599        // and the hit carries both the target (as written) and its resolved type.
4600        let hit =
4601            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s"))
4602                .expect("wiki-page → ignored-type record must match");
4603        assert_eq!(hit.target, "records/secrets/s");
4604        assert_eq!(hit.target_type, "secret");
4605
4606        // Type gate: a non-`wiki-page` type never triggers, even with the same
4607        // ignored-type target.
4608        assert_eq!(
4609            derived_from_ignored_type(&store, "contact", std::iter::once("records/secrets/s")),
4610            None,
4611            "only wiki-page derivation is policed"
4612        );
4613
4614        // Target gate: a wiki-page deriving from a non-ignored record is fine.
4615        assert_eq!(
4616            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/contacts/c")),
4617            None,
4618            "deriving from a non-ignored type is allowed"
4619        );
4620
4621        // First match wins across multiple targets (here the second is the hit).
4622        let hit = derived_from_ignored_type(
4623            &store,
4624            "wiki-page",
4625            ["records/contacts/c", "records/secrets/s"],
4626        )
4627        .expect("a later ignored-type target must still be found");
4628        assert_eq!(hit.target, "records/secrets/s");
4629
4630        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
4631        fx.config.ignored_types.clear();
4632        let store = fx.store();
4633        assert_eq!(
4634            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s")),
4635            None,
4636            "an empty ignored-types policy short-circuits"
4637        );
4638    }
4639
4640    // ── duplicates ───────────────────────────────────────────────────────────
4641
4642    #[test]
4643    fn dup_id_is_hard_error_with_related() {
4644        let fx = Fixture::new();
4645        fx.write(
4646            "records/contacts/a.md",
4647            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4648        );
4649        fx.write(
4650            "records/contacts/b.md",
4651            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4652        );
4653        let issues = fx.store_all();
4654        // Reporting rule #1: ONE issue per collision group, keyed on the
4655        // lexicographically smallest path (`a.md`), partner in `related`.
4656        assert_eq!(
4657            count(&issues, codes::DUP_ID),
4658            1,
4659            "one issue per group: {issues:#?}"
4660        );
4661        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
4662        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
4663        assert!(a.is_error());
4664        assert_eq!(a.key.as_deref(), Some("id"));
4665        assert_eq!(
4666            a.line,
4667            Some(3),
4668            "anchors to the `id` line on the reported file"
4669        );
4670        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
4671    }
4672
4673    #[test]
4674    fn dup_id_not_fired_in_working_set() {
4675        // DUP_* is an --all-only cross-file check; the working set must not run it.
4676        let fx = Fixture::new();
4677        fx.write(
4678            "records/contacts/a.md",
4679            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4680        );
4681        fx.write(
4682            "records/contacts/b.md",
4683            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4684        );
4685        // Log says both changed since epoch, so they're in the working set.
4686        fx.write(
4687            "log.md",
4688            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
4689        );
4690        let issues = validate_working_set(&fx.store(), None).unwrap();
4691        assert!(
4692            !has(&issues, codes::DUP_ID),
4693            "DUP_ID is --all only: {issues:#?}"
4694        );
4695    }
4696
4697    #[test]
4698    fn dup_unique_key_single_field_is_warning() {
4699        let mut fx = Fixture::new();
4700        // contact declares `- unique: email`.
4701        fx.config.schemas.insert(
4702            "contact".into(),
4703            Schema {
4704                unique_keys: vec![vec!["email".into()]],
4705                ..Default::default()
4706            },
4707        );
4708        for (f, name) in [("a", "A"), ("b", "B")] {
4709            fx.write(
4710                &format!("records/contacts/{f}.md"),
4711                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
4712            );
4713        }
4714        let issues = fx.store_all();
4715        // One issue per group (rule #1), keyed on the smallest path, anchored to
4716        // the single `email` field.
4717        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
4718        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4719        assert_eq!(dup.severity, Severity::Warning);
4720        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
4721        assert_eq!(dup.key.as_deref(), Some("email"));
4722        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
4723    }
4724
4725    #[test]
4726    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
4727        let mut fx = Fixture::new();
4728        // expense declares `- unique: date, amount, vendor` (a compound key).
4729        fx.config.schemas.insert(
4730            "expense".into(),
4731            Schema {
4732                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
4733                ..Default::default()
4734            },
4735        );
4736        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
4737        let exp = |f: &str, amount: &str| {
4738            format!(
4739            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
4740        )
4741        };
4742        fx.write("records/expenses/e1.md", &exp("e1", "100"));
4743        fx.write("records/expenses/e2.md", &exp("e2", "100"));
4744        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
4745        let issues = fx.store_all();
4746        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
4747        // (e1) with e2 in `related`; e3 differs on amount and never appears.
4748        assert_eq!(
4749            count(&issues, codes::DUP_UNIQUE_KEY),
4750            1,
4751            "only e1+e2 collide, one issue: {issues:#?}"
4752        );
4753        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4754        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
4755        assert_eq!(
4756            dup.line,
4757            Some(1),
4758            "compound-key collision anchors to line 1"
4759        );
4760        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
4761        assert!(
4762            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
4763                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
4764            "e3 differs on amount and must not collide: {issues:#?}"
4765        );
4766    }
4767
4768    #[test]
4769    fn dup_unique_key_list_field_is_order_independent() {
4770        let mut fx = Fixture::new();
4771        // meeting declares `- unique: date, attendees`; the list field is a set.
4772        fx.config.schemas.insert(
4773            "meeting".into(),
4774            Schema {
4775                unique_keys: vec![vec!["date".into(), "attendees".into()]],
4776                ..Default::default()
4777            },
4778        );
4779        fx.write("records/contacts/a.md", &valid_contact("a"));
4780        fx.write("records/contacts/b.md", &valid_contact("b"));
4781        let m = |f: &str, order: &str| {
4782            let attendees = if order == "ab" {
4783                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
4784            } else {
4785                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
4786            };
4787            format!(
4788                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
4789            )
4790        };
4791        fx.write("records/meetings/m1.md", &m("m1", "ab"));
4792        fx.write("records/meetings/m2.md", &m("m2", "ba"));
4793        let issues = fx.store_all();
4794        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
4795        // → a single issue on the smaller path.
4796        assert_eq!(
4797            count(&issues, codes::DUP_UNIQUE_KEY),
4798            1,
4799            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
4800        );
4801        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4802        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
4803        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
4804    }
4805
4806    // ── indexes ───────────────────────────────────────────────────────────────
4807
4808    #[test]
4809    fn missing_indexes_at_all_three_levels() {
4810        let fx = Fixture::new();
4811        fx.write("records/contacts/a.md", &valid_contact("a"));
4812        let issues = fx.store_all();
4813        // root, layer (records), and type-folder (records/contacts) all missing.
4814        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
4815        // would-be index.md), per the field convention `EXPECTED` pins.
4816        let missing_files: BTreeSet<PathBuf> = issues
4817            .iter()
4818            .filter(|i| i.code == codes::INDEX_MISSING)
4819            .map(|i| i.file.clone())
4820            .collect();
4821        assert!(
4822            missing_files.contains(&PathBuf::from("index.md")),
4823            "{issues:#?}"
4824        );
4825        assert!(
4826            missing_files.contains(&PathBuf::from("records/index.md")),
4827            "{issues:#?}"
4828        );
4829        assert!(
4830            missing_files.contains(&PathBuf::from("records/contacts")),
4831            "{issues:#?}"
4832        );
4833        // When the index.md is entirely absent we do NOT additionally fire
4834        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
4835        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
4836    }
4837
4838    #[test]
4839    fn index_stale_entry_and_missing_entry() {
4840        let fx = Fixture::new();
4841        fx.write(
4842            "records/contacts/present.md",
4843            &valid_contact("present contact"),
4844        );
4845        // Indexes for the parents (root/layer) present so we isolate type-folder.
4846        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4847        fx.write(
4848            "records/index.md",
4849            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4850        );
4851        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
4852        fx.write(
4853            "records/contacts/index.md",
4854            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
4855        );
4856        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
4857        let issues = fx.store_all();
4858        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
4859        assert!(stale.message.contains("ghost"));
4860        assert!(stale.is_error());
4861        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
4862        assert!(
4863            missing.message.contains("present.md"),
4864            "{}",
4865            missing.message
4866        );
4867    }
4868
4869    #[test]
4870    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
4871        let fx = Fixture::new();
4872        fx.write("records/contacts/a.md", &valid_contact("a"));
4873        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4874        fx.write(
4875            "records/index.md",
4876            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4877        );
4878        fx.write(
4879            "records/contacts/index.md",
4880            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
4881        );
4882        fx.write(
4883            "records/contacts/index.jsonl",
4884            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4885        );
4886        let issues = fx.store_all();
4887        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
4888        assert!(stale.message.contains("not a safe store-relative path"));
4889    }
4890
4891    #[test]
4892    fn index_summary_mismatch() {
4893        let fx = Fixture::new();
4894        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
4895        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4896        fx.write(
4897            "records/index.md",
4898            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4899        );
4900        fx.write(
4901            "records/contacts/index.md",
4902            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
4903        );
4904        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
4905        let issues = fx.store_all();
4906        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
4907        assert!(issue.is_error());
4908        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
4909    }
4910
4911    #[test]
4912    fn index_summary_match_passes() {
4913        let fx = Fixture::new();
4914        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
4915        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4916        fx.write(
4917            "records/index.md",
4918            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4919        );
4920        fx.write(
4921            "records/contacts/index.md",
4922            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
4923        );
4924        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
4925        let issues = fx.store_all();
4926        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
4927    }
4928
4929    #[test]
4930    fn index_entry_with_tag_suffix_matches_summary() {
4931        let fx = Fixture::new();
4932        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
4933        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4934        fx.write(
4935            "records/index.md",
4936            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4937        );
4938        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
4939        // delimiter `crate::index::format_md_entry` emits for a tagged file),
4940        // which must be stripped before comparing against the file's summary.
4941        fx.write(
4942            "records/contacts/index.md",
4943            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
4944        );
4945        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
4946        let issues = fx.store_all();
4947        assert!(
4948            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
4949            "tag suffix should be stripped: {issues:#?}"
4950        );
4951    }
4952
4953    #[test]
4954    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
4955        // Regression (the finding): a tagless file whose `summary` legitimately
4956        // ends in a single-spaced ` · #word` tail round-trips through `index
4957        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
4958        // file has no tags). The validator must NOT mistake that single-spaced
4959        // tail for the renderer's tag suffix, or it reports a spurious — and
4960        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
4961        let fx = Fixture::new();
4962        fx.write(
4963            "records/contacts/a.md",
4964            &valid_contact("Standup notes · #standup"),
4965        );
4966        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4967        fx.write(
4968            "records/index.md",
4969            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4970        );
4971        fx.write(
4972            "records/contacts/index.md",
4973            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
4974        );
4975        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
4976        let issues = fx.store_all();
4977        assert!(
4978            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
4979            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
4980        );
4981    }
4982
4983    #[test]
4984    fn index_jsonl_desync_missing_file_in_jsonl() {
4985        let fx = Fixture::new();
4986        fx.write("records/contacts/a.md", &valid_contact("a"));
4987        fx.write("records/contacts/b.md", &valid_contact("b"));
4988        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
4989        fx.write(
4990            "records/index.md",
4991            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4992        );
4993        fx.write(
4994            "records/contacts/index.md",
4995            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
4996        );
4997        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
4998        fx.write(
4999            "records/contacts/index.jsonl",
5000            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5001        );
5002        let issues = fx.store_all();
5003        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5004        assert!(desync.message.contains("b.md"), "{}", desync.message);
5005    }
5006
5007    #[test]
5008    fn index_jsonl_desync_record_points_at_missing_file() {
5009        let fx = Fixture::new();
5010        fx.write("records/contacts/a.md", &valid_contact("a"));
5011        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5012        fx.write(
5013            "records/index.md",
5014            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5015        );
5016        fx.write(
5017            "records/contacts/index.md",
5018            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5019        );
5020        fx.write(
5021            "records/contacts/index.jsonl",
5022            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5023        );
5024        let issues = fx.store_all();
5025        assert!(
5026            issues
5027                .iter()
5028                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5029            "{issues:#?}"
5030        );
5031    }
5032
5033    #[test]
5034    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5035        let fx = Fixture::new();
5036        fx.write("records/contacts/a.md", &valid_contact("a"));
5037        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5038        fx.write(
5039            "records/index.md",
5040            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5041        );
5042        fx.write(
5043            "records/contacts/index.md",
5044            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5045        );
5046        fx.write(
5047            "records/contacts/index.jsonl",
5048            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5049        );
5050        let issues = fx.store_all();
5051        assert!(
5052            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5053                && i.message.contains("not a safe store-relative path")),
5054            "{issues:#?}"
5055        );
5056    }
5057
5058    #[test]
5059    fn index_jsonl_stale_summary() {
5060        let fx = Fixture::new();
5061        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5062        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5063        fx.write(
5064            "records/index.md",
5065            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5066        );
5067        fx.write(
5068            "records/contacts/index.md",
5069            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5070        );
5071        // jsonl summary disagrees with the file frontmatter.
5072        fx.write(
5073            "records/contacts/index.jsonl",
5074            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5075        );
5076        let issues = fx.store_all();
5077        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5078        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5079        assert!(stale.key.as_deref().unwrap().contains("summary"));
5080    }
5081
5082    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5083    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5084    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5085    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5086    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5087    /// `email` validated clean and answered `--where email=…` with a phantom
5088    /// value present in no file. This is the direct regression guard.
5089    #[test]
5090    fn index_jsonl_stale_queryable_field_email() {
5091        let fx = Fixture::new();
5092        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5093        fx.write("records/contacts/a.md", contact);
5094        // Start from the canonical, fully-correct sidecar set …
5095        fx.rebuild_indexes();
5096        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5097        let good = fs::read_to_string(&jsonl_path).unwrap();
5098        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5099        assert!(
5100            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5101            "freshly-rebuilt sidecar must not be stale"
5102        );
5103        // … then desync ONLY the email so it's the single differing field.
5104        assert!(
5105            good.contains("real@correct.com"),
5106            "sidecar projects email: {good}"
5107        );
5108        fx.write(
5109            "records/contacts/index.jsonl",
5110            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5111        );
5112
5113        let issues = fx.store_all();
5114        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5115        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5116        // The mismatch is reported precisely on `email`, and summary/type — which
5117        // still match — are NOT named.
5118        let key = stale.key.as_deref().unwrap();
5119        assert!(
5120            key.contains("email"),
5121            "expected `email` in stale key, got {key:?}"
5122        );
5123        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5124        assert!(!key.contains("type"), "type still matches: {key:?}");
5125    }
5126
5127    /// Broaden the guard across the typed/list/timestamp projections at once:
5128    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5129    /// caught, with all three named in one issue.
5130    #[test]
5131    fn index_jsonl_stale_typed_and_list_fields() {
5132        let fx = Fixture::new();
5133        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5134        fx.write("records/expenses/e.md", expense);
5135        fx.rebuild_indexes();
5136        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5137        let good = fs::read_to_string(&jsonl_path).unwrap();
5138        assert!(
5139            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5140            "freshly-rebuilt sidecar must not be stale"
5141        );
5142        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5143        let stale_line = good
5144            .replace("\"q2\"", "\"WRONG-TAG\"")
5145            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5146            .replace("1299", "9999");
5147        fx.write("records/expenses/index.jsonl", &stale_line);
5148
5149        let issues = fx.store_all();
5150        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5151        let key = stale.key.as_deref().unwrap();
5152        for expected in ["amount", "tags", "updated"] {
5153            assert!(
5154                key.contains(expected),
5155                "expected `{expected}` in stale key, got {key:?}"
5156            );
5157        }
5158    }
5159
5160    #[test]
5161    fn index_orphan_in_noncanonical_folder() {
5162        let fx = Fixture::new();
5163        fx.write("records/contacts/a.md", &valid_contact("a"));
5164        // Build the canonical indexes so they aren't reported as orphans.
5165        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5166        fx.write(
5167            "records/index.md",
5168            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5169        );
5170        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5171        fx.write(
5172            "records/contacts/index.jsonl",
5173            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5174        );
5175        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5176        fx.write(
5177            "records/contacts/subfolder/index.md",
5178            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5179        );
5180        let issues = fx.store_all();
5181        let orphan = find(&issues, codes::INDEX_ORPHAN);
5182        assert_eq!(orphan.severity, Severity::Warning);
5183        assert_eq!(
5184            orphan.file,
5185            PathBuf::from("records/contacts/subfolder/index.md")
5186        );
5187    }
5188
5189    #[test]
5190    fn index_wrong_scope() {
5191        let fx = Fixture::new();
5192        fx.write("records/contacts/a.md", &valid_contact("a"));
5193        // Root index declares the wrong scope.
5194        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5195        fx.write(
5196            "records/index.md",
5197            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5198        );
5199        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5200        fx.write(
5201            "records/contacts/index.jsonl",
5202            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5203        );
5204        let issues = fx.store_all();
5205        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5206        assert_eq!(issue.severity, Severity::Warning);
5207        assert_eq!(issue.file, PathBuf::from("index.md"));
5208    }
5209
5210    #[test]
5211    fn capped_type_folder_index_does_not_flag_missing_entries() {
5212        // Over the 500-entry cap, omitted entries are expected, not an error.
5213        let fx = Fixture::new();
5214        for i in 0..501 {
5215            fx.write(
5216                &format!("records/contacts/c{i:04}.md"),
5217                &valid_contact(&format!("contact {i}")),
5218            );
5219        }
5220        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5221        fx.write(
5222            "records/index.md",
5223            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5224        );
5225        // Type-folder index lists only ONE entry + a More footer.
5226        fx.write(
5227            "records/contacts/index.md",
5228            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5229        );
5230        // jsonl must still be complete — write all 501 lines.
5231        let mut jsonl = String::new();
5232        for i in 0..501 {
5233            jsonl.push_str(&format!(
5234                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5235            ));
5236        }
5237        fx.write("records/contacts/index.jsonl", &jsonl);
5238        let issues = fx.store_all();
5239        assert!(
5240            !has(&issues, codes::INDEX_MISSING_ENTRY),
5241            "over the cap, missing browse entries are expected: {issues:#?}"
5242        );
5243        // But the jsonl is complete → no desync.
5244        assert!(
5245            !has(&issues, codes::INDEX_JSONL_DESYNC),
5246            "{:#?}",
5247            issues
5248                .iter()
5249                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5250                .collect::<Vec<_>>()
5251        );
5252    }
5253
5254    // ── log ────────────────────────────────────────────────────────────────
5255
5256    #[test]
5257    fn log_bad_timestamp_unknown_kind_out_of_order() {
5258        let fx = Fixture::new();
5259        fx.write(
5260            "log.md",
5261            concat!(
5262                "---\ntype: log\n---\n\n# Log\n\n",
5263                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5264                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5265                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5266                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5267            ),
5268        );
5269        let issues = fx.store_all();
5270        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5271        assert_eq!(
5272            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5273            Severity::Warning
5274        );
5275        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5276        assert_eq!(unknown.severity, Severity::Warning);
5277        assert!(unknown.message.contains("frobnicate"));
5278        assert!(unknown
5279            .suggestion
5280            .as_deref()
5281            .is_some_and(|s| s.contains("create")));
5282        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5283        assert!(bad.is_error());
5284    }
5285
5286    #[test]
5287    fn log_validate_entry_without_object_is_well_formed() {
5288        let fx = Fixture::new();
5289        fx.write(
5290            "log.md",
5291            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5292        );
5293        let issues = fx.store_all();
5294        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
5295        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
5296    }
5297
5298    #[test]
5299    fn log_in_order_is_clean() {
5300        let fx = Fixture::new();
5301        fx.write(
5302            "log.md",
5303            concat!(
5304                "---\ntype: log\n---\n\n",
5305                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5306                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
5307            ),
5308        );
5309        let issues = fx.store_all();
5310        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5311    }
5312
5313    #[test]
5314    fn log_not_checked_in_working_set() {
5315        // log.md ordering is an --all-only check.
5316        let fx = Fixture::new();
5317        fx.write(
5318            "log.md",
5319            concat!(
5320                "---\ntype: log\n---\n\n",
5321                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5322                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
5323            ),
5324        );
5325        let issues = validate_working_set(&fx.store(), None).unwrap();
5326        assert!(
5327            !has(&issues, codes::LOG_OUT_OF_ORDER),
5328            "log ordering is --all only: {issues:#?}"
5329        );
5330    }
5331
5332    // ── working-set scoping ───────────────────────────────────────────────────
5333
5334    #[test]
5335    fn working_set_validates_only_changed_files() {
5336        let fx = Fixture::new();
5337        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
5338        // in the log → working set must skip it.
5339        fx.write(
5340            "records/contacts/dirty.md",
5341            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5342        );
5343        fx.write(
5344            "records/contacts/unlogged.md",
5345            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5346        );
5347        fx.write(
5348            "log.md",
5349            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
5350        );
5351        let issues = validate_working_set(&fx.store(), None).unwrap();
5352        assert!(
5353            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
5354                && i.file == Path::new("records/contacts/dirty.md")),
5355            "{issues:#?}"
5356        );
5357        assert!(
5358            !issues
5359                .iter()
5360                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
5361            "unlogged file must not be in the working set: {issues:#?}"
5362        );
5363    }
5364
5365    #[test]
5366    fn working_set_includes_incoming_linkers_to_changed_path() {
5367        let fx = Fixture::new();
5368        // `changed` was renamed/removed (logged). `linker` points at it with a
5369        // now-broken link and was NOT itself logged — but must be pulled in.
5370        fx.write(
5371            "wiki/people/linker.md",
5372            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
5373        );
5374        // `changed.md` does NOT exist on disk (removed).
5375        fx.write(
5376            "log.md",
5377            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
5378        );
5379        let issues = validate_working_set(&fx.store(), None).unwrap();
5380        assert!(
5381            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
5382                && i.file == Path::new("wiki/people/linker.md")),
5383            "incoming linker to a removed path must be validated: {issues:#?}"
5384        );
5385    }
5386
5387    #[test]
5388    fn working_set_respects_explicit_since_cutoff() {
5389        let fx = Fixture::new();
5390        fx.write(
5391            "records/contacts/old.md",
5392            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5393        );
5394        fx.write(
5395            "records/contacts/new.md",
5396            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5397        );
5398        fx.write(
5399            "log.md",
5400            concat!(
5401                "---\ntype: log\n---\n\n",
5402                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
5403                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
5404            ),
5405        );
5406        // Cutoff after `old` but before `new`.
5407        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
5408        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
5409        assert!(
5410            issues
5411                .iter()
5412                .any(|i| i.file == Path::new("records/contacts/new.md")),
5413            "{issues:#?}"
5414        );
5415        assert!(
5416            !issues
5417                .iter()
5418                .any(|i| i.file == Path::new("records/contacts/old.md")),
5419            "old change is before the cutoff: {issues:#?}"
5420        );
5421    }
5422
5423    #[test]
5424    fn working_set_default_since_is_last_validate_entry() {
5425        let fx = Fixture::new();
5426        // `before` changed before the last validate; `after` changed after.
5427        fx.write(
5428            "records/contacts/before.md",
5429            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5430        );
5431        fx.write(
5432            "records/contacts/after.md",
5433            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
5434        );
5435        fx.write(
5436            "log.md",
5437            concat!(
5438                "---\ntype: log\n---\n\n",
5439                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
5440                "## [2026-05-21 10:00] validate\nPASS\n\n",
5441                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
5442            ),
5443        );
5444        let issues = validate_working_set(&fx.store(), None).unwrap();
5445        assert!(
5446            issues
5447                .iter()
5448                .any(|i| i.file == Path::new("records/contacts/after.md")),
5449            "{issues:#?}"
5450        );
5451        assert!(
5452            !issues
5453                .iter()
5454                .any(|i| i.file == Path::new("records/contacts/before.md")),
5455            "change before the last validate entry is outside the default window: {issues:#?}"
5456        );
5457    }
5458
5459    // ── ordering / determinism ────────────────────────────────────────────────
5460
5461    #[test]
5462    fn issues_are_sorted_by_file_then_line() {
5463        let fx = Fixture::new();
5464        fx.write("wiki/people/z.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5465        fx.write("wiki/people/a.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
5466        let issues = fx.store_all();
5467        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
5468        let mut sorted = files.clone();
5469        sorted.sort();
5470        assert_eq!(
5471            files, sorted,
5472            "issues must be emitted in a stable file order"
5473        );
5474    }
5475
5476    // ── boundaries: codes validate must NOT emit ──────────────────────────────
5477
5478    #[test]
5479    fn frozen_page_is_not_a_validate_error() {
5480        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
5481        // A clean file listed in `### Frozen pages` must validate clean.
5482        let mut fx = Fixture::new();
5483        fx.config
5484            .frozen_pages
5485            .push(PathBuf::from("records/decisions/d.md"));
5486        fx.write(
5487            "records/decisions/d.md",
5488            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
5489        );
5490        let issues = fx.store_all();
5491        assert!(
5492            !has(&issues, codes::POLICY_FROZEN_PAGE),
5493            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
5494        );
5495    }
5496
5497    #[test]
5498    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
5499        // The full-path doctrine makes ambiguity impossible; the defensive code
5500        // must never fire on a normal store.
5501        let fx = Fixture::new();
5502        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
5503        let mut body = valid_contact("links to sarah");
5504        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
5505        fx.write("wiki/people/p.md", &body);
5506        let issues = fx.store_all();
5507        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5508    }
5509
5510    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5511
5512    #[test]
5513    fn unknown_type_passes_through() {
5514        // A custom type is ambient context: it has a `type`, so no
5515        // FM_MISSING_TYPE, and with no matching schema there are no schema
5516        // errors. Only the universal contract (summary, timestamps) applies.
5517        let fx = Fixture::new();
5518        fx.write(
5519            "records/proposals/x.md",
5520            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5521        );
5522        let issues = fx.store_all();
5523        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5524        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5525        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5526        // The unknown fields don't trip anything.
5527        assert!(
5528            !issues
5529                .iter()
5530                .any(|i| i.key.as_deref() == Some("custom_field")
5531                    || i.key.as_deref() == Some("budget")),
5532            "unknown fields are ambient context: {issues:#?}"
5533        );
5534    }
5535
5536    // ── find_links_to prefix-collision safety (working set) ───────────────────
5537
5538    #[test]
5539    fn incoming_linker_scan_does_not_prefix_match() {
5540        // A changed `records/contacts/sarah` must NOT pull in a file that only
5541        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5542        let fx = Fixture::new();
5543        fx.write(
5544            "wiki/people/only-sarah-chen.md",
5545            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5546        );
5547        // The log says `records/contacts/sarah` (the shorter path) changed.
5548        fx.write(
5549            "log.md",
5550            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5551        );
5552        let issues = validate_working_set(&fx.store(), None).unwrap();
5553        assert!(
5554            !issues
5555                .iter()
5556                .any(|i| i.file == Path::new("wiki/people/only-sarah-chen.md")),
5557            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5558        );
5559    }
5560
5561    #[test]
5562    fn incoming_linker_scan_pulls_in_catalog_index_md() {
5563        // CONTRACT: the working-set incoming-linker scan rides the embedded-
5564        // ripgrep `Store::find_links_to`, which scans EVERY `.md` (including
5565        // `index.md` catalogs) — NOT the walk-and-read over `walk_content_files`,
5566        // which excludes `index.md`. A type-folder `index.md` that lists a now-
5567        // deleted target must be pulled into the working set so its dangling
5568        // catalog entry is flagged `WIKI_LINK_BROKEN`. The old walk-and-read
5569        // implementation skipped `index.md` and let this broken link survive the
5570        // loop silently; this test fails if anyone reverts to that path.
5571        let fx = Fixture::new();
5572        // A catalog that still lists the deleted contact (a real, common stale
5573        // state after a `delete`). No other file references the target, so the
5574        // catalog is the ONLY incoming linker — if it isn't scanned, nothing is.
5575        fx.write(
5576            "records/contacts/index.md",
5577            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5578        );
5579        // The log says `records/contacts/sarah-chen` was deleted.
5580        fx.write(
5581            "log.md",
5582            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5583        );
5584        let issues = validate_working_set(&fx.store(), None).unwrap();
5585        assert!(
5586            issues
5587                .iter()
5588                .any(|i| i.file == Path::new("records/contacts/index.md")
5589                    && i.code == codes::WIKI_LINK_BROKEN),
5590            "the catalog `index.md` linking to the deleted target must be pulled \
5591             into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
5592             uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
5593             walk-and-read): {issues:#?}"
5594        );
5595    }
5596
5597    #[test]
5598    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
5599        // CONTRACT (the O(changed × store) fix): the working-set scan finds
5600        // incoming linkers for EVERY changed object, and does so via the single
5601        // batch pass `Store::find_links_to_any` — not one full store read per
5602        // changed object. This test pins the behavior that makes the single-pass
5603        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
5604        // into the working set and flagged. A regression that scanned for only the
5605        // first/last changed object, or that dropped the batch union, would leave
5606        // one of the two broken links unreported and fail here.
5607        let fx = Fixture::new();
5608        // Linker A → deleted target #1 (in the body).
5609        fx.write(
5610            "wiki/people/refers-sarah.md",
5611            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5612        );
5613        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
5614        // sidecar `links` projection would miss, which is why this must be a
5615        // content scan, not a sidecar read).
5616        fx.write(
5617            "records/meetings/2026/05/kickoff.md",
5618            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
5619        );
5620        // The log says BOTH targets were deleted in this window.
5621        fx.write(
5622            "log.md",
5623            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
5624        );
5625
5626        let issues = validate_working_set(&fx.store(), None).unwrap();
5627        assert!(
5628            issues
5629                .iter()
5630                .any(|i| i.file == Path::new("wiki/people/refers-sarah.md")
5631                    && i.code == codes::WIKI_LINK_BROKEN),
5632            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
5633        );
5634        assert!(
5635            issues.iter().any(
5636                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
5637                    && i.code == codes::WIKI_LINK_BROKEN
5638            ),
5639            "linker to the SECOND deleted target (typed-field edge) must also be \
5640             pulled in and flagged — proves the scan covers the whole changed set, \
5641             not just one object: {issues:#?}"
5642        );
5643    }
5644
5645    #[test]
5646    fn frontmatter_block_sequence_links_each_get_their_own_line() {
5647        // Each block-sequence wiki-link reports on its own source line.
5648        let fx = Fixture::new();
5649        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
5650        fx.write(
5651            "records/meetings/m.md",
5652            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
5653        );
5654        let issues = fx.store_all();
5655        let broken_lines: BTreeSet<Option<u32>> = issues
5656            .iter()
5657            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
5658            .map(|i| i.line)
5659            .collect();
5660        assert_eq!(
5661            broken_lines.len(),
5662            2,
5663            "two distinct broken-link lines: {issues:#?}"
5664        );
5665    }
5666
5667    // ── Regression: null / non-scalar created/updated ────────────────────────
5668
5669    #[test]
5670    fn null_created_is_missing_not_silently_passed() {
5671        // Regression: a present-but-`null` `created:` previously slipped past
5672        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
5673        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
5674        let fx = Fixture::new();
5675        fx.write(
5676            "records/contacts/a.md",
5677            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5678        );
5679        let issues = fx.store_all();
5680        assert!(
5681            has(&issues, codes::FM_MISSING_CREATED),
5682            "null `created:` must read as missing: {issues:#?}"
5683        );
5684    }
5685
5686    #[test]
5687    fn sequence_created_is_bad_timestamp() {
5688        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
5689        let fx = Fixture::new();
5690        fx.write(
5691            "records/contacts/a.md",
5692            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
5693        );
5694        let issues = fx.store_all();
5695        assert!(
5696            issues
5697                .iter()
5698                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
5699            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
5700        );
5701    }
5702
5703    // ── Regression: schema required null / empty-collection ──────────────────
5704
5705    #[test]
5706    fn required_field_null_or_empty_collection_is_missing() {
5707        // Regression: a plain required field (no shape/enum) holding YAML null
5708        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
5709        // previously validated with 0 issues — `scalar_string` returned None and
5710        // `.unwrap_or(false)` treated the value as non-empty.
5711        for value in ["", " []", " {}"] {
5712            let mut fx = Fixture::new();
5713            fx.config.schemas.insert(
5714                "contact".into(),
5715                Schema {
5716                    fields: vec![FieldSpec {
5717                        name: "name".into(),
5718                        required: true,
5719                        ..Default::default()
5720                    }],
5721                    ..Default::default()
5722                },
5723            );
5724            fx.write(
5725                "records/contacts/a.md",
5726                &format!(
5727                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
5728                ),
5729            );
5730            let issues = fx.store_all();
5731            assert!(
5732                issues
5733                    .iter()
5734                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
5735                        && i.key.as_deref() == Some("name")),
5736                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
5737            );
5738        }
5739    }
5740
5741    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
5742
5743    #[test]
5744    fn wiki_link_to_raw_source_file_resolves() {
5745        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
5746        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
5747        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
5748        let fx = Fixture::new();
5749        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
5750        fx.write(
5751            "records/contacts/a.md",
5752            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
5753        );
5754        let issues = fx.store_all();
5755        assert!(
5756            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
5757            "a link to an existing raw source file must not be broken: {issues:#?}"
5758        );
5759    }
5760
5761    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
5762
5763    #[test]
5764    fn non_utf8_content_file_is_reported() {
5765        // Regression: a content file with invalid UTF-8 bytes made
5766        // check_content_file return None silently, so the store passed with exit
5767        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
5768        let fx = Fixture::new();
5769        let abs = fx.dir.path().join("records/notes/corrupt.md");
5770        fs::create_dir_all(abs.parent().unwrap()).unwrap();
5771        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
5772        let issues = validate_working_set(&fx.store(), None).unwrap();
5773        assert!(
5774            has(&issues, codes::FM_UNREADABLE),
5775            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
5776        );
5777    }
5778
5779    // ── Regression: code-fence char/run tracking ─────────────────────────────
5780
5781    #[test]
5782    fn tilde_fence_containing_backtick_fence_does_not_invert() {
5783        // Regression: a `~~~` block legally contains ``` lines (documenting a
5784        // backtick fence); a naive toggle inverted `in_fence` and checked the
5785        // demo `[[fake]]` inside the code block as a live link. The link inside
5786        // BOTH fences must be skipped.
5787        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
5788        let links = extract_wiki_links(body);
5789        assert!(
5790            links.is_empty(),
5791            "wiki-link inside a nested code fence must be skipped: {links:?}"
5792        );
5793    }
5794
5795    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
5796
5797    #[test]
5798    fn all_sweep_visits_in_layer_log_folder() {
5799        // Regression: `validate --all` pruned every dir named `log`, so a real
5800        // content folder like `records/log/` was invisible to the full sweep —
5801        // reporting FEWER errors than the default scope. A frontmatter-less file
5802        // there must still surface FM_MISSING_TYPE under --all.
5803        let fx = Fixture::new();
5804        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
5805        let issues = fx.store_all();
5806        assert!(
5807            has(&issues, codes::FM_MISSING_TYPE),
5808            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
5809        );
5810    }
5811
5812    // ── Regression: flow-form list with whitespace ───────────────────────────
5813
5814    #[test]
5815    fn flow_form_link_list_with_spaces_is_flagged() {
5816        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
5817        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
5818        // text test. The value-based detector must catch the whitespace variant.
5819        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
5820        assert!(
5821            keys.iter().any(|k| k == "attendees"),
5822            "spaced flow-form list must be detected: {keys:?}"
5823        );
5824    }
5825
5826    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
5827
5828    #[test]
5829    fn middot_hashtag_summary_tail_round_trips() {
5830        // Regression: a tagless summary that legitimately ends in a single-spaced
5831        // ` · #word` tail round-trips through the renderer verbatim, but the loose
5832        // ` · ` strip mistook it for the tag block and reported a spurious,
5833        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
5834        // exact double-spaced `  ·  ` delimiter.
5835        assert_eq!(
5836            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
5837            Some("Standup notes · #standup"),
5838            "a single-spaced middot tail is part of the summary, not a tag block"
5839        );
5840        // The renderer's real double-spaced tag suffix IS still stripped.
5841        assert_eq!(
5842            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
5843            Some("Renewal champion"),
5844            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
5845        );
5846    }
5847
5848    // ── Regression: shape Url / Email edge cases ─────────────────────────────
5849
5850    #[test]
5851    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
5852        assert!(is_url("http://x"), "an 8-char http URL is valid");
5853        assert!(is_url("https://x"), "a 9-char https URL is valid");
5854        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
5855        assert!(!is_url("https://"), "a bare https scheme is rejected");
5856    }
5857
5858    #[test]
5859    fn email_shape_rejects_double_at() {
5860        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
5861        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
5862        assert!(is_email("sarah@acme.com"), "a normal address still passes");
5863    }
5864
5865    // ── Regression: working-set vs --all agree on log.md links ───────────────
5866
5867    #[test]
5868    fn working_set_does_not_flag_log_md_body_links() {
5869        // Regression: the working-set incoming-linker scan runs root `log.md`
5870        // through the body wiki-link check, flagging a historical `[[deleted]]`
5871        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
5872        // the append-only log can't have "fixed". The root meta files must be
5873        // excluded from the body link check, matching --all.
5874        let fx = Fixture::new();
5875        fx.write("records/contacts/a.md", &valid_contact("A"));
5876        fx.write(
5877            "log.md",
5878            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
5879        );
5880        let issues = validate_working_set(&fx.store(), None).unwrap();
5881        assert!(
5882            !issues
5883                .iter()
5884                .any(|i| i.code == codes::WIKI_LINK_BROKEN
5885                    && i.file == std::path::Path::new("log.md")),
5886            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
5887        );
5888    }
5889
5890    // ── Regression: DB.md schema field lint ──────────────────────────────────
5891
5892    #[test]
5893    fn schema_duplicate_field_name_is_flagged() {
5894        let mut fx = Fixture::new();
5895        fx.config.schemas.insert(
5896            "contact".into(),
5897            Schema {
5898                fields: vec![
5899                    FieldSpec {
5900                        name: "name".into(),
5901                        required: true,
5902                        ..Default::default()
5903                    },
5904                    FieldSpec {
5905                        name: "name".into(),
5906                        ..Default::default()
5907                    },
5908                ],
5909                ..Default::default()
5910            },
5911        );
5912        let issues = fx.store_all();
5913        assert!(
5914            issues
5915                .iter()
5916                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
5917            "a duplicate schema field name must be flagged: {issues:#?}"
5918        );
5919    }
5920
5921    #[test]
5922    fn schema_unknown_modifier_is_info() {
5923        let mut fx = Fixture::new();
5924        fx.config.schemas.insert(
5925            "contact".into(),
5926            Schema {
5927                fields: vec![FieldSpec {
5928                    name: "name".into(),
5929                    unknown_modifiers: vec!["requierd".into()],
5930                    ..Default::default()
5931                }],
5932                ..Default::default()
5933            },
5934        );
5935        let issues = fx.store_all();
5936        assert!(
5937            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
5938                && i.severity == Severity::Info
5939                && i.key.as_deref() == Some("name")),
5940            "an unrecognized schema modifier must surface as Info: {issues:#?}"
5941        );
5942    }
5943
5944    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
5945    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
5946    /// and the module doc-comment promises this code implements "exactly those
5947    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
5948    /// new validation code is added to the engine but never documented.
5949    #[test]
5950    fn every_code_constant_is_documented_in_spec() {
5951        // Parse the canonical constant *values* straight out of this module's
5952        // source, so a future `pub const X: &str = "X";` is covered with no test
5953        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
5954        let this_src = include_str!("validate.rs");
5955        let mut codes_in_module: Vec<String> = Vec::new();
5956        let mut in_codes_mod = false;
5957        for line in this_src.lines() {
5958            let t = line.trim();
5959            if t.starts_with("pub mod codes") {
5960                in_codes_mod = true;
5961                continue;
5962            }
5963            // The `mod codes` block ends at its closing brace at column 0.
5964            if in_codes_mod && line == "}" {
5965                break;
5966            }
5967            if in_codes_mod {
5968                if let Some(rest) = t.strip_prefix("pub const ") {
5969                    // rest = `NAME: &str = "VALUE";`
5970                    let value = rest
5971                        .split_once('=')
5972                        .map(|(_, v)| v.trim())
5973                        .and_then(|v| v.strip_prefix('"'))
5974                        .and_then(|v| v.strip_suffix("\";"))
5975                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
5976                    codes_in_module.push(value.to_string());
5977                }
5978            }
5979        }
5980        assert!(
5981            codes_in_module.len() >= 36,
5982            "parsed only {} code constants from `mod codes`; the parser likely \
5983             broke against a source-format change",
5984            codes_in_module.len()
5985        );
5986
5987        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
5988        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
5989        let spec = fs::read_to_string(&spec_path)
5990            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
5991
5992        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
5993        let missing: Vec<&String> = codes_in_module
5994            .iter()
5995            .filter(|code| !spec.contains(&format!("| `{code}` |")))
5996            .collect();
5997        assert!(
5998            missing.is_empty(),
5999            "validation codes emitted by the engine but absent from SPEC.md \
6000             § Validation (the declared complete vocabulary): {missing:?}"
6001        );
6002    }
6003}