Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
//! per changed object, and not the parse-the-tree walk `--all` does). It never
//! builds the global cross-file state, and it calls [`Store::walk`] only in its
//! fallback content sweep, taken when no logged changes are found.
16//! [`validate_all`] is the full SWEEP: it adds the checks that need that global
17//! state — entity-dedup `DUP_*`, every-index sync, and `log.md` ordering.
18//!
19//! ## Why this module is self-contained
20//!
21//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
22//! log-header parse, and file walk here, reading only the two public,
23//! caller-populated fields of a [`Store`]: [`Store::root`] and
24//! [`Store::config`] — rather than routing through the sibling modules
25//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
26//! Keeping the checks local lets the validator report precise, per-issue
27//! diagnostics (exact codes, file, and context) without coupling its output to
28//! incidental behavior of the shared readers; the public surface and the
29//! emitted issue vocabulary are the contract.
30
31use std::collections::{BTreeMap, BTreeSet, HashMap};
32use std::path::{Component, Path, PathBuf};
33
34use chrono::{DateTime, FixedOffset, NaiveDateTime};
35use serde_norway::Value;
36
37use crate::parser::{Schema, Shape};
38use crate::store::Store;
39
/// How serious a validation [`Issue`] is.
///
/// Only [`Severity::Error`] fails validation (non-zero exit); warnings and
/// info-level findings never do.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point left to the agent's discretion.
    Warning,
    /// Reported for visibility only; has no effect on exit status.
    Info,
}
51
52/// A single structured validation finding. Agent-primary and machine-parseable
53/// via `--json`; `suggestion` is a deterministic remediation hint the agent
54/// applies without guessing.
55#[derive(Debug, Clone, PartialEq, Eq)]
56pub struct Issue {
57    /// The severity; only [`Severity::Error`] fails validation.
58    pub severity: Severity,
59    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
60    pub code: &'static str,
61    /// The file the issue is about.
62    pub file: PathBuf,
63    /// The 1-based line, when applicable.
64    pub line: Option<u32>,
65    /// The frontmatter key, when the issue is about a specific field.
66    pub key: Option<String>,
67    /// A human-readable message.
68    pub message: String,
69    /// A deterministic remediation hint, when one exists.
70    pub suggestion: Option<String>,
71    /// Other files involved (e.g. the duplicate partner in a collision).
72    pub related: Vec<PathBuf>,
73}
74
75impl Issue {
76    /// True if this issue fails validation (i.e. its severity is
77    /// [`Severity::Error`]).
78    pub fn is_error(&self) -> bool {
79        matches!(self.severity, Severity::Error)
80    }
81}
82
/// The canonical validation issue codes: one constant for each row of the
/// SPEC.md § Validation table. Call sites use these constants rather than bare
/// string literals so the code and the SPEC table cannot silently drift apart.
pub mod codes {
    // ── store identity (`DB.md`) ─────────────────────────────────────────────

    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter is missing `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` contains an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";

    // ── frontmatter ──────────────────────────────────────────────────────────

    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// The frontmatter block is not valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` is not ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";

    // ── summary ──────────────────────────────────────────────────────────────

    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";

    // ── wiki-links ───────────────────────────────────────────────────────────

    /// A wiki-link target is not a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file does not exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension, which should be dropped.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses inline `[[[a]], [[b]]]`; block form is required.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";

    // ── entity dedup ─────────────────────────────────────────────────────────

    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of one type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";

    // ── schema enforcement ───────────────────────────────────────────────────

    /// A field required by a `DB.md` schema is absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value does not match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field holds a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value is not a member of the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";

    // ── policies ─────────────────────────────────────────────────────────────

    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file whose type is listed under `### Ignored types` exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `wiki-page` derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";

    // ── log.md ───────────────────────────────────────────────────────────────

    /// A `log.md` entry header timestamp cannot be parsed.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind is not recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries are not in non-decreasing time order (possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";

    // ── indexes ──────────────────────────────────────────────────────────────

    /// A non-empty canonical folder has no `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file is not listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` does not match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text does not match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file is absent from the `index.jsonl`, or a jsonl record points at a
    /// missing file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields do not match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";

    // ── tags ─────────────────────────────────────────────────────────────────

    /// `tags` is not a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
}
169
/// Upper bound, in chars, that the SPEC places on a `summary`. A longer
/// summary is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
172
/// The `log.md` entry kinds the SPEC (§ `log.md`) recognizes. An entry whose
/// kind is not in this list is reported as `LOG_UNKNOWN_KIND` — a warning, not
/// an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    // Object lifecycle.
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    // Store maintenance.
    "validate",
    "index-rebuild",
    "contradiction",
];
186
187// ─────────────────────────────────────────────────────────────────────────────
188//  Public entrypoints
189// ─────────────────────────────────────────────────────────────────────────────
190
191/// **Loop default.** Validate the working set: content files changed since
192/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
193/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
194/// none of the cross-file global passes (entity-dedup, every-index sync,
195/// `log.md` ordering) that `--all` adds. If the default call finds no logged
196/// changed objects, it falls back to a per-file content sweep so an externally
197/// edited or freshly copied store cannot pass vacuously.
198///
199/// **Cost.** The changed set is read from `log.md` — O(changed): every
200/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
201/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
202/// over that set plus its incoming linkers — also O(changed). The one part that
203/// is *not* O(changed) is discovering those incoming linkers: a link to a
204/// changed path can live in the body or a typed frontmatter field of any file,
205/// so it is found by a **single** embedded-ripgrep pass over the store
206/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
207/// scan, flat in the changed-set size. (It was previously a full store read
208/// *per* changed object — `O(changed × store)`; that is the blow-up this path
209/// no longer pays.) The unavoidable single content scan is the same shape as
210/// free-text `dbmd search`; the sidecar `links` projection can't replace it
211/// because it omits body/typed-field edges.
212pub fn validate_working_set(
213    store: &Store,
214    since: Option<DateTime<FixedOffset>>,
215) -> crate::Result<Vec<Issue>> {
216    if !store_marker_present(store) {
217        return Ok(vec![not_a_store_issue(store)]);
218    }
219
220    let cutoff = match since {
221        Some(ts) => Some(ts),
222        None => last_validate_at(store),
223    };
224
225    // 1. Changed objects, straight from the log (O(changed) — never a walk).
226    let changed = changed_objects_since(store, cutoff);
227    if changed.is_empty() && since.is_none() {
228        return validate_content_sweep(store);
229    }
230
231    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
232    //    path (the linker may now be stale even though it didn't change). The
233    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
234    //    pass over the store for the WHOLE changed set (one `.md` walk, one
235    //    presence-only/early-exit scan per file), not one walk per object. This
236    //    is the fix for the `O(changed × store)` blow-up that calling
237    //    `find_links_to` in a loop produced (a full store read per changed
238    //    object); the cost is now a single store scan regardless of how many
239    //    objects changed. A returned self-link is harmlessly deduped by the set
240    //    (the object is already inserted below).
241    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
242    let mut working: BTreeSet<PathBuf> = changed;
243    for linker in store.find_links_to_any(&changed_targets)? {
244        working.insert(linker);
245    }
246
247    let mut issues = Vec::new();
248    for rel in &working {
249        let abs = store.root.join(rel);
250        // A changed path can be a *deletion* — skip files that no longer exist;
251        // the incoming-linker scan above already flagged links into them.
252        if !abs.is_file() {
253            continue;
254        }
255        // `None` basename index: the working-set pass does not build the
256        // store-wide basename map (that is a `--all`-only structure), so a bare
257        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
258        // `--all` sweep does the ambiguity upgrade.
259        check_content_file(store, rel, &abs, None, &mut issues);
260    }
261    issues.sort_by(issue_order);
262    Ok(issues)
263}
264
265fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
266    let mut issues = Vec::new();
267    for rel in store.walk()? {
268        let abs = store.root.join(&rel);
269        check_content_file(store, &rel, &abs, None, &mut issues);
270    }
271    issues.sort_by(issue_order);
272    Ok(issues)
273}
274
275/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
276/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
277/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
278/// loop.
279pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
280    if !store_marker_present(store) {
281        return Ok(vec![not_a_store_issue(store)]);
282    }
283
284    let mut issues = Vec::new();
285
286    // Store-identity file: `DB.md` shape (type / required fields / section
287    // headers). A single root file, checked once in the sweep — not a content
288    // file (it carries no `summary`), so it is not part of `walk_content_files`.
289    check_db_md(store, &mut issues);
290
291    let files = walk_content_files(&store.root);
292
293    // The basename index makes the short-form wiki-link check able to upgrade a
294    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
295    // Built once from the already-gathered sweep list (no extra walk); only the
296    // `--all` path has it (the working-set path stays O(changed)).
297    let basenames = build_basename_index(&files);
298
299    // Per-file checks over the whole store.
300    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
301    for rel in &files {
302        let abs = store.root.join(rel);
303        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
304            parsed.push((rel.clone(), p));
305        }
306    }
307
308    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
309    check_duplicates(store, &parsed, &mut issues);
310
311    // Cross-file: hierarchical index.md + index.jsonl sync.
312    check_indexes(store, &files, &mut issues);
313
314    // Cross-file: log.md well-formedness + ordering.
315    check_log(store, &mut issues);
316
317    issues.sort_by(issue_order);
318    Ok(issues)
319}
320
321// ─────────────────────────────────────────────────────────────────────────────
322//  Per-file content checks (shared by both scopes)
323// ─────────────────────────────────────────────────────────────────────────────
324
325/// What `validate_all`'s cross-file pass needs from a per-file parse: the
326/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
327/// text-based wiki-link extraction). The body and fence-line are consumed
328/// inline during the per-file pass and not carried here.
329struct Parsed {
330    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
331    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
332    fm: Option<BTreeMap<String, Value>>,
333    /// The raw frontmatter YAML text (between the fences) — the source for
334    /// text-based wiki-link extraction in dedup.
335    fm_yaml: String,
336}
337
338/// Run every per-file check on one content file, pushing issues. Returns the
339/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
340/// `None` only when the file is unreadable or has no frontmatter block at all
341/// (which for a content file is itself reported).
342fn check_content_file(
343    store: &Store,
344    rel: &Path,
345    abs: &Path,
346    basenames: Option<&BasenameIndex>,
347    issues: &mut Vec<Issue>,
348) -> Option<Parsed> {
349    let text = match std::fs::read_to_string(abs) {
350        Ok(t) => t,
351        Err(_) => return None,
352    };
353
354    let is_content = is_content_file(rel);
355
356    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
357        Some(split) => split,
358        None => {
359            // No frontmatter at all. For a content file that means there's no
360            // `type:` and no `summary:` — report both the way a parsed-but-empty
361            // file would, so the agent gets the same actionable codes.
362            if is_content {
363                push(
364                    issues,
365                    Severity::Error,
366                    codes::FM_MISSING_TYPE,
367                    rel,
368                    None,
369                    Some("type".into()),
370                    "content file has no frontmatter `type:`".into(),
371                    Some("add a YAML frontmatter block with `type:`".into()),
372                    vec![],
373                );
374                push(
375                    issues,
376                    Severity::Error,
377                    codes::SUMMARY_MISSING,
378                    rel,
379                    None,
380                    Some("summary".into()),
381                    "content file has no `summary`".into(),
382                    Some("run `dbmd fm init`".into()),
383                    vec![],
384                );
385            }
386            return None;
387        }
388    };
389
390    // Parse the YAML block.
391    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
392        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
393        // An empty frontmatter block parses as Null; treat as an empty mapping.
394        Ok(Value::Null) => Some(BTreeMap::new()),
395        Ok(_) => {
396            // A scalar / sequence at the top level isn't a frontmatter mapping.
397            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
398            // block is opaque, so there is no single offending field line.
399            push(
400                issues,
401                Severity::Error,
402                codes::FM_MALFORMED_YAML,
403                rel,
404                Some(1),
405                None,
406                "frontmatter is not a YAML mapping".into(),
407                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
408                vec![],
409            );
410            None
411        }
412        Err(e) => {
413            // Anchor to line 1 (the opening `---`): an unparseable block has no
414            // single offending field line; the agent re-reads the whole block.
415            push(
416                issues,
417                Severity::Error,
418                codes::FM_MALFORMED_YAML,
419                rel,
420                Some(1),
421                None,
422                format!("frontmatter block isn't valid YAML: {e}"),
423                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
424                vec![],
425            );
426            None
427        }
428    };
429
430    if let Some(map) = &fm {
431        // The detailed frontmatter checks only run when the YAML parsed.
432        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
433    }
434
435    // Wiki-link doctrine checks run on the body of every content file (and
436    // also on index/log meta files, whose entries are wiki-links too).
437    check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
438
439    Some(Parsed { fm, fm_yaml })
440}
441
442/// All frontmatter-level checks for a content file with valid YAML.
443fn check_frontmatter(
444    store: &Store,
445    rel: &Path,
446    fm: &BTreeMap<String, Value>,
447    fm_yaml: &str,
448    basenames: Option<&BasenameIndex>,
449    issues: &mut Vec<Issue>,
450    is_content: bool,
451) {
452    let type_ = fm.get("type").and_then(scalar_string);
453
454    // ── type ────────────────────────────────────────────────────────────────
455    if is_content && type_.is_none() {
456        push(
457            issues,
458            Severity::Error,
459            codes::FM_MISSING_TYPE,
460            rel,
461            fm_key_line_or_top(fm_yaml, "type"),
462            Some("type".into()),
463            "content file has no `type:`".into(),
464            Some("add a `type:` field (e.g. `type: contact`)".into()),
465            vec![],
466        );
467    }
468
469    // ── summary (universal on content files) ──────────────────────────────────
470    if is_content {
471        check_summary(rel, fm, fm_yaml, issues);
472    }
473
474    // ── timestamps: created / updated ─────────────────────────────────────────
475    for (key, missing_code) in [
476        ("created", codes::FM_MISSING_CREATED),
477        ("updated", codes::FM_MISSING_UPDATED),
478    ] {
479        if is_content && !fm.contains_key(key) {
480            push(
481                issues,
482                Severity::Error,
483                missing_code,
484                rel,
485                fm_key_line_or_top(fm_yaml, key),
486                Some(key.into()),
487                format!("content file has no `{key}:` timestamp"),
488                Some(format!(
489                    "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
490                )),
491                vec![],
492            );
493        } else if let Some(v) = fm.get(key) {
494            if let Some(s) = scalar_string(v) {
495                if !is_iso8601(&s) {
496                    push(
497                        issues,
498                        Severity::Error,
499                        codes::FM_BAD_TIMESTAMP,
500                        rel,
501                        fm_key_line(fm_yaml, key),
502                        Some(key.into()),
503                        format!("`{key}` is not ISO-8601: {s:?}"),
504                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
505                        vec![],
506                    );
507                }
508            }
509        }
510    }
511    // ── tags shape ────────────────────────────────────────────────────────────
512    if let Some(tags) = fm.get("tags") {
513        if !is_flat_scalar_list(tags) {
514            push(
515                issues,
516                Severity::Warning,
517                codes::TAGS_MALFORMED,
518                rel,
519                fm_key_line(fm_yaml, "tags"),
520                Some("tags".into()),
521                "`tags` must be a flat YAML list of short scalar labels".into(),
522                Some("use block form: one `- <tag>` per line".into()),
523                vec![],
524            );
525        }
526    }
527
528    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
529    for key in detect_flow_form_link_lists(fm_yaml) {
530        push(
531            issues,
532            Severity::Error,
533            codes::WIKI_LINK_FLOW_FORM_LIST,
534            rel,
535            fm_key_line(fm_yaml, &key),
536            Some(key.clone()),
537            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
538            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
539            vec![],
540        );
541    }
542
543    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
544    // Skip keys that have an explicit `link to` schema spec — those are checked
545    // (with prefix enforcement) in `check_schema`, and double-reporting the same
546    // link via two paths would be noise.
547    let schema_link_keys: BTreeSet<String> =
548        effective_schema(store, type_.as_deref().unwrap_or(""))
549            .map(|s| {
550                s.fields
551                    .iter()
552                    .filter(|f| f.link_prefix.is_some())
553                    .map(|f| f.name.clone())
554                    .collect()
555            })
556            .unwrap_or_default();
557    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
558        if schema_link_keys.contains(&key) {
559            continue;
560        }
561        check_wiki_link(
562            store,
563            rel,
564            &link,
565            Some(link.line),
566            Some(&key),
567            basenames,
568            issues,
569        );
570    }
571
572    // ── policies: ignored types ──────────────────────────────────────────────
573    if let Some(t) = &type_ {
574        if store.config.ignored_types.iter().any(|it| it == t) {
575            push(
576                issues,
577                Severity::Info,
578                codes::POLICY_IGNORED_TYPE_PRESENT,
579                rel,
580                fm_key_line(fm_yaml, "type"),
581                Some("type".into()),
582                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
583                Some(
584                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
585                        .into(),
586                ),
587                // The policy source: `DB.md` declares the ignored type.
588                vec![PathBuf::from("DB.md")],
589            );
590        }
591        // A wiki-page deriving from an ignored-type record → warning. The
592        // decision lives in the shared `derived_from_ignored_type` entry point;
593        // this side only supplies the `derived_from` targets (with their line,
594        // which the issue carries) and renders the finding.
595        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
596            if let Some(hit) =
597                derived_from_ignored_type(store, t, std::iter::once(link.target.as_str()))
598            {
599                push(
600                    issues,
601                    Severity::Warning,
602                    codes::POLICY_IGNORED_TYPE_DERIVED,
603                    rel,
604                    Some(link.line),
605                    Some("derived_from".into()),
606                    format!(
607                        "wiki-page derives from ignored-type record `{}` (type `{}`)",
608                        hit.target, hit.target_type
609                    ),
610                    Some(
611                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
612                            .into(),
613                    ),
614                    // The ignored-type source record, plus `DB.md` (the policy
615                    // source that lists the ignored type).
616                    vec![
617                        PathBuf::from(format!("{}.md", hit.target)),
618                        PathBuf::from("DB.md"),
619                    ],
620                );
621            }
622        }
623    }
624
625    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
626    if let Some(t) = &type_ {
627        if let Some(schema) = effective_schema(store, t) {
628            check_schema(store, rel, fm, fm_yaml, &schema, issues);
629        }
630    }
631}
632
633/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
634fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
635    let line = fm_key_line(fm_yaml, "summary");
636    match fm.get("summary") {
637        None => push(
638            issues,
639            Severity::Error,
640            codes::SUMMARY_MISSING,
641            rel,
642            // A missing `summary` key has no line of its own → anchor to the
643            // frontmatter block top (line 1), the EXPECTED field-absence rule.
644            fm_key_line_or_top(fm_yaml, "summary"),
645            Some("summary".into()),
646            "content file has no `summary`".into(),
647            Some("run `dbmd fm init`".into()),
648            vec![],
649        ),
650        Some(v) => {
651            let s = scalar_string(v).unwrap_or_default();
652            if s.trim().is_empty() {
653                push(
654                    issues,
655                    Severity::Error,
656                    codes::SUMMARY_EMPTY,
657                    rel,
658                    line,
659                    Some("summary".into()),
660                    "`summary` is present but empty".into(),
661                    Some("write a one-line summary, or run `dbmd fm init`".into()),
662                    vec![],
663                );
664            } else if s.contains('\n') {
665                push(
666                    issues,
667                    Severity::Error,
668                    codes::SUMMARY_MULTILINE,
669                    rel,
670                    line,
671                    Some("summary".into()),
672                    "`summary` must be one line (contains a newline)".into(),
673                    Some("collapse the summary to a single line".into()),
674                    vec![],
675                );
676            } else if s.chars().count() > MAX_SUMMARY_LEN {
677                push(
678                    issues,
679                    Severity::Warning,
680                    codes::SUMMARY_TOO_LONG,
681                    rel,
682                    line,
683                    Some("summary".into()),
684                    format!(
685                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
686                        s.chars().count()
687                    ),
688                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
689                    vec![],
690                );
691            }
692        }
693    }
694}
695
696/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
697fn check_body_wiki_links(
698    store: &Store,
699    rel: &Path,
700    body: &str,
701    fm_end_line: u32,
702    basenames: Option<&BasenameIndex>,
703    issues: &mut Vec<Issue>,
704) {
705    for link in extract_wiki_links(body) {
706        // Body lines are offset past the frontmatter block. `link.line` is
707        // 1-based within `body`; the body starts at `fm_end_line + 1`.
708        let abs_line = fm_end_line + link.line;
709        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
710    }
711}
712
/// Maps a file's bare basename (its stem, without `.md`) to every
/// store-relative path that carries that basename.
///
/// It is built once per `validate --all` sweep so the short-form wiki-link
/// check can tell a merely short-form target (`WIKI_LINK_SHORT_FORM`) from an
/// *ambiguous* one whose bare basename matches two or more files
/// (`WIKI_LINK_AMBIGUOUS`, the defensive code). The working-set path passes
/// `None` instead: that loop is O(changed), so it reports the plain short-form
/// error without this index.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the swept file list by basename into a [`BasenameIndex`].
///
/// The list is the one `validate_all` has already gathered, so no extra walk
/// happens here. Paths without a file stem, or whose stem is not valid UTF-8,
/// are left out. Within one basename, paths keep the order of `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files
        .iter()
        .filter_map(|path| {
            let stem = path.file_stem()?.to_str()?;
            Some((stem.to_owned(), path.clone()))
        })
        .fold(BasenameIndex::new(), |mut index, (stem, path)| {
            index.entry(stem).or_default().push(path);
            index
        })
}
733
734/// The shared per-wiki-link doctrine + integrity check used by both body links
735/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
736/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
737/// when its bare basename matches ≥2 files.
738fn check_wiki_link(
739    store: &Store,
740    rel: &Path,
741    link: &Link,
742    line: Option<u32>,
743    key: Option<&str>,
744    basenames: Option<&BasenameIndex>,
745    issues: &mut Vec<Issue>,
746) {
747    let bare = link.target.trim_end_matches(".md");
748
749    // Short-form: not a full store-relative path (no `/`, or first segment isn't
750    // a known layer).
751    if !is_full_store_path(bare) {
752        // Ambiguous (defensive) takes precedence over plain short-form when the
753        // target is a bare basename (no `/`) that matches ≥2 files in the store.
754        // Only computable in the sweep (where `basenames` is populated); the
755        // working-set path falls through to the plain short-form error.
756        if !bare.contains('/') {
757            if let Some(idx) = basenames {
758                if let Some(matches) = idx.get(bare) {
759                    if matches.len() >= 2 {
760                        let mut related = matches.clone();
761                        related.sort();
762                        push(
763                            issues,
764                            Severity::Error,
765                            codes::WIKI_LINK_AMBIGUOUS,
766                            rel,
767                            line,
768                            key.map(str::to_string),
769                            format!(
770                                "short-form wiki-link `[[{}]]` matches multiple files",
771                                link.target
772                            ),
773                            Some("use the full store-relative path to disambiguate".into()),
774                            related,
775                        );
776                        return;
777                    }
778                }
779            }
780        }
781        push(
782            issues,
783            Severity::Error,
784            codes::WIKI_LINK_SHORT_FORM,
785            rel,
786            line,
787            key.map(str::to_string),
788            format!(
789                "wiki-link `[[{}]]` is not a full store-relative path",
790                link.target
791            ),
792            short_form_suggestion(bare),
793            vec![],
794        );
795        // Don't also report broken; the agent must fix the form first.
796        return;
797    }
798
799    // `.md` extension → warning, then still check existence.
800    if link.target.ends_with(".md") {
801        push(
802            issues,
803            Severity::Warning,
804            codes::WIKI_LINK_HAS_EXTENSION,
805            rel,
806            line,
807            key.map(str::to_string),
808            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
809            Some(format!("drop the extension: [[{bare}]]")),
810            vec![],
811        );
812    }
813
814    let Some(target_rel) = safe_md_target_rel(bare) else {
815        push(
816            issues,
817            Severity::Error,
818            codes::WIKI_LINK_BROKEN,
819            rel,
820            line,
821            key.map(str::to_string),
822            format!("wiki-link target `{bare}` is not a safe store-relative path"),
823            Some("use a full store-relative path under sources/, records/, or wiki/".into()),
824            vec![],
825        );
826        return;
827    };
828
829    // Broken: target file doesn't exist (O(1) stat).
830    let target_abs = store.root.join(target_rel);
831    if !target_abs.is_file() {
832        push(
833            issues,
834            Severity::Error,
835            codes::WIKI_LINK_BROKEN,
836            rel,
837            line,
838            key.map(str::to_string),
839            format!("wiki-link target `{bare}` doesn't exist"),
840            Some(format!(
841                "create `{bare}.md`, or point the link at an existing file"
842            )),
843            vec![],
844        );
845    }
846}
847
848// ─────────────────────────────────────────────────────────────────────────────
849//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
850// ─────────────────────────────────────────────────────────────────────────────
851
852/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
853/// block, or `None`. This is the **only** source of schema enforcement — the
854/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
855/// store that wants its `contact` / `expense` / etc. fields enforced declares
856/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
857/// copy-in starting point.
858fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
859    store.config.schemas.get(type_).cloned()
860}
861
862/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
863fn check_schema(
864    store: &Store,
865    rel: &Path,
866    fm: &BTreeMap<String, Value>,
867    fm_yaml: &str,
868    schema: &Schema,
869    issues: &mut Vec<Issue>,
870) {
871    for spec in &schema.fields {
872        let present = fm.get(&spec.name);
873        let line = fm_key_line(fm_yaml, &spec.name);
874
875        // Required.
876        let is_empty = match present {
877            None => true,
878            Some(v) => scalar_string(v)
879                .map(|s| s.trim().is_empty())
880                .unwrap_or(false),
881        };
882        if spec.required && is_empty {
883            push(
884                issues,
885                Severity::Error,
886                codes::SCHEMA_MISSING_REQUIRED,
887                rel,
888                // Absent key → anchor to the frontmatter top (line 1); a
889                // present-but-empty value keeps its own line.
890                fm_key_line_or_top(fm_yaml, &spec.name),
891                Some(spec.name.clone()),
892                format!("required field `{}` is absent or empty", spec.name),
893                Some(format!("set `{}` to a non-empty value", spec.name)),
894                vec![],
895            );
896            continue;
897        }
898        let Some(value) = present else { continue };
899
900        // An OPTIONAL field that is `null` or empty is simply unset — there is
901        // no value to shape/enum/link-check. (The required+empty case already
902        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
903        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
904        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
905        let value_empty = value.is_null()
906            || scalar_string(value)
907                .map(|s| s.trim().is_empty())
908                .unwrap_or(false);
909        if !spec.required && value_empty {
910            continue;
911        }
912
913        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
914        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
915        if let Some(prefix) = &spec.link_prefix {
916            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
917            continue; // a link field is never also shape/enum-checked
918        }
919
920        // enum
921        if let Some(allowed) = &spec.enum_values {
922            if let Some(s) = scalar_string(value) {
923                if !allowed.iter().any(|a| a == &s) {
924                    push(
925                        issues,
926                        Severity::Error,
927                        codes::SCHEMA_ENUM_VIOLATION,
928                        rel,
929                        line,
930                        Some(spec.name.clone()),
931                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
932                        Some(format!("use one of: {}", allowed.join(", "))),
933                        vec![],
934                    );
935                }
936            }
937            continue;
938        }
939
940        // shape
941        if let Some(shape) = spec.shape {
942            check_schema_shape(rel, &spec.name, value, shape, line, issues);
943        }
944    }
945}
946
947/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
948/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
949/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
950/// recognized exactly like the quoted form.
951fn check_schema_link(
952    store: &Store,
953    rel: &Path,
954    field: &str,
955    fm_yaml: &str,
956    prefix: &Path,
957    line: Option<u32>,
958    issues: &mut Vec<Issue>,
959) {
960    let prefix_str = prefix.to_string_lossy();
961    let prefix_str = prefix_str.trim_end_matches('/');
962    let suggestion = |target_leaf: &str| {
963        Some(format!(
964            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
965        ))
966    };
967
968    let links = frontmatter_links_for_key(fm_yaml, field, 2);
969    if links.is_empty() {
970        // No wiki-link in the field's value → it's a plain string.
971        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
972        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
973        let leaf = slugish(raw);
974        push(
975            issues,
976            Severity::Error,
977            codes::SCHEMA_LINK_PREFIX_MISMATCH,
978            rel,
979            line,
980            Some(field.to_string()),
981            format!(
982                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
983            ),
984            suggestion(&leaf),
985            vec![],
986        );
987        return;
988    }
989
990    for link in links {
991        if link.target.ends_with(".md") {
992            let bare = link.target.trim_end_matches(".md");
993            push(
994                issues,
995                Severity::Warning,
996                codes::WIKI_LINK_HAS_EXTENSION,
997                rel,
998                Some(link.line),
999                Some(field.to_string()),
1000                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1001                Some(format!("drop the extension: [[{bare}]]")),
1002                vec![],
1003            );
1004        }
1005        let bare = link.target.trim_end_matches(".md");
1006        if !path_under_prefix(bare, prefix_str) {
1007            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1008            push(
1009                issues,
1010                Severity::Error,
1011                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1012                rel,
1013                line,
1014                Some(field.to_string()),
1015                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1016                suggestion(leaf),
1017                vec![],
1018            );
1019        } else {
1020            let Some(target_rel) = safe_md_target_rel(bare) else {
1021                push(
1022                    issues,
1023                    Severity::Error,
1024                    codes::WIKI_LINK_BROKEN,
1025                    rel,
1026                    line,
1027                    Some(field.to_string()),
1028                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1029                    Some(
1030                        "use a full store-relative path under sources/, records/, or wiki/".into(),
1031                    ),
1032                    vec![],
1033                );
1034                continue;
1035            };
1036            // Correct prefix — still surface a broken target so the agent sees
1037            // one consistent vocabulary.
1038            let target_abs = store.root.join(target_rel);
1039            if !target_abs.is_file() {
1040                push(
1041                    issues,
1042                    Severity::Error,
1043                    codes::WIKI_LINK_BROKEN,
1044                    rel,
1045                    line,
1046                    Some(field.to_string()),
1047                    format!("wiki-link target `{bare}` doesn't exist"),
1048                    Some(format!(
1049                        "create `{bare}.md`, or point the link at an existing file"
1050                    )),
1051                    vec![],
1052                );
1053            }
1054        }
1055    }
1056}
1057
1058/// Shape enforcement for a non-link, non-enum schema field.
1059fn check_schema_shape(
1060    rel: &Path,
1061    field: &str,
1062    value: &Value,
1063    shape: Shape,
1064    line: Option<u32>,
1065    issues: &mut Vec<Issue>,
1066) {
1067    let s = scalar_string(value).unwrap_or_default();
1068    let ok = match shape {
1069        Shape::String => true, // any scalar string
1070        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1071        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1072        Shape::Date => is_iso8601_date_or_datetime(&s),
1073        Shape::Email => is_email(&s),
1074        Shape::Currency => is_currency(&s),
1075        Shape::Url => is_url(&s),
1076    };
1077    if !ok {
1078        push(
1079            issues,
1080            Severity::Error,
1081            codes::SCHEMA_SHAPE_MISMATCH,
1082            rel,
1083            line,
1084            Some(field.to_string()),
1085            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1086            Some(shape_suggestion(shape)),
1087            vec![],
1088        );
1089    }
1090}
1091
1092// ─────────────────────────────────────────────────────────────────────────────
1093//  Cross-file: entity-dedup collisions (validate_all only)
1094// ─────────────────────────────────────────────────────────────────────────────
1095
1096/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1097///
1098/// `DUP_ID` is universal (two files with the same explicit `id`).
1099/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1100/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1101/// uniqueness constraint, and two records of that type whose declared values
1102/// collide warn. No type carries a built-in dedup key — the store opts in.
1103///
1104/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1105/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1106/// lexicographically smallest store-relative path in the group (a total order →
1107/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1108/// that field's line on the reported file and carries it as `key`; a multi-field
1109/// key anchors to line 1 with a null key.
1110fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1111    // Path → frontmatter YAML, for resolving the anchor field's line on the
1112    // reported (smallest-path) member.
1113    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1114        .iter()
1115        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1116        .collect();
1117
1118    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1119    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1120    for (rel, p) in parsed {
1121        if let Some(map) = &p.fm {
1122            if let Some(id) = map.get("id").and_then(scalar_string) {
1123                if !id.trim().is_empty() {
1124                    by_id.entry(id).or_default().push(rel.clone());
1125                }
1126            }
1127        }
1128    }
1129    for (id, files) in &by_id {
1130        if files.len() > 1 {
1131            let (reported, related) = canonical_and_related(files);
1132            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1133            push(
1134                issues,
1135                Severity::Error,
1136                codes::DUP_ID,
1137                &reported,
1138                line,
1139                Some("id".into()),
1140                format!("id {id:?} is declared by more than one file"),
1141                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1142                related,
1143            );
1144        }
1145    }
1146
1147    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1148    // Every constraint comes from the store's `## Schemas`; a type with no
1149    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1150    // key-ordered, so emitted issues are deterministic across runs.
1151    for (type_name, schema) in &store.config.schemas {
1152        for key_fields in &schema.unique_keys {
1153            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1154        }
1155    }
1156}
1157
1158/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1159/// declared `key_fields` render to the same token tuple. Files missing any key
1160/// field are skipped — an incomplete key is never a collision.
1161///
1162/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1163/// store-relative path; `related` is the rest. A single-field key anchors to
1164/// that field's line on the reported file and carries it as `key`; a multi-field
1165/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1166fn soft_dup(
1167    parsed: &[(PathBuf, Parsed)],
1168    issues: &mut Vec<Issue>,
1169    type_: &str,
1170    key_fields: &[String],
1171    fm_yaml_of: &HashMap<&PathBuf, &str>,
1172) {
1173    if key_fields.is_empty() {
1174        return;
1175    }
1176    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1177    for (rel, p) in parsed {
1178        let is_type =
1179            p.fm.as_ref()
1180                .and_then(|m| m.get("type"))
1181                .and_then(scalar_string)
1182                .map(|t| t == type_)
1183                .unwrap_or(false);
1184        if !is_type {
1185            continue;
1186        }
1187        if let Some(key) = dedup_key(p, key_fields) {
1188            groups.entry(key).or_default().push(rel.clone());
1189        }
1190    }
1191    // HashMap iteration is nondeterministic; sort by reported member so the
1192    // emitted issue order is stable across runs.
1193    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1194        .values()
1195        .filter(|files| files.len() > 1)
1196        .map(|files| canonical_and_related(files))
1197        .collect();
1198    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1199
1200    let fields_disp = key_fields.join(", ");
1201    for (reported, related) in collisions {
1202        // Single-field keys anchor to the field's line + carry the key; multi-
1203        // field keys anchor to line 1 with a null key.
1204        let (line, key) = if key_fields.len() == 1 {
1205            (
1206                fm_yaml_of
1207                    .get(&reported)
1208                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1209                Some(key_fields[0].clone()),
1210            )
1211        } else {
1212            (Some(1), None)
1213        };
1214        let n = related.len();
1215        push(
1216            issues,
1217            Severity::Warning,
1218            codes::DUP_UNIQUE_KEY,
1219            &reported,
1220            line,
1221            key,
1222            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1223            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1224            related,
1225        );
1226    }
1227}
1228
1229/// Render a type's `unique:` key for one file: each field's dedup token in
1230/// order, or `None` if any field is absent/empty (an incomplete key never
1231/// collides).
1232fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1233    let mut out = Vec::with_capacity(key_fields.len());
1234    for f in key_fields {
1235        out.push(dedup_token(p, f)?);
1236    }
1237    Some(out)
1238}
1239
1240/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1241/// values (single or block-sequence list) reduce to their lower-cased target
1242/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1243/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1244fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1245    // Wiki-links first — read from the raw frontmatter text so the unquoted
1246    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1247    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1248    if !links.is_empty() {
1249        let set: BTreeSet<String> = links
1250            .into_iter()
1251            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1252            .filter(|t| !t.is_empty())
1253            .collect();
1254        return if set.is_empty() {
1255            None
1256        } else {
1257            Some(set.into_iter().collect::<Vec<_>>().join(","))
1258        };
1259    }
1260    match p.fm.as_ref()?.get(field) {
1261        Some(Value::Sequence(items)) => {
1262            let set: BTreeSet<String> = items
1263                .iter()
1264                .filter_map(scalar_string)
1265                .map(|s| s.trim().to_lowercase())
1266                .filter(|t| !t.is_empty())
1267                .collect();
1268            if set.is_empty() {
1269                None
1270            } else {
1271                Some(set.into_iter().collect::<Vec<_>>().join(","))
1272            }
1273        }
1274        Some(v) => {
1275            let s = scalar_string(v)?.trim().to_lowercase();
1276            if s.is_empty() {
1277                None
1278            } else {
1279                Some(s)
1280            }
1281        }
1282        None => None,
1283    }
1284}
1285
/// Split a non-empty collision group into `(reported, related)`.
///
/// The lexicographically smallest store-relative path becomes the reported
/// member; every other path, in ascending order, is `related`. Store-relative
/// path is a total order, which is what makes the choice deterministic — the
/// property reporting rule #1 relies on.
///
/// # Panics
///
/// Panics when `files` is empty.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.to_vec();
    ordered.sort();

    let mut members = ordered.into_iter();
    let reported = members
        .next()
        .expect("a collision group always has at least one member");
    (reported, members.collect())
}
1297
1298// ─────────────────────────────────────────────────────────────────────────────
1299//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1300// ─────────────────────────────────────────────────────────────────────────────
1301
1302/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1303fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1304    // Group content files by their immediate parent folder (the type-folder,
1305    // *across date shards* — a sharded file's "type folder" is the folder right
1306    // under the layer). We key on the type-folder so shards roll up correctly.
1307    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1308    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1309    for rel in files {
1310        // The layer is the first path component — recorded independently of the
1311        // type-folder so a layer containing only loose files still requires an
1312        // `index.md`.
1313        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1314            match layer {
1315                "sources" => layers_present.insert("sources"),
1316                "records" => layers_present.insert("records"),
1317                "wiki" => layers_present.insert("wiki"),
1318                _ => false,
1319            };
1320        }
1321        if let Some(tf) = type_folder_of(rel) {
1322            type_folders.entry(tf).or_default().push(rel.clone());
1323        }
1324    }
1325
1326    // ── Root index.md ─────────────────────────────────────────────────────────
1327    if !files.is_empty() {
1328        let root_index = store.root.join("index.md");
1329        if !root_index.is_file() {
1330            push(
1331                issues,
1332                Severity::Error,
1333                codes::INDEX_MISSING,
1334                Path::new("index.md"),
1335                None,
1336                None,
1337                "store has files but no root `index.md`".into(),
1338                Some("run `dbmd index rebuild`".into()),
1339                vec![],
1340            );
1341        } else {
1342            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1343        }
1344    }
1345
1346    // ── Layer index.md ────────────────────────────────────────────────────────
1347    for layer in &layers_present {
1348        let layer_index_rel = PathBuf::from(layer).join("index.md");
1349        let abs = store.root.join(&layer_index_rel);
1350        if !abs.is_file() {
1351            push(
1352                issues,
1353                Severity::Error,
1354                codes::INDEX_MISSING,
1355                &layer_index_rel,
1356                None,
1357                None,
1358                format!("layer `{layer}/` has files but no `index.md`"),
1359                Some("run `dbmd index rebuild`".into()),
1360                vec![],
1361            );
1362        } else {
1363            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1364        }
1365    }
1366
1367    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1368    for (tf, members) in &type_folders {
1369        let index_md_rel = tf.join("index.md");
1370        let index_md_abs = store.root.join(&index_md_rel);
1371        let index_md_present = index_md_abs.is_file();
1372        if !index_md_present {
1373            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1374            // on the FOLDER (not the would-be `index.md` path). When the index is
1375            // entirely missing we do NOT additionally evaluate per-entry
1376            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1377            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1378            push(
1379                issues,
1380                Severity::Error,
1381                codes::INDEX_MISSING,
1382                tf,
1383                None,
1384                None,
1385                format!("non-empty folder `{}` has no index.md", tf.display()),
1386                Some(format!(
1387                    "run `dbmd index rebuild --folder {}`",
1388                    tf.display()
1389                )),
1390                vec![],
1391            );
1392            continue;
1393        }
1394
1395        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1396        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1397
1398        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1399        // when the `index.md` is present (above): a folder whose entire index is
1400        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1401        let jsonl_rel = tf.join("index.jsonl");
1402        let jsonl_abs = store.root.join(&jsonl_rel);
1403        if !jsonl_abs.is_file() {
1404            push(
1405                issues,
1406                Severity::Error,
1407                codes::INDEX_JSONL_MISSING,
1408                &jsonl_rel,
1409                None,
1410                None,
1411                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1412                Some("run `dbmd index rebuild`".into()),
1413                vec![],
1414            );
1415        } else {
1416            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1417        }
1418    }
1419
1420    // ── Orphan index.md: an index file in a folder with no content. ──────────
1421    for rel in walk_index_files(&store.root) {
1422        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1423        let parent_str = parent.to_string_lossy().to_string();
1424        let is_canonical = parent_str.is_empty() // root
1425            || matches!(parent_str.as_str(), "sources" | "records" | "wiki")
1426            || type_folders.contains_key(&parent);
1427        if !is_canonical {
1428            push(
1429                issues,
1430                Severity::Warning,
1431                codes::INDEX_ORPHAN,
1432                &rel,
1433                None,
1434                None,
1435                format!(
1436                    "`{}` sits in an empty or non-canonical folder",
1437                    rel.display()
1438                ),
1439                Some("remove it, or run `dbmd index rebuild`".into()),
1440                vec![],
1441            );
1442        }
1443    }
1444}
1445
1446/// Check a type-folder `index.md`'s entries against the folder's actual files:
1447/// stale entries (target gone), missing entries (file not listed), and
1448/// summary mismatches.
1449fn check_type_folder_index_md(
1450    store: &Store,
1451    tf: &Path,
1452    index_rel: &Path,
1453    members: &[PathBuf],
1454    issues: &mut Vec<Issue>,
1455) {
1456    let abs = store.root.join(index_rel);
1457    let Ok(text) = std::fs::read_to_string(&abs) else {
1458        return;
1459    };
1460    let entries = parse_index_entries(&text);
1461
1462    let listed: BTreeSet<PathBuf> = entries
1463        .iter()
1464        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1465        .collect();
1466
1467    // Stale entries + summary mismatch.
1468    for entry in &entries {
1469        let bare = entry.target.trim_end_matches(".md");
1470        let Some(target_rel) = safe_md_target_rel(bare) else {
1471            push(
1472                issues,
1473                Severity::Error,
1474                codes::INDEX_STALE_ENTRY,
1475                index_rel,
1476                Some(entry.line),
1477                None,
1478                format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1479                Some("run `dbmd index rebuild`".into()),
1480                vec![],
1481            );
1482            continue;
1483        };
1484        let target_abs = store.root.join(target_rel);
1485        if !target_abs.is_file() {
1486            push(
1487                issues,
1488                Severity::Error,
1489                codes::INDEX_STALE_ENTRY,
1490                index_rel,
1491                Some(entry.line),
1492                None,
1493                format!("index entry `[[{bare}]]` points at a missing file"),
1494                Some("run `dbmd index rebuild`".into()),
1495                // The stale target the entry names (the file that no longer
1496                // exists) — so the agent can locate the dangling reference.
1497                vec![PathBuf::from(format!("{bare}.md"))],
1498            );
1499            continue;
1500        }
1501        // Summary mismatch: the entry text must equal the file's `summary`.
1502        if let Some(expected) = read_summary(&target_abs) {
1503            if let Some(text_part) = &entry.summary_text {
1504                if text_part.trim() != expected.trim() {
1505                    push(
1506                        issues,
1507                        Severity::Error,
1508                        codes::INDEX_SUMMARY_MISMATCH,
1509                        index_rel,
1510                        Some(entry.line),
1511                        None,
1512                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1513                        Some("run `dbmd index rebuild`".into()),
1514                        vec![PathBuf::from(format!("{bare}.md"))],
1515                    );
1516                }
1517            }
1518        }
1519    }
1520
1521    // Missing entries: a member file not listed. Skip the index/log meta files.
1522    // The browse view caps at 500; only flag a missing entry when the folder is
1523    // under the cap (a capped folder legitimately omits older files).
1524    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1525    if content_members.len() <= 500 {
1526        for m in content_members {
1527            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1528            if !listed.contains(&bare) {
1529                push(
1530                    issues,
1531                    Severity::Error,
1532                    codes::INDEX_MISSING_ENTRY,
1533                    index_rel,
1534                    None,
1535                    None,
1536                    format!(
1537                        "file `{}` is not listed in its folder's `index.md`",
1538                        m.display()
1539                    ),
1540                    Some("run `dbmd index rebuild`".into()),
1541                    vec![(*m).clone()],
1542                );
1543            }
1544        }
1545    }
1546    let _ = tf;
1547}
1548
1549/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1550/// folder (uncapped), every record must point at a real file, and each record's
1551/// fields must match the file's frontmatter.
1552fn check_type_folder_index_jsonl(
1553    store: &Store,
1554    tf: &Path,
1555    jsonl_rel: &Path,
1556    members: &[PathBuf],
1557    issues: &mut Vec<Issue>,
1558) {
1559    let abs = store.root.join(jsonl_rel);
1560    let Ok(text) = std::fs::read_to_string(&abs) else {
1561        return;
1562    };
1563
1564    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1565    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1566    for (i, line) in text.lines().enumerate() {
1567        let line = line.trim();
1568        if line.is_empty() {
1569            continue;
1570        }
1571        let rec: serde_json::Value = match serde_json::from_str(line) {
1572            Ok(v) => v,
1573            Err(e) => {
1574                push(
1575                    issues,
1576                    Severity::Error,
1577                    codes::INDEX_JSONL_DESYNC,
1578                    jsonl_rel,
1579                    Some((i + 1) as u32),
1580                    None,
1581                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1582                    Some("run `dbmd index rebuild`".into()),
1583                    vec![],
1584                );
1585                continue;
1586            }
1587        };
1588        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1589            if !is_safe_store_relative_path(Path::new(path)) {
1590                push(
1591                    issues,
1592                    Severity::Error,
1593                    codes::INDEX_JSONL_DESYNC,
1594                    jsonl_rel,
1595                    Some((i + 1) as u32),
1596                    None,
1597                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1598                    Some("run `dbmd index rebuild`".into()),
1599                    vec![],
1600                );
1601                continue;
1602            }
1603            records.insert(PathBuf::from(path), rec);
1604        }
1605    }
1606
1607    let member_set: BTreeSet<PathBuf> = members
1608        .iter()
1609        .filter(|m| is_content_file(m))
1610        .cloned()
1611        .collect();
1612
1613    // jsonl record → missing file = desync.
1614    for path in records.keys() {
1615        let target_abs = store.root.join(path);
1616        if !target_abs.is_file() {
1617            push(
1618                issues,
1619                Severity::Error,
1620                codes::INDEX_JSONL_DESYNC,
1621                jsonl_rel,
1622                None,
1623                None,
1624                format!(
1625                    "`index.jsonl` record points at missing file `{}`",
1626                    path.display()
1627                ),
1628                Some("run `dbmd index rebuild`".into()),
1629                vec![],
1630            );
1631        }
1632    }
1633
1634    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1635    for m in &member_set {
1636        if !records.contains_key(m) {
1637            push(
1638                issues,
1639                Severity::Error,
1640                codes::INDEX_JSONL_DESYNC,
1641                jsonl_rel,
1642                None,
1643                None,
1644                format!(
1645                    "file `{}` is missing from the complete `index.jsonl`",
1646                    m.display()
1647                ),
1648                Some("run `dbmd index rebuild`".into()),
1649                vec![m.clone()],
1650            );
1651        }
1652    }
1653
1654    // Record fields stale vs. frontmatter. SPEC § Validation defines
1655    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1656    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1657    // search paths read every field straight from these sidecars (`tags`,
1658    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1659    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1660    // a stale value answer queries with data that exists in no `.md` file.
1661    //
1662    // Rather than re-list (and drift from) every projected key, rebuild the
1663    // record the canonical projection would write for this file
1664    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1665    // and diff the two as flat JSON maps. Every key the projection emits is
1666    // covered automatically; `path` is the join key and is skipped.
1667    for (path, rec) in &records {
1668        let target_abs = store.root.join(path);
1669        if !target_abs.is_file() {
1670            continue;
1671        }
1672        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1673        else {
1674            continue; // unreadable / unparseable frontmatter is reported elsewhere
1675        };
1676        let Ok(expected_json) = serde_json::to_value(&expected) else {
1677            continue;
1678        };
1679        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1680            continue;
1681        };
1682
1683        // Compare the union of keys present on either side; a key the file
1684        // projects but the sidecar omits is just as stale as a wrong value.
1685        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1686        for key in have.keys().chain(want.keys()) {
1687            if key == "path" {
1688                continue;
1689            }
1690            if have.get(key) != want.get(key) {
1691                mismatched_keys.insert(key);
1692            }
1693        }
1694
1695        if !mismatched_keys.is_empty() {
1696            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1697            push(
1698                issues,
1699                Severity::Error,
1700                codes::INDEX_JSONL_STALE,
1701                jsonl_rel,
1702                None,
1703                Some(keys.join(",")),
1704                format!(
1705                    "`index.jsonl` record for `{}` is stale ({})",
1706                    path.display(),
1707                    keys.join(", ")
1708                ),
1709                Some("run `dbmd index rebuild`".into()),
1710                vec![path.clone()],
1711            );
1712        }
1713    }
1714    let _ = tf;
1715}
1716
1717/// Check an index's `scope:` frontmatter against its filesystem location.
1718fn check_index_scope(
1719    store: &Store,
1720    index_rel: &Path,
1721    expected_scope: &str,
1722    expected_folder: Option<&str>,
1723    issues: &mut Vec<Issue>,
1724) {
1725    let abs = store.root.join(index_rel);
1726    let Ok(text) = std::fs::read_to_string(&abs) else {
1727        return;
1728    };
1729    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1730        return;
1731    };
1732    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1733        return;
1734    };
1735    let fm = yaml_map_to_btree(&map);
1736
1737    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1738        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1739        let scope_ok =
1740            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1741        if !scope_ok {
1742            push(
1743                issues,
1744                Severity::Warning,
1745                codes::INDEX_WRONG_SCOPE,
1746                index_rel,
1747                fm_key_line(&yaml, "scope"),
1748                Some("scope".into()),
1749                format!(
1750                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1751                ),
1752                Some(format!("set `scope: {expected_scope}`")),
1753                vec![],
1754            );
1755        }
1756    }
1757    // folder: must match for layer/type-folder indexes.
1758    if let Some(expected) = expected_folder {
1759        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1760            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1761                push(
1762                    issues,
1763                    Severity::Warning,
1764                    codes::INDEX_WRONG_SCOPE,
1765                    index_rel,
1766                    fm_key_line(&yaml, "folder"),
1767                    Some("folder".into()),
1768                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1769                    Some(format!("set `folder: {expected}`")),
1770                    vec![],
1771                );
1772            }
1773        }
1774    }
1775}
1776
1777// ─────────────────────────────────────────────────────────────────────────────
1778//  Cross-file: log.md well-formedness + ordering (validate_all only)
1779// ─────────────────────────────────────────────────────────────────────────────
1780
1781/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries.
1782fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1783    let log_rel = Path::new("log.md");
1784    let abs = store.root.join(log_rel);
1785    let Ok(text) = std::fs::read_to_string(&abs) else {
1786        return;
1787    };
1788
1789    let mut prev: Option<DateTime<FixedOffset>> = None;
1790    for (i, line) in text.lines().enumerate() {
1791        if !line.starts_with("## [") {
1792            continue;
1793        }
1794        let line_no = (i + 1) as u32;
1795        match parse_log_header(line) {
1796            None => push(
1797                issues,
1798                Severity::Error,
1799                codes::LOG_BAD_TIMESTAMP,
1800                log_rel,
1801                Some(line_no),
1802                None,
1803                format!("log entry header has an unparseable timestamp: {line:?}"),
1804                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
1805                vec![],
1806            ),
1807            Some((ts, kind, _object)) => {
1808                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
1809                    push(
1810                        issues,
1811                        Severity::Warning,
1812                        codes::LOG_UNKNOWN_KIND,
1813                        log_rel,
1814                        Some(line_no),
1815                        None,
1816                        format!("log entry kind `{kind}` is not recognized"),
1817                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
1818                        vec![],
1819                    );
1820                }
1821                if let Some(p) = prev {
1822                    if ts < p {
1823                        push(
1824                            issues,
1825                            Severity::Warning,
1826                            codes::LOG_OUT_OF_ORDER,
1827                            log_rel,
1828                            Some(line_no),
1829                            None,
1830                            "log entry is older than the entry above it (possible rewrite)".into(),
1831                            Some("append corrective entries; never reorder past ones".into()),
1832                            vec![],
1833                        );
1834                    }
1835                }
1836                prev = Some(ts);
1837            }
1838        }
1839    }
1840}
1841
1842// ─────────────────────────────────────────────────────────────────────────────
1843//  Self-contained primitives (collapse onto sibling modules once they land)
1844// ─────────────────────────────────────────────────────────────────────────────
1845
/// A minimal wiki-link occurrence: the link target and the line it was found
/// on.
///
/// Only the target is kept. For a `[[target|display]]` link the scanner that
/// builds these (`collect_line_links`) discards the `|display` part, so there
/// is no display field. Links are produced from frontmatter text as well as
/// from bodies.
struct Link {
    /// The link target: the text between `[[` and `]]`, up to any `|`, trimmed.
    target: String,
    /// 1-based line number of the link, as supplied by the scanner that
    /// produced it (frontmatter scanners pass an absolute file line).
    line: u32,
}
1851
1852/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
1853/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
1854/// exact-cased directory entry to be present.
1855fn store_marker_present(store: &Store) -> bool {
1856    let want = store.root.join("DB.md");
1857    if !want.is_file() {
1858        return false;
1859    }
1860    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
1861    match std::fs::read_dir(&store.root) {
1862        Ok(entries) => entries
1863            .flatten()
1864            .any(|e| e.file_name().to_str() == Some("DB.md")),
1865        Err(_) => true, // can't enumerate; trust the is_file() above
1866    }
1867}
1868
1869/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
1870/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
1871/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
1872/// `Schemas`).
1873///
1874/// `DB.md` is not a content file (no `summary`), so it is checked here rather
1875/// than through `check_content_file`. The marker presence is established by the
1876/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
1877/// as a store (the marker is the filename), so we report its shape rather than
1878/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
1879fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
1880    let rel = Path::new("DB.md");
1881    let abs = store.root.join("DB.md");
1882    let Ok(text) = std::fs::read_to_string(&abs) else {
1883        return; // marker present but unreadable: nothing more to say.
1884    };
1885
1886    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
1887        // No frontmatter block at all → it cannot declare `type: db-md` and has
1888        // neither required field. Report the type and both missing fields,
1889        // anchored to line 1 (the would-be opening fence).
1890        push(
1891            issues,
1892            Severity::Error,
1893            codes::DB_MD_BAD_TYPE,
1894            rel,
1895            Some(1),
1896            Some("type".into()),
1897            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
1898            Some("add a `---` frontmatter block with `type: db-md`".into()),
1899            vec![],
1900        );
1901        for field in ["scope", "owner"] {
1902            push(
1903                issues,
1904                Severity::Error,
1905                codes::DB_MD_MISSING_FIELD,
1906                rel,
1907                Some(1),
1908                Some(field.into()),
1909                format!("DB.md frontmatter is missing required field `{field}`"),
1910                Some(format!("add `{field}:` to the DB.md frontmatter")),
1911                vec![],
1912            );
1913        }
1914        return;
1915    };
1916
1917    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
1918    // identity contract is unmet (no provable `type: db-md`, no provable fields).
1919    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
1920        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
1921        Ok(Value::Null) => Some(BTreeMap::new()),
1922        _ => None,
1923    };
1924
1925    match &fm {
1926        Some(map) => {
1927            // ── type: db-md ──────────────────────────────────────────────────
1928            let type_ = map.get("type").and_then(scalar_string);
1929            if type_.as_deref() != Some("db-md") {
1930                let (line, msg) = match &type_ {
1931                    Some(t) => (
1932                        fm_key_line(&fm_yaml, "type"),
1933                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
1934                    ),
1935                    None => (
1936                        Some(1),
1937                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
1938                    ),
1939                };
1940                push(
1941                    issues,
1942                    Severity::Error,
1943                    codes::DB_MD_BAD_TYPE,
1944                    rel,
1945                    line,
1946                    Some("type".into()),
1947                    msg,
1948                    Some("set `type: db-md` in the DB.md frontmatter".into()),
1949                    vec![],
1950                );
1951            }
1952
1953            // ── required fields: scope + owner ───────────────────────────────
1954            for field in ["scope", "owner"] {
1955                let present = map
1956                    .get(field)
1957                    .and_then(scalar_string)
1958                    .map(|s| !s.trim().is_empty())
1959                    .unwrap_or(false);
1960                if !present {
1961                    push(
1962                        issues,
1963                        Severity::Error,
1964                        codes::DB_MD_MISSING_FIELD,
1965                        rel,
1966                        // A present-but-empty field anchors to its line; a fully
1967                        // absent one to the block top.
1968                        fm_key_line_or_top(&fm_yaml, field),
1969                        Some(field.into()),
1970                        format!("DB.md frontmatter is missing required field `{field}`"),
1971                        Some(format!("add `{field}:` to the DB.md frontmatter")),
1972                        vec![],
1973                    );
1974                }
1975            }
1976        }
1977        None => {
1978            // Unparseable frontmatter: the identity contract is unprovable. Emit
1979            // the type error and both field errors, anchored to the block top.
1980            push(
1981                issues,
1982                Severity::Error,
1983                codes::DB_MD_BAD_TYPE,
1984                rel,
1985                Some(1),
1986                Some("type".into()),
1987                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
1988                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
1989                vec![],
1990            );
1991            for field in ["scope", "owner"] {
1992                push(
1993                    issues,
1994                    Severity::Error,
1995                    codes::DB_MD_MISSING_FIELD,
1996                    rel,
1997                    Some(1),
1998                    Some(field.into()),
1999                    format!("DB.md frontmatter is missing required field `{field}`"),
2000                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2001                    vec![],
2002                );
2003            }
2004        }
2005    }
2006
2007    // ── recognized `##` section headers only ─────────────────────────────────
2008    // The body's H2 headings must be one of the three the toolkit reads; any
2009    // other is a likely typo / misplacement (warning — the parser ignores it,
2010    // so the config is not corrupted, but the operator wrote a section that will
2011    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2012    // schema blocks) live under their H2 and are not flagged here.
2013    for section in crate::parser::extract_sections(&body) {
2014        if section.level != 2 {
2015            continue;
2016        }
2017        let name = section.heading.trim().to_ascii_lowercase();
2018        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2019            continue;
2020        }
2021        // `Section::line` is 1-based within the body; the body begins at file
2022        // line `fm_end_line + 1`.
2023        let file_line = fm_end_line + section.line;
2024        push(
2025            issues,
2026            Severity::Warning,
2027            codes::DB_MD_UNKNOWN_SECTION,
2028            rel,
2029            Some(file_line),
2030            None,
2031            format!(
2032                "DB.md has an unrecognized `## {}` section",
2033                section.heading.trim()
2034            ),
2035            Some(
2036                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2037                 remove or rename this heading"
2038                    .into(),
2039            ),
2040            vec![],
2041        );
2042    }
2043}
2044
2045/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2046fn not_a_store_issue(store: &Store) -> Issue {
2047    Issue {
2048        severity: Severity::Error,
2049        code: codes::NOT_A_STORE,
2050        file: store.root.clone(),
2051        line: None,
2052        key: None,
2053        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2054        suggestion: Some("create a `DB.md` at the store root".into()),
2055        related: vec![],
2056    }
2057}
2058
/// Whether a store-relative path names a content file.
///
/// A content file's first path component is `sources`, `records`, or `wiki`;
/// its file name ends in `.md`; and it is not one of the meta files
/// `index.md`, `index.jsonl`, or `log.md`. Paths whose first component or file
/// name is not valid UTF-8 are not content files.
fn is_content_file(rel: &Path) -> bool {
    const LAYERS: [&str; 3] = ["sources", "records", "wiki"];
    const META_FILES: [&str; 3] = ["index.md", "index.jsonl", "log.md"];

    let in_layer = rel
        .iter()
        .next()
        .and_then(|component| component.to_str())
        .map_or(false, |first| LAYERS.contains(&first));
    if !in_layer {
        return false;
    }

    let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
    name.ends_with(".md") && !META_FILES.contains(&name)
}
2074
/// Split a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open on the very first line with `---` and runs
/// to the next line that is `---` (trailing whitespace on fence lines is
/// ignored). The YAML is returned with every line newline-terminated; the body
/// is every line after the closing fence joined with `\n` (so no trailing
/// newline); the fence line number is 1-based. Returns `None` when the file is
/// empty, has no opening fence, or the block is never closed.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    let all_lines: Vec<&str> = text.lines().collect();
    let (opening, rest) = all_lines.split_first()?;
    if opening.trim_end() != "---" {
        return None;
    }

    // `rest[0]` is file line 2, so the fence at `rest[fence_idx]` sits on file
    // line `fence_idx + 2`.
    let fence_idx = rest.iter().position(|line| line.trim_end() == "---")?;
    let close_line = (fence_idx + 2) as u32;

    let mut yaml = String::new();
    for line in &rest[..fence_idx] {
        yaml.push_str(line);
        yaml.push('\n');
    }
    let body = rest[fence_idx + 1..].join("\n");

    Some((yaml, body, close_line))
}
2106
2107/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2108fn read_summary(abs: &Path) -> Option<String> {
2109    let text = std::fs::read_to_string(abs).ok()?;
2110    let (yaml, _, _) = split_frontmatter(&text)?;
2111    let value: Value = serde_norway::from_str(&yaml).ok()?;
2112    if let Value::Mapping(m) = value {
2113        m.get(Value::String("summary".into()))
2114            .and_then(scalar_string)
2115    } else {
2116        None
2117    }
2118}
2119
2120/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2121/// non-string keys (frontmatter keys are always strings).
2122fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2123    let mut out = BTreeMap::new();
2124    for (k, v) in map {
2125        if let Value::String(s) = k {
2126            out.insert(s.clone(), v.clone());
2127        }
2128    }
2129    out
2130}
2131
2132/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2133/// sequences/mappings/null.
2134fn scalar_string(v: &Value) -> Option<String> {
2135    match v {
2136        Value::String(s) => Some(s.clone()),
2137        Value::Number(n) => Some(n.to_string()),
2138        Value::Bool(b) => Some(b.to_string()),
2139        _ => None,
2140    }
2141}
2142
2143/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2144/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2145fn is_flat_scalar_list(v: &Value) -> bool {
2146    match v {
2147        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2148        _ => false,
2149    }
2150}
2151
2152/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2153/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2154/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2155/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2156/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2157/// both forms uniformly, the way the link textually appears — the doctrine view.
2158///
2159/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2160/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2161fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2162    let mut out = Vec::new();
2163    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2164        for link in links {
2165            out.push((key.clone(), link));
2166        }
2167    }
2168    out
2169}
2170
2171/// The wiki-link targets declared under a single top-level frontmatter key
2172/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2173/// carries no `[[...]]`.
2174fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2175    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2176        if k == key {
2177            return links;
2178        }
2179    }
2180    Vec::new()
2181}
2182
2183/// The raw value text under a single top-level frontmatter key (the remainder of
2184/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2185/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2186fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2187    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2188        if k == key {
2189            return Some(value_text);
2190        }
2191    }
2192    None
2193}
2194
2195/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2196/// each top-level key. A top-level key is a line with no leading indentation in
2197/// `name:` form; its value spans the rest of that line plus any deeper-indented
2198/// continuation lines (block scalars, block sequences) until the next top-level
2199/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2200/// absolute file line.
2201fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2202    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2203    let mut current: Option<(String, String, Vec<Link>)> = None;
2204
2205    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2206        let file_line = fm_start_line + idx as u32;
2207        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2208        let trimmed = raw_line.trim();
2209
2210        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2211        // comment. (Indented or dash lines belong to the current key's value.)
2212        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2213            top_level_key(raw_line)
2214        } else {
2215            None
2216        };
2217
2218        if let Some((key, after)) = new_key {
2219            if let Some(done) = current.take() {
2220                blocks.push(done);
2221            }
2222            let mut links = Vec::new();
2223            collect_line_links(after, file_line, &mut links);
2224            current = Some((key, after.trim().to_string(), links));
2225        } else if let Some((_k, value_text, links)) = current.as_mut() {
2226            // Continuation of the current key's value (indented or dash line).
2227            if !value_text.is_empty() {
2228                value_text.push('\n');
2229            }
2230            value_text.push_str(trimmed);
2231            collect_line_links(raw_line, file_line, links);
2232        }
2233    }
2234    if let Some(done) = current.take() {
2235        blocks.push(done);
2236    }
2237    blocks
2238}
2239
/// Parse a frontmatter line of the form `name: value` into
/// `(name, text_after_the_first_colon)`.
///
/// The name is trimmed and must be non-empty and made only of alphanumerics,
/// `_`, or `-`; the value text is returned untrimmed. `None` if the line has
/// no colon or the name is not of that form.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let value = &line[colon + 1..];

    let well_formed = !name.is_empty()
        && name
            .chars()
            .all(|c| matches!(c, '_' | '-') || c.is_alphanumeric());
    if well_formed {
        Some((name.to_string(), value))
    } else {
        None
    }
}
2254
2255/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2256/// each tagged with `file_line`.
2257fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2258    let bytes = s.as_bytes();
2259    let mut i = 0;
2260    while i + 1 < bytes.len() {
2261        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2262            if let Some(close) = s[i + 2..].find("]]") {
2263                let inner = &s[i + 2..i + 2 + close];
2264                // Guard against `[[[` (nested) double-counting: the inner must
2265                // not itself open another `[[`.
2266                let target = inner
2267                    .trim_start_matches('[')
2268                    .split('|')
2269                    .next()
2270                    .unwrap_or(inner)
2271                    .trim()
2272                    .to_string();
2273                if !target.is_empty() {
2274                    links.push(Link {
2275                        target,
2276                        line: file_line,
2277                    });
2278                }
2279                i = i + 2 + close + 2;
2280                continue;
2281            }
2282        }
2283        i += 1;
2284    }
2285}
2286
2287/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2288/// Skips fenced code blocks (```), so example links in docs don't trip the
2289/// validator.
2290fn extract_wiki_links(body: &str) -> Vec<Link> {
2291    let mut out = Vec::new();
2292    let mut in_fence = false;
2293    for (idx, line) in body.lines().enumerate() {
2294        let trimmed = line.trim_start();
2295        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2296            in_fence = !in_fence;
2297            continue;
2298        }
2299        if in_fence {
2300            continue;
2301        }
2302        let line_no = (idx + 1) as u32;
2303        let bytes = line.as_bytes();
2304        let mut i = 0;
2305        while i + 1 < bytes.len() {
2306            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2307                if let Some(close) = line[i + 2..].find("]]") {
2308                    let inner = &line[i + 2..i + 2 + close];
2309                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2310                    // Skip a triple-bracket `[[[…` opening: the inner content
2311                    // starts with `[`, so this is the rejected flow-form list
2312                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2313                    // legitimate target never starts with `[`. The frontmatter
2314                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2315                    // extracting a bogus body link here would double-report it as
2316                    // a spurious `WIKI_LINK_SHORT_FORM`.
2317                    if !target.is_empty() && !target.starts_with('[') {
2318                        out.push(Link {
2319                            target,
2320                            line: line_no,
2321                        });
2322                    }
2323                    i = i + 2 + close + 2;
2324                    continue;
2325                }
2326            }
2327            i += 1;
2328        }
2329    }
2330    out
2331}
2332
/// Find frontmatter keys whose value is the wiki-link-list mis-encoding: a
/// YAML flow sequence whose first item is itself a sequence, i.e. a value that
/// begins with `[[[` (as in `attendees: [[[a]], [[b]]]`).
///
/// Every line is split at its first `:`; lines whose trimmed key is empty or
/// begins with `#` or `-` are ignored. Returns the offending keys in file
/// order. The canonical block-sequence form never starts with `[[[` and so is
/// not reported.
fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
    fm_yaml
        .lines()
        .filter_map(|line| {
            let (raw_key, value) = line.split_once(':')?;
            let key = raw_key.trim();
            let is_mapping_key =
                !(key.is_empty() || key.starts_with('#') || key.starts_with('-'));
            // `[[[` = a flow list whose first element opens another list.
            (is_mapping_key && value.trim().starts_with("[[[")).then(|| key.to_string())
        })
        .collect()
}
2355
/// True if a bare target (no `.md`) is a full store-relative path: its text
/// before the first `/` is one of the layers `sources`, `records`, or `wiki`,
/// and something non-empty follows that `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, remainder)) => {
            !remainder.is_empty() && matches!(layer, "sources" | "records" | "wiki")
        }
        None => false,
    }
}
2364
/// True if `path` is made only of normal relative components (with `.`
/// tolerated) and has at least one of them.
///
/// Validator inputs come from user-authored markdown/JSON sidecars, so any
/// `..`, root, or platform-prefix component makes the path unsafe: it must
/// never let a validation probe escape the store directory. An empty path (or
/// one consisting solely of `.`) is also rejected.
fn is_safe_store_relative_path(path: &Path) -> bool {
    path.components()
        // Accumulator: "have we seen a normal component yet?". `None` aborts
        // the fold on the first unsafe component.
        .try_fold(false, |seen_normal, part| match part {
            Component::Normal(_) => Some(true),
            Component::CurDir => Some(seen_normal),
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
        })
        .unwrap_or(false)
}
2379
2380fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2381    let path = Path::new(bare);
2382    if !is_safe_store_relative_path(path) {
2383        return None;
2384    }
2385    Some(PathBuf::from(format!("{bare}.md")))
2386}
2387
/// True if a bare target path equals `prefix` or lies beneath it (both
/// `.md`-stripped). Trailing `/` characters on `prefix` are ignored, and the
/// match is segment-aware: `records/contacts-old/a` is not under
/// `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let root = prefix.trim_end_matches('/');
    match bare.strip_prefix(root) {
        // Either the path *is* the prefix, or the next character starts a new
        // path segment.
        Some(tail) => tail.is_empty() || tail.starts_with('/'),
        None => false,
    }
}
2393
/// The type-folder of a store-relative content path: `<layer>/<type-folder>`,
/// i.e. the first two components, so files in deeper date-shard folders roll
/// up to the folder directly under the layer.
///
/// Returns `None` when the path has fewer than three (UTF-8) components — a
/// file sitting directly in a layer folder — or when the first component is
/// not one of `sources`, `records`, `wiki`.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut segments = rel.iter().filter_map(|part| part.to_str());
    let layer = segments.next()?;
    let folder = segments.next()?;
    // At least one more component (the file itself) must follow the folder.
    segments.next()?;

    if !matches!(layer, "sources" | "records" | "wiki") {
        return None;
    }
    let mut type_folder = PathBuf::from(layer);
    type_folder.push(folder);
    Some(type_folder)
}
2407
2408/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`/`wiki/`,
2409/// returning store-relative paths to be parsed in full. Skips hidden dirs,
2410/// `log/`, and the index twin (`index.jsonl`). Used only by `validate_all`; the
2411/// working-set incoming-linker scan rides the embedded-ripgrep
2412/// `Store::find_links_to_any` (a single presence-only pass), so the loop default
2413/// never walks-and-*parses* the whole content tree.
2414fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2415    let mut out = Vec::new();
2416    for layer in ["sources", "records", "wiki"] {
2417        let base = root.join(layer);
2418        if !base.is_dir() {
2419            continue;
2420        }
2421        for entry in walkdir::WalkDir::new(&base)
2422            .into_iter()
2423            .filter_entry(|e| {
2424                let name = e.file_name().to_str().unwrap_or("");
2425                !name.starts_with('.') && name != "log"
2426            })
2427            .flatten()
2428        {
2429            if !entry.file_type().is_file() {
2430                continue;
2431            }
2432            let name = entry.file_name().to_str().unwrap_or("");
2433            if name.ends_with(".md") && name != "index.md" {
2434                if let Ok(rel) = entry.path().strip_prefix(root) {
2435                    out.push(rel.to_path_buf());
2436                }
2437            }
2438        }
2439    }
2440    out.sort();
2441    out
2442}
2443
2444/// Every `index.md` under the store (root + layers + type-folders), as
2445/// store-relative paths. Used to detect orphan indexes.
2446fn walk_index_files(root: &Path) -> Vec<PathBuf> {
2447    let mut out = Vec::new();
2448    if root.join("index.md").is_file() {
2449        out.push(PathBuf::from("index.md"));
2450    }
2451    for layer in ["sources", "records", "wiki"] {
2452        let base = root.join(layer);
2453        if !base.is_dir() {
2454            continue;
2455        }
2456        for entry in walkdir::WalkDir::new(&base)
2457            .into_iter()
2458            .filter_entry(|e| {
2459                let name = e.file_name().to_str().unwrap_or("");
2460                !name.starts_with('.') && name != "log"
2461            })
2462            .flatten()
2463        {
2464            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
2465                if let Ok(rel) = entry.path().strip_prefix(root) {
2466                    out.push(rel.to_path_buf());
2467                }
2468            }
2469        }
2470    }
2471    out.sort();
2472    out
2473}
2474
/// One parsed entry line of an `index.md`.
struct IndexEntry {
    /// The wiki-link target: the text inside `[[...]]` before any `|display`
    /// segment, trimmed.
    target: String,
    /// The summary text following the `—` separator, when the line has one.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md`.
    line: u32,
}
2482
2483/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
2484/// `## More` footer (those lines aren't file entries). Root/layer entries with a
2485/// `|display` segment and a `(N)` count are parsed too — the target is the bare
2486/// path, the summary text is whatever follows the em dash.
2487fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
2488    let mut out = Vec::new();
2489    let mut in_more = false;
2490    for (idx, line) in text.lines().enumerate() {
2491        let trimmed = line.trim_start();
2492        if trimmed.starts_with("## More") {
2493            in_more = true;
2494            continue;
2495        }
2496        if in_more {
2497            continue;
2498        }
2499        if !trimmed.starts_with("- ") {
2500            continue;
2501        }
2502        // Find the first `[[...]]`.
2503        let Some(open) = trimmed.find("[[") else {
2504            continue;
2505        };
2506        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
2507            continue;
2508        };
2509        let inner = &trimmed[open + 2..open + 2 + close_rel];
2510        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2511
2512        // Summary text: whatever follows the first em dash (`—`) or ` - `.
2513        let after = &trimmed[open + 2 + close_rel + 2..];
2514        let summary_text = extract_index_entry_summary(after);
2515
2516        out.push(IndexEntry {
2517            target,
2518            summary_text,
2519            line: (idx + 1) as u32,
2520        });
2521    }
2522    out
2523}
2524
/// Pull the summary out of the text that trails an index entry's wiki-link.
///
/// Steps, in order: skip a leading parenthesised count such as `(12 files)`;
/// require a `—` or `-` separator (otherwise `None`); trim the remainder and
/// return `None` if nothing is left; finally cut the text at the first ` · `,
/// which drops a trailing `· #tag #tag` suffix.
fn extract_index_entry_summary(after: &str) -> Option<String> {
    let trailing = after.trim();

    // Optional "(N ...)" count: only skipped when the closing `)` exists.
    let past_count = trailing
        .strip_prefix('(')
        .and_then(|inside| inside.split_once(')'))
        .map_or(trailing, |(_count, rest)| rest.trim_start());

    // The separator is mandatory; without it there is no summary.
    let body = past_count
        .strip_prefix('—')
        .or_else(|| past_count.strip_prefix('-'))?
        .trim();
    if body.is_empty() {
        return None;
    }

    let summary = body
        .split_once(" · ")
        .map_or(body, |(text, _tags)| text.trim());
    Some(summary.to_string())
}
2554
2555/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
2556/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
2557/// or the header isn't well-formed.
2558fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
2559    let rest = line.strip_prefix("## [")?;
2560    let close = rest.find(']')?;
2561    let ts_str = &rest[..close];
2562    let tail = rest[close + 1..].trim();
2563
2564    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
2565    // attach a zero offset — the log header carries minute precision, no zone.
2566    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
2567    let offset = FixedOffset::east_opt(0)?;
2568    let ts = naive.and_local_timezone(offset).single()?;
2569
2570    // kind | object
2571    let (kind, object) = match tail.split_once('|') {
2572        Some((k, o)) => {
2573            let o = o.trim();
2574            (
2575                k.trim().to_string(),
2576                if o.is_empty() {
2577                    None
2578                } else {
2579                    Some(o.to_string())
2580                },
2581            )
2582        }
2583        None => (tail.to_string(), None),
2584    };
2585    if kind.is_empty() {
2586        return None;
2587    }
2588    Some((ts, kind, object))
2589}
2590
2591/// The timestamp of the most recent `validate` entry across `log.md` (active)
2592/// — the default working-set cutoff. Reads only headers; never the whole store.
2593fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
2594    let text = std::fs::read_to_string(store.root.join("log.md")).ok()?;
2595    let mut latest: Option<DateTime<FixedOffset>> = None;
2596    for line in text.lines() {
2597        if !line.starts_with("## [") {
2598            continue;
2599        }
2600        if let Some((ts, kind, _)) = parse_log_header(line) {
2601            if kind == "validate" {
2602                latest = Some(match latest {
2603                    Some(p) if p >= ts => p,
2604                    _ => ts,
2605                });
2606            }
2607        }
2608    }
2609    latest
2610}
2611
2612/// The set of content objects changed since `cutoff`, read from `log.md`
2613/// entries whose kind mutates a file. When `cutoff` is `None`, every mutating
2614/// entry counts (no prior validate window). Returns store-relative `.md` paths.
2615fn changed_objects_since(
2616    store: &Store,
2617    cutoff: Option<DateTime<FixedOffset>>,
2618) -> BTreeSet<PathBuf> {
2619    let mut out = BTreeSet::new();
2620    let Ok(text) = std::fs::read_to_string(store.root.join("log.md")) else {
2621        return out;
2622    };
2623    for line in text.lines() {
2624        if !line.starts_with("## [") {
2625            continue;
2626        }
2627        let Some((ts, kind, object)) = parse_log_header(line) else {
2628            continue;
2629        };
2630        if let Some(c) = cutoff {
2631            if ts < c {
2632                continue;
2633            }
2634        }
2635        if !matches!(
2636            kind.as_str(),
2637            "create" | "update" | "ingest" | "rename" | "delete" | "link"
2638        ) {
2639            continue;
2640        }
2641        if let Some(obj) = object {
2642            // The object slot is a store-relative path (or a wiki-link target).
2643            let bare = obj
2644                .trim()
2645                .trim_start_matches("[[")
2646                .trim_end_matches("]]")
2647                .split('|')
2648                .next()
2649                .unwrap_or("")
2650                .trim()
2651                .trim_end_matches(".md")
2652                .to_string();
2653            if bare.is_empty() {
2654                continue;
2655            }
2656            out.insert(PathBuf::from(format!("{bare}.md")));
2657        }
2658    }
2659    out
2660}
2661
/// A positive outcome of the [`derived_from_ignored_type`] policy check: which
/// `derived_from` target resolved to an ignored-type record, and what that
/// record's type is. It carries exactly what both the validate finding and the
/// write-time warning need to render their message.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DerivedFromIgnored {
    /// The offending `derived_from` wiki-link target exactly as written: a bare
    /// store-relative path without `.md`.
    pub target: String,
    /// The `type` that target resolved to — a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
2675
2676/// **The single authoritative `### Ignored types` derivation check.** Decides
2677/// whether a `wiki-page` derives from an ignored-type record: the type must be
2678/// `wiki-page`, `### Ignored types` must be non-empty, and some `derived_from`
2679/// target must resolve to a record whose `type` is in `ignored_types`. Returns
2680/// the first such target (and its type), or `None`.
2681///
2682/// Both surfaces call this so the policy lives in exactly one place:
2683/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
2684/// `derived_from` targets it scanned from the raw frontmatter, and the write
2685/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
2686/// The link *extraction* differs per surface (text-scan with line numbers vs.
2687/// the parsed `Frontmatter`); the *decision* — type gate, target-type
2688/// resolution, and `ignored_types` membership — does not.
2689pub fn derived_from_ignored_type<I, S>(
2690    store: &Store,
2691    type_: &str,
2692    derived_from_targets: I,
2693) -> Option<DerivedFromIgnored>
2694where
2695    I: IntoIterator<Item = S>,
2696    S: AsRef<str>,
2697{
2698    if type_ != "wiki-page" || store.config.ignored_types.is_empty() {
2699        return None;
2700    }
2701    for target in derived_from_targets {
2702        let target = target.as_ref();
2703        if let Some(target_type) = link_target_type(store, target) {
2704            if store.config.ignored_types.contains(&target_type) {
2705                return Some(DerivedFromIgnored {
2706                    target: target.to_string(),
2707                    target_type,
2708                });
2709            }
2710        }
2711    }
2712    None
2713}
2714
2715/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
2716fn link_target_type(store: &Store, target: &str) -> Option<String> {
2717    let bare = target.trim_end_matches(".md");
2718    let abs = store.root.join(safe_md_target_rel(bare)?);
2719    let text = std::fs::read_to_string(&abs).ok()?;
2720    let (yaml, _, _) = split_frontmatter(&text)?;
2721    let value: Value = serde_norway::from_str(&yaml).ok()?;
2722    if let Value::Mapping(m) = value {
2723        m.get(Value::String("type".into())).and_then(scalar_string)
2724    } else {
2725        None
2726    }
2727}
2728
2729// ── Shape validators ─────────────────────────────────────────────────────────
2730
2731/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
2732/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
2733fn is_iso8601(s: &str) -> bool {
2734    DateTime::parse_from_rfc3339(s.trim()).is_ok()
2735}
2736
2737/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
2738/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
2739/// accept the date-only form per the SPEC's worked example.
2740fn is_iso8601_date_or_datetime(s: &str) -> bool {
2741    let s = s.trim();
2742    if DateTime::parse_from_rfc3339(s).is_ok() {
2743        return true;
2744    }
2745    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
2746}
2747
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Surrounding whitespace is trimmed first. The address must then contain
/// exactly one `@`, no whitespace of any kind (spaces, tabs, newlines), and a
/// domain made of at least two non-empty, dot-separated labels — so
/// `a@b@c.com`, `a@b..com`, `a@.b.co`, and `a@b.co.` are all rejected. This is
/// a shape check only; it does not attempt full RFC 5322 validation.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    !local.is_empty()
        // A second `@` would otherwise hide inside the "domain".
        && !domain.contains('@')
        && !s.contains(char::is_whitespace)
        && domain.contains('.')
        // Rejects a leading/trailing dot and empty labels such as `b..com`.
        && domain.split('.').all(|label| !label.is_empty())
}
2761
2762/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
2763/// plain decimal number with optional thousands separators and ≤ 2 decimals.
2764///
2765/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
2766/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
2767/// rejected, and the ≤ 2-decimal rule is actually enforced.
2768fn is_currency(s: &str) -> bool {
2769    let mut t = s.trim();
2770    // Strip a leading currency symbol …
2771    for sym in ["$", "€", "£", "¥"] {
2772        if let Some(rest) = t.strip_prefix(sym) {
2773            t = rest.trim_start();
2774            break;
2775        }
2776    }
2777    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
2778    // code must be exactly three ASCII letters and separated from the number by
2779    // whitespace, so a bare `USD` with no amount still fails.
2780    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
2781        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
2782            t = rest.trim_start();
2783        }
2784    }
2785
2786    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
2787    is_plain_amount(cleaned.trim())
2788}
2789
/// True for a bare decimal amount: an optional `+`/`-` sign, one or more ASCII
/// digits, and optionally a `.` followed by one or two ASCII digits. Exponents,
/// `inf`/`NaN`, a missing integer part, and the empty string are all rejected.
fn is_plain_amount(s: &str) -> bool {
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (whole, fraction) = unsigned
        .split_once('.')
        .map_or((unsigned, None), |(whole, frac)| (whole, Some(frac)));

    if whole.is_empty() || !all_digits(whole) {
        return false;
    }
    // A fractional part, when present, must be exactly 1 or 2 digits.
    fraction.map_or(true, |frac| matches!(frac.len(), 1 | 2) && all_digits(frac))
}
2806
/// True for an http(s) URL: after trimming, the string must start with
/// `http://` or `https://` and have at least one character after the scheme.
///
/// The remainder is checked per scheme, so `http://a` and `https://a` are both
/// accepted while a bare `http://` or `https://` is not.
fn is_url(s: &str) -> bool {
    let s = s.trim();
    s.strip_prefix("https://")
        .or_else(|| s.strip_prefix("http://"))
        .map_or(false, |rest| !rest.is_empty())
}
2812
2813/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
2814fn shape_suggestion(shape: Shape) -> String {
2815    match shape {
2816        Shape::String => "use a scalar string".into(),
2817        Shape::Int => "use an integer".into(),
2818        Shape::Bool => "use `true` or `false`".into(),
2819        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
2820        Shape::Email => "use a `<local>@<domain>` address".into(),
2821        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
2822        Shape::Url => "use an http(s) URL".into(),
2823    }
2824}
2825
2826/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
2827/// can't know the folder, so the suggestion is generic but actionable.
2828fn short_form_suggestion(bare: &str) -> Option<String> {
2829    Some(format!(
2830        "use a full store-relative path, e.g. [[records/contacts/{}]]",
2831        slugish(bare)
2832    ))
2833}
2834
/// A filesystem-ish leaf for a plain string: trimmed, lowercased, every
/// whitespace character turned into `-`, and anything that is not
/// alphanumeric, `-`, `/`, or `_` dropped.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
2844
2845/// Push a fully-formed [`Issue`].
2846#[allow(clippy::too_many_arguments)]
2847fn push(
2848    issues: &mut Vec<Issue>,
2849    severity: Severity,
2850    code: &'static str,
2851    file: &Path,
2852    line: Option<u32>,
2853    key: Option<String>,
2854    message: String,
2855    suggestion: Option<String>,
2856    related: Vec<PathBuf>,
2857) {
2858    issues.push(Issue {
2859        severity,
2860        code,
2861        file: file.to_path_buf(),
2862        line,
2863        key,
2864        message,
2865        suggestion,
2866        related,
2867    });
2868}
2869
/// The 1-based *file* line of a top-level frontmatter key, or `None` if the key
/// does not appear.
///
/// A line matches when it begins (at column 0, so no indentation and no list
/// dash) with `key` immediately followed by `:`. The first matching line wins.
/// The returned number is offset for the file: line 1 is the opening `---`, so
/// the first YAML line is file line 2.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    fm_yaml
        .lines()
        .position(|line| {
            line.starts_with(key)
                && line
                    .trim_start()
                    .strip_prefix(key)
                    .map_or(false, |rest| rest.starts_with(':'))
        })
        // YAML line index 0 sits on file line 2.
        .map(|idx| idx as u32 + 2)
}
2885
2886/// The line a *field-absence* issue (a required key that is missing entirely)
2887/// anchors to: the key's line when present, else line `1` — the frontmatter
2888/// block's opening `---`. A missing key has no line of its own; anchoring it to
2889/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
2890/// line to point at instead of an unhelpful `null`.
2891fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
2892    fm_key_line(fm_yaml, key).or(Some(1))
2893}
2894
2895/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
2896/// output deterministic across runs.
2897fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
2898    a.file
2899        .cmp(&b.file)
2900        .then(a.line.cmp(&b.line))
2901        .then(a.code.cmp(b.code))
2902        .then(a.key.cmp(&b.key))
2903}
2904
2905// ═════════════════════════════════════════════════════════════════════════════
2906//  Tests
2907// ═════════════════════════════════════════════════════════════════════════════
2908
2909#[cfg(test)]
2910mod tests {
2911    use super::*;
2912    use crate::parser::{Config, FieldSpec};
2913    use std::fs;
2914    use tempfile::TempDir;
2915
2916    /// A test store builder over a real tempdir. Every helper writes real files
2917    /// so the assertions exercise real behavior, not mocks.
2918    struct Fixture {
2919        dir: TempDir,
2920        config: Config,
2921    }
2922
2923    impl Fixture {
2924        /// A fresh store with a **valid** `DB.md` (the identity contract:
2925        /// `type: db-md` + `scope` + `owner`) and the three layer dirs. A valid
2926        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
2927        /// clean; tests that want a broken DB.md write their own via `write`.
2928        fn new() -> Self {
2929            let dir = TempDir::new().unwrap();
2930            fs::write(
2931                dir.path().join("DB.md"),
2932                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
2933            )
2934            .unwrap();
2935            for layer in ["sources", "records", "wiki"] {
2936                fs::create_dir_all(dir.path().join(layer)).unwrap();
2937            }
2938            Fixture {
2939                dir,
2940                config: Config::default(),
2941            }
2942        }
2943
2944        /// A store with no `DB.md` marker.
2945        fn bare() -> Self {
2946            let dir = TempDir::new().unwrap();
2947            Fixture {
2948                dir,
2949                config: Config::default(),
2950            }
2951        }
2952
2953        /// Write a file at a store-relative path, creating parent dirs.
2954        fn write(&self, rel: &str, contents: &str) {
2955            let abs = self.dir.path().join(rel);
2956            fs::create_dir_all(abs.parent().unwrap()).unwrap();
2957            fs::write(abs, contents).unwrap();
2958        }
2959
2960        fn store(&self) -> Store {
2961            Store {
2962                root: self.dir.path().to_path_buf(),
2963                config: self.config.clone(),
2964            }
2965        }
2966
2967        fn store_all(&self) -> Vec<Issue> {
2968            validate_all(&self.store()).unwrap()
2969        }
2970
2971        /// Write the canonical `index.md` + `index.jsonl` at every level via the
2972        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
2973        /// projection a `dbmd index rebuild` produces. Use this (rather than a
2974        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
2975        /// the sidecar carries the COMPLETE per-field projection and the fixture
2976        /// can't silently drift from what the index writer emits.
2977        fn rebuild_indexes(&self) {
2978            crate::index::Index::rebuild_all(&self.store()).unwrap();
2979        }
2980    }
2981
2982    /// True if any issue has this code.
2983    fn has(issues: &[Issue], code: &str) -> bool {
2984        issues.iter().any(|i| i.code == code)
2985    }
2986
2987    /// Count issues with a code.
2988    fn count(issues: &[Issue], code: &str) -> usize {
2989        issues.iter().filter(|i| i.code == code).count()
2990    }
2991
2992    /// The first issue with a code, or panic.
2993    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
2994        issues
2995            .iter()
2996            .find(|i| i.code == code)
2997            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
2998    }
2999
3000    /// A minimal valid `contact` body for reuse.
3001    fn valid_contact(summary: &str) -> String {
3002        format!(
3003            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3004        )
3005    }
3006
3007    // ── store marker ──────────────────────────────────────────────────────────
3008
3009    #[test]
3010    fn not_a_store_when_db_md_absent() {
3011        let fx = Fixture::bare();
3012        let issues = fx.store_all();
3013        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3014        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3015        assert!(issues[0].is_error());
3016    }
3017
3018    #[test]
3019    fn working_set_also_reports_not_a_store() {
3020        let fx = Fixture::bare();
3021        let issues = validate_working_set(&fx.store(), None).unwrap();
3022        assert!(has(&issues, codes::NOT_A_STORE));
3023    }
3024
3025    #[test]
3026    fn clean_store_has_no_issues() {
3027        let fx = Fixture::new();
3028        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3029        // Build the canonical indexes (complete per-field jsonl included) the
3030        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3031        // proven clean across every projected field, not just summary/type.
3032        fx.rebuild_indexes();
3033        let issues = fx.store_all();
3034        assert!(
3035            issues.is_empty(),
3036            "expected a clean store, got: {issues:#?}"
3037        );
3038    }
3039
3040    // ── DB.md structure ───────────────────────────────────────────────────────
3041
3042    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3043    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3044    /// would fail here).
3045    #[test]
3046    fn valid_db_md_emits_no_structure_issue() {
3047        let fx = Fixture::new();
3048        let issues = fx.store_all();
3049        assert!(
3050            !has(&issues, codes::DB_MD_BAD_TYPE)
3051                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3052                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3053            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3054        );
3055    }
3056
3057    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3058    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3059    /// accepting a non-`db-md` type, breaks this.
3060    #[test]
3061    fn db_md_wrong_type_is_error() {
3062        let fx = Fixture::new();
3063        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3064        let issues = fx.store_all();
3065        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3066        assert!(i.is_error());
3067        assert_eq!(i.file, PathBuf::from("DB.md"));
3068        assert_eq!(i.key.as_deref(), Some("type"));
3069        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3070    }
3071
3072    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3073    /// absent field, each keyed on its field name, anchored to the block top.
3074    #[test]
3075    fn db_md_missing_scope_and_owner_each_report() {
3076        let fx = Fixture::new();
3077        fx.write("DB.md", "---\ntype: db-md\n---\n");
3078        let issues = fx.store_all();
3079        assert_eq!(
3080            count(&issues, codes::DB_MD_MISSING_FIELD),
3081            2,
3082            "both scope and owner absent → two issues: {issues:#?}"
3083        );
3084        let keys: BTreeSet<Option<String>> = issues
3085            .iter()
3086            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3087            .map(|i| i.key.clone())
3088            .collect();
3089        assert_eq!(
3090            keys,
3091            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3092            "one issue keyed on each missing field"
3093        );
3094        for i in issues
3095            .iter()
3096            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3097        {
3098            assert!(i.is_error());
3099            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3100        }
3101    }
3102
3103    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3104    /// anchored to its own line — guarding against an "is the key textually
3105    /// present?" shortcut that would miss `owner:` with an empty value.
3106    #[test]
3107    fn db_md_blank_required_field_is_missing() {
3108        let fx = Fixture::new();
3109        fx.write(
3110            "DB.md",
3111            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3112        );
3113        let issues = fx.store_all();
3114        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3115        assert_eq!(i.key.as_deref(), Some("owner"));
3116        assert_eq!(
3117            i.line,
3118            Some(4),
3119            "a present-but-empty field anchors to its line"
3120        );
3121        assert!(
3122            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3123            "scope is present and non-empty → only owner reported"
3124        );
3125    }
3126
3127    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
3128    /// to the heading's file line; the three recognized sections stay silent.
3129    #[test]
3130    fn db_md_unknown_section_is_warning() {
3131        let fx = Fixture::new();
3132        fx.write(
3133            "DB.md",
3134            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
3135            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
3136            // 11 `## Glossary`.
3137            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
3138        );
3139        let issues = fx.store_all();
3140        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
3141        assert!(!i.is_error(), "unknown section is a warning, not an error");
3142        assert_eq!(i.severity, Severity::Warning);
3143        assert_eq!(
3144            i.line,
3145            Some(11),
3146            "anchors to the `## Glossary` heading line"
3147        );
3148        assert!(
3149            i.message.contains("Glossary"),
3150            "the message names the offending section: {}",
3151            i.message
3152        );
3153        // The recognized `## Agent instructions` section did NOT fire.
3154        assert_eq!(
3155            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
3156            1,
3157            "only the unrecognized section is flagged: {issues:#?}"
3158        );
3159    }
3160
3161    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
3162    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
3163    #[test]
3164    fn db_md_no_frontmatter_reports_type_and_both_fields() {
3165        let fx = Fixture::new();
3166        fx.write("DB.md", "# just a heading, no frontmatter\n");
3167        let issues = fx.store_all();
3168        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
3169        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
3170    }
3171
3172    // ── frontmatter ─────────────────────────────────────────────────────────
3173
3174    #[test]
3175    fn missing_type_is_error() {
3176        let fx = Fixture::new();
3177        fx.write(
3178            "records/contacts/a.md",
3179            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
3180        );
3181        let issues = fx.store_all();
3182        assert!(has(&issues, codes::FM_MISSING_TYPE));
3183        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
3184    }
3185
3186    #[test]
3187    fn missing_universal_timestamps_are_errors_on_content_files() {
3188        let fx = Fixture::new();
3189        fx.write(
3190            "records/contacts/a.md",
3191            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
3192        );
3193        let issues = fx.store_all();
3194
3195        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
3196        assert_eq!(missing_created.key.as_deref(), Some("created"));
3197        assert!(missing_created.is_error());
3198
3199        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
3200        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
3201        assert!(missing_updated.is_error());
3202    }
3203
3204    #[test]
3205    fn meta_files_do_not_require_universal_timestamps() {
3206        let fx = Fixture::new();
3207        let issues = fx.store_all();
3208
3209        assert!(
3210            !has(&issues, codes::FM_MISSING_CREATED),
3211            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3212        );
3213        assert!(
3214            !has(&issues, codes::FM_MISSING_UPDATED),
3215            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3216        );
3217    }
3218
3219    #[test]
3220    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
3221        let fx = Fixture::new();
3222        fx.write(
3223            "wiki/people/a.md",
3224            "# Just a heading\n\nNo frontmatter here.\n",
3225        );
3226        let issues = fx.store_all();
3227        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3228        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3229    }
3230
3231    #[test]
3232    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
3233        let fx = Fixture::new();
3234        fx.write("wiki/people/a.md", "---\n---\n\nbody\n");
3235        let issues = fx.store_all();
3236        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3237        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3238    }
3239
3240    #[test]
3241    fn malformed_yaml_is_error_and_suppresses_field_checks() {
3242        let fx = Fixture::new();
3243        // A tab inside a mapping value is invalid YAML.
3244        fx.write(
3245            "records/contacts/a.md",
3246            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
3247        );
3248        let issues = fx.store_all();
3249        let issue = find(&issues, codes::FM_MALFORMED_YAML);
3250        assert!(issue.is_error());
3251        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3252        // When YAML doesn't parse we don't *also* claim the summary is missing;
3253        // the agent fixes the YAML first.
3254        assert!(
3255            !has(&issues, codes::SUMMARY_MISSING),
3256            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
3257        );
3258    }
3259
3260    #[test]
3261    fn bad_created_timestamp_is_error() {
3262        let fx = Fixture::new();
3263        fx.write(
3264            "records/contacts/a.md",
3265            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
3266        );
3267        let issues = fx.store_all();
3268        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
3269        assert_eq!(issue.key.as_deref(), Some("created"));
3270        assert!(issue.is_error());
3271    }
3272
3273    #[test]
3274    fn date_only_created_is_rejected_but_type_date_field_accepted() {
3275        let fx = Fixture::new();
3276        // `created` must be a full RFC3339 datetime → a date-only value is bad.
3277        // `last_touch` is a type-specific date field → date-only is fine.
3278        fx.write(
3279            "records/contacts/a.md",
3280            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
3281        );
3282        let issues = fx.store_all();
3283        let created_issues: Vec<_> = issues
3284            .iter()
3285            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
3286            .collect();
3287        assert_eq!(
3288            created_issues.len(),
3289            1,
3290            "date-only `created` must fail: {issues:#?}"
3291        );
3292        assert!(
3293            !issues.iter().any(
3294                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
3295            ),
3296            "date-only `last_touch` is valid: {issues:#?}"
3297        );
3298    }
3299
3300    // ── summary ─────────────────────────────────────────────────────────────
3301
3302    #[test]
3303    fn summary_missing_empty_multiline_toolong() {
3304        let fx = Fixture::new();
3305        fx.write(
3306            "wiki/people/missing.md",
3307            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
3308        );
3309        fx.write(
3310            "wiki/people/empty.md",
3311            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
3312        );
3313        let long = "x".repeat(201);
3314        fx.write(
3315            "wiki/people/long.md",
3316            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
3317        );
3318        let issues = fx.store_all();
3319        assert!(has(&issues, codes::SUMMARY_MISSING));
3320        assert_eq!(
3321            find(&issues, codes::SUMMARY_MISSING).file,
3322            PathBuf::from("wiki/people/missing.md")
3323        );
3324        assert!(has(&issues, codes::SUMMARY_EMPTY));
3325        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
3326        assert_eq!(
3327            find(&issues, codes::SUMMARY_TOO_LONG).severity,
3328            Severity::Warning
3329        );
3330    }
3331
3332    #[test]
3333    fn summary_multiline_via_yaml_block_scalar() {
3334        let fx = Fixture::new();
3335        // A literal block scalar produces a value with a newline.
3336        fx.write(
3337            "wiki/people/a.md",
3338            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
3339        );
3340        let issues = fx.store_all();
3341        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
3342    }
3343
3344    #[test]
3345    fn summary_exactly_200_chars_is_ok() {
3346        let fx = Fixture::new();
3347        let s = "y".repeat(200);
3348        fx.write(
3349            "wiki/people/a.md",
3350            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
3351        );
3352        let issues = fx.store_all();
3353        assert!(
3354            !has(&issues, codes::SUMMARY_TOO_LONG),
3355            "200 is the bound, inclusive: {issues:#?}"
3356        );
3357    }
3358
3359    #[test]
3360    fn meta_files_need_no_summary() {
3361        let fx = Fixture::new();
3362        // The root/layer/type indexes + log carry no summary and must not be
3363        // flagged. (A lone DB.md store with one contact and full indexes.)
3364        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3365        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
3366        fx.write(
3367            "records/index.md",
3368            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
3369        );
3370        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
3371        fx.write(
3372            "records/contacts/index.jsonl",
3373            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
3374        );
3375        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
3376        let issues = fx.store_all();
3377        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3378    }
3379
3380    // ── tags ────────────────────────────────────────────────────────────────
3381
3382    #[test]
3383    fn nested_tags_warns_flat_tags_ok() {
3384        let fx = Fixture::new();
3385        fx.write(
3386            "records/contacts/nested.md",
3387            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
3388        );
3389        fx.write(
3390            "records/contacts/flat.md",
3391            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
3392        );
3393        let issues = fx.store_all();
3394        let tag_issues: Vec<_> = issues
3395            .iter()
3396            .filter(|i| i.code == codes::TAGS_MALFORMED)
3397            .collect();
3398        assert_eq!(
3399            tag_issues.len(),
3400            1,
3401            "only the nested-tags file should warn: {issues:#?}"
3402        );
3403        assert_eq!(
3404            tag_issues[0].file,
3405            PathBuf::from("records/contacts/nested.md")
3406        );
3407        assert_eq!(tag_issues[0].severity, Severity::Warning);
3408    }
3409
3410    // ── wiki-links ────────────────────────────────────────────────────────────
3411
3412    #[test]
3413    fn short_form_wiki_link_is_error() {
3414        let fx = Fixture::new();
3415        let mut body = valid_contact("links to a short form");
3416        body.push_str("\nSee [[sarah-chen]] for details.\n");
3417        fx.write("wiki/people/a.md", &body);
3418        let issues = fx.store_all();
3419        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3420        assert!(issue.is_error());
3421        assert!(issue.message.contains("sarah-chen"));
3422        // A short-form link must NOT also be reported broken — fix the form first.
3423        assert!(
3424            !issues
3425                .iter()
3426                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
3427            "short-form should suppress broken: {issues:#?}"
3428        );
3429    }
3430
3431    #[test]
3432    fn broken_full_path_wiki_link_is_error() {
3433        let fx = Fixture::new();
3434        let mut body = valid_contact("links to a missing file");
3435        body.push_str("\nSee [[records/contacts/ghost]].\n");
3436        fx.write("wiki/people/a.md", &body);
3437        let issues = fx.store_all();
3438        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
3439        assert!(issue.is_error());
3440        assert!(issue.message.contains("records/contacts/ghost"));
3441        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3442    }
3443
3444    #[test]
3445    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
3446        let fx = Fixture::new();
3447        let mut body = valid_contact("links with traversal");
3448        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
3449        fx.write("wiki/people/a.md", &body);
3450        let issues = fx.store_all();
3451        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
3452        assert!(issue.message.contains("not a safe store-relative path"));
3453        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3454    }
3455
3456    #[test]
3457    fn valid_full_path_wiki_link_passes() {
3458        let fx = Fixture::new();
3459        fx.write("records/contacts/target.md", &valid_contact("target"));
3460        let mut body = valid_contact("links to target");
3461        body.push_str("\nSee [[records/contacts/target]].\n");
3462        fx.write("wiki/people/a.md", &body);
3463        let issues = fx.store_all();
3464        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3465        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
3466    }
3467
3468    #[test]
3469    fn md_extension_wiki_link_warns_and_resolves() {
3470        let fx = Fixture::new();
3471        fx.write("records/contacts/target.md", &valid_contact("target"));
3472        let mut body = valid_contact("links with extension");
3473        body.push_str("\nSee [[records/contacts/target.md]].\n");
3474        fx.write("wiki/people/a.md", &body);
3475        let issues = fx.store_all();
3476        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
3477        assert_eq!(issue.severity, Severity::Warning);
3478        assert_eq!(
3479            issue.suggestion.as_deref(),
3480            Some("drop the extension: [[records/contacts/target]]")
3481        );
3482        // The target exists once `.md` is stripped → not broken.
3483        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3484    }
3485
3486    #[test]
3487    fn wiki_links_in_code_fences_are_ignored() {
3488        let fx = Fixture::new();
3489        let mut body = valid_contact("has a fenced example");
3490        body.push_str("\n```\n[[sarah-chen]]\n```\n");
3491        fx.write("wiki/people/a.md", &body);
3492        let issues = fx.store_all();
3493        assert!(
3494            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
3495            "fenced wiki-links must be ignored: {issues:#?}"
3496        );
3497    }
3498
3499    #[test]
3500    fn flow_form_link_list_in_frontmatter_is_error() {
3501        let fx = Fixture::new();
3502        fx.write(
3503            "records/meetings/m.md",
3504            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
3505        );
3506        let issues = fx.store_all();
3507        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
3508        assert!(issue.is_error());
3509        assert_eq!(issue.key.as_deref(), Some("attendees"));
3510    }
3511
3512    #[test]
3513    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
3514        let fx = Fixture::new();
3515        fx.write("records/contacts/a.md", &valid_contact("a"));
3516        fx.write("records/contacts/b.md", &valid_contact("b"));
3517        fx.write(
3518            "records/meetings/m.md",
3519            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
3520        );
3521        let issues = fx.store_all();
3522        assert!(
3523            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
3524            "{issues:#?}"
3525        );
3526        // Block-form link targets are still integrity-checked (both exist here).
3527        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3528    }
3529
3530    #[test]
3531    fn frontmatter_short_form_link_field_is_error() {
3532        let fx = Fixture::new();
3533        // `related` is a *custom* (non-schema) wiki-link field, so it goes
3534        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
3535        fx.write(
3536            "wiki/people/a.md",
3537            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
3538        );
3539        let issues = fx.store_all();
3540        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3541        assert!(issue.is_error());
3542        assert_eq!(issue.key.as_deref(), Some("related"));
3543    }
3544
3545    #[test]
3546    fn unquoted_frontmatter_link_is_recognized() {
3547        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
3548        // string. The validator must still see it as a wiki-link (text-based
3549        // extraction). A short-form custom field must report SHORT_FORM, and a
3550        // full-path one with a missing target must report BROKEN.
3551        let fx = Fixture::new();
3552        fx.write(
3553            "wiki/people/short.md",
3554            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
3555        );
3556        fx.write(
3557            "wiki/people/broken.md",
3558            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
3559        );
3560        let issues = fx.store_all();
3561        assert!(
3562            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
3563                && i.file == *"wiki/people/short.md"
3564                && i.key.as_deref() == Some("related")),
3565            "unquoted short-form frontmatter link must be caught: {issues:#?}"
3566        );
3567        assert!(
3568            issues
3569                .iter()
3570                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.file == *"wiki/people/broken.md"),
3571            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
3572        );
3573    }
3574
3575    #[test]
3576    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
3577        // A short-form value in a *declared* link field (a `### contact` schema
3578        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
3579        // (the target isn't under the prefix), and must NOT also be reported as a
3580        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
3581        let mut fx = Fixture::new();
3582        fx.config.schemas.insert(
3583            "contact".into(),
3584            Schema {
3585                fields: vec![FieldSpec {
3586                    name: "company".into(),
3587                    link_prefix: Some(PathBuf::from("records/companies")),
3588                    ..Default::default()
3589                }],
3590                ..Default::default()
3591            },
3592        );
3593        fx.write(
3594            "records/contacts/a.md",
3595            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
3596        );
3597        let issues = fx.store_all();
3598        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
3599        assert_eq!(issue.key.as_deref(), Some("company"));
3600        // The same link must NOT also be double-reported via the generic path.
3601        assert!(
3602            !issues
3603                .iter()
3604                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
3605                    && i.key.as_deref() == Some("company")),
3606            "schema link fields are checked once, by the schema path: {issues:#?}"
3607        );
3608    }
3609
3610    #[test]
3611    fn schema_link_field_with_md_extension_still_warns() {
3612        let mut fx = Fixture::new();
3613        fx.config.schemas.insert(
3614            "contact".into(),
3615            Schema {
3616                fields: vec![FieldSpec {
3617                    name: "company".into(),
3618                    link_prefix: Some(PathBuf::from("records/companies")),
3619                    ..Default::default()
3620                }],
3621                ..Default::default()
3622            },
3623        );
3624        fx.write(
3625            "records/companies/acme.md",
3626            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
3627        );
3628        fx.write(
3629            "records/contacts/a.md",
3630            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
3631        );
3632        let issues = fx.store_all();
3633        let issue = issues
3634            .iter()
3635            .find(|i| {
3636                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
3637            })
3638            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
3639        assert_eq!(issue.severity, Severity::Warning);
3640        assert!(
3641            !issues
3642                .iter()
3643                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
3644            "extensionless existence check should still find acme.md: {issues:#?}"
3645        );
3646    }
3647
3648    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
3649
3650    #[test]
3651    fn explicit_schema_required_shape_enum() {
3652        let fx = {
3653            let mut fx = Fixture::new();
3654            // contact schema: name required, email required+email shape,
3655            // status enum: active|inactive
3656            let schema = Schema {
3657                fields: vec![
3658                    FieldSpec {
3659                        name: "name".into(),
3660                        required: true,
3661                        ..Default::default()
3662                    },
3663                    FieldSpec {
3664                        name: "email".into(),
3665                        required: true,
3666                        shape: Some(Shape::Email),
3667                        ..Default::default()
3668                    },
3669                    FieldSpec {
3670                        name: "status".into(),
3671                        enum_values: Some(vec!["active".into(), "inactive".into()]),
3672                        ..Default::default()
3673                    },
3674                ],
3675                ..Default::default()
3676            };
3677            fx.config.schemas.insert("contact".into(), schema);
3678            fx
3679        };
3680        fx.write(
3681            "records/contacts/a.md",
3682            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
3683        );
3684        let issues = fx.store_all();
3685        // name absent → MISSING_REQUIRED
3686        assert!(
3687            issues
3688                .iter()
3689                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
3690                    && i.key.as_deref() == Some("name")),
3691            "{issues:#?}"
3692        );
3693        // email malformed → SHAPE_MISMATCH
3694        assert!(
3695            issues.iter().any(
3696                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
3697            ),
3698            "{issues:#?}"
3699        );
3700        // status archived not in enum → ENUM_VIOLATION
3701        assert!(
3702            issues
3703                .iter()
3704                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
3705                    && i.key.as_deref() == Some("status")),
3706            "{issues:#?}"
3707        );
3708    }
3709
3710    #[test]
3711    fn schema_without_link_field_allows_plain_value() {
3712        // A `contact` schema with no `company` link field means a plain `company`
3713        // string is fine — schema enforcement is exactly what the store declares,
3714        // nothing implicit.
3715        let mut fx = Fixture::new();
3716        fx.config.schemas.insert(
3717            "contact".into(),
3718            Schema {
3719                fields: vec![FieldSpec {
3720                    name: "name".into(),
3721                    required: true,
3722                    ..Default::default()
3723                }],
3724                ..Default::default()
3725            },
3726        );
3727        fx.write(
3728            "records/contacts/a.md",
3729            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
3730        );
3731        let issues = fx.store_all();
3732        assert!(
3733            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
3734            "no declared link field for `company` → a plain value is fine: {issues:#?}"
3735        );
3736    }
3737
3738    #[test]
3739    fn schema_link_field_plain_value_is_prefix_mismatch() {
3740        // The surviving link-enforcement path: a declared `link to <prefix>/`
3741        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
3742        let mut fx = Fixture::new();
3743        fx.config.schemas.insert(
3744            "contact".into(),
3745            Schema {
3746                fields: vec![FieldSpec {
3747                    name: "company".into(),
3748                    link_prefix: Some(PathBuf::from("records/companies")),
3749                    ..Default::default()
3750                }],
3751                ..Default::default()
3752            },
3753        );
3754        fx.write(
3755            "records/contacts/a.md",
3756            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
3757        );
3758        let issues = fx.store_all();
3759        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
3760        assert_eq!(issue.key.as_deref(), Some("company"));
3761        assert!(issue
3762            .suggestion
3763            .as_deref()
3764            .unwrap()
3765            .contains("records/companies/"));
3766    }
3767
3768    #[test]
3769    fn schema_shape_int_and_url_and_currency() {
3770        let mut fx = Fixture::new();
3771        fx.config.schemas.insert(
3772            "widget".into(),
3773            Schema {
3774                fields: vec![
3775                    FieldSpec {
3776                        name: "qty".into(),
3777                        shape: Some(Shape::Int),
3778                        ..Default::default()
3779                    },
3780                    FieldSpec {
3781                        name: "site".into(),
3782                        shape: Some(Shape::Url),
3783                        ..Default::default()
3784                    },
3785                    FieldSpec {
3786                        name: "price".into(),
3787                        shape: Some(Shape::Currency),
3788                        ..Default::default()
3789                    },
3790                ],
3791                ..Default::default()
3792            },
3793        );
3794        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
3795        // ISO code + amount). It must pass — it used to spuriously fail.
3796        fx.write(
3797            "records/widgets/ok.md",
3798            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
3799        );
3800        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
3801        // because the old impl leaned on `f64::parse`. `price: inf` here guards
3802        // the under-rejection half of the finding.
3803        fx.write(
3804            "records/widgets/bad.md",
3805            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
3806        );
3807        let issues = fx.store_all();
3808        let bad_shape: Vec<_> = issues
3809            .iter()
3810            .filter(|i| {
3811                i.code == codes::SCHEMA_SHAPE_MISMATCH && i.file == *"records/widgets/bad.md"
3812            })
3813            .map(|i| i.key.clone().unwrap_or_default())
3814            .collect();
3815        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
3816        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
3817        assert!(
3818            bad_shape.contains(&"price".to_string()),
3819            "inf must be rejected as currency: {issues:#?}"
3820        );
3821        assert!(
3822            !issues
3823                .iter()
3824                .any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
3825                    && i.file == *"records/widgets/ok.md"),
3826            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
3827        );
3828    }
3829
3830    #[test]
3831    fn is_currency_accepts_codes_and_rejects_non_numeric() {
3832        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
3833        for ok in [
3834            "100",
3835            "1234.56",
3836            "$1,234.50",
3837            "USD 100", // the finding's headline probe — used to be false
3838            "usd 100", // case-insensitive code
3839            "EUR 9.50",
3840            "£12",
3841            "¥1000",
3842            "-5.00", // signed amounts are real (refunds)
3843            "+5",
3844            "1,000,000",
3845        ] {
3846            assert!(is_currency(ok), "expected currency: {ok:?}");
3847        }
3848        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
3849        // bare-code / exponent cases the docstring forbids.
3850        for bad in [
3851            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
3852            "12.999", // 3 decimals
3853            "1.2345", // 4 decimals
3854            "USD",    // bare code, no amount
3855            "$",      // bare symbol
3856            "free", "", " ", "1e3",      // exponent form
3857            "1.",       // trailing dot, no fractional digits
3858            ".5",       // leading dot, no integer digits
3859            "1 000",    // space as separator is not a thousands separator
3860            "USDD 100", // 4-letter "code" must not strip
3861        ] {
3862            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
3863        }
3864    }
3865
3866    // ── policies ───────────────────────────────────────────────────────────
3867
3868    #[test]
3869    fn ignored_type_present_is_info() {
3870        let mut fx = Fixture::new();
3871        fx.config.ignored_types.push("temp".into());
3872        fx.write(
3873            "records/temps/x.md",
3874            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
3875        );
3876        let issues = fx.store_all();
3877        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
3878        assert_eq!(issue.severity, Severity::Info);
3879        assert!(!issue.is_error());
3880        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3881    }
3882
3883    #[test]
3884    fn wiki_page_derived_from_ignored_type_warns() {
3885        let mut fx = Fixture::new();
3886        fx.config.ignored_types.push("temp".into());
3887        fx.write(
3888            "records/temps/x.md",
3889            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
3890        );
3891        fx.write(
3892            "wiki/themes/t.md",
3893            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
3894        );
3895        let issues = fx.store_all();
3896        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
3897        assert_eq!(issue.severity, Severity::Warning);
3898        assert_eq!(issue.key.as_deref(), Some("derived_from"));
3899        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3900    }
3901
3902    /// The shared `derived_from_ignored_type` entry point — the single
3903    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
3904    /// warning) now route through, so they cannot diverge. This pins its
3905    /// contract directly: the type gate, the empty-ignored-types gate, a
3906    /// positive match carrying the resolved target type, and a non-ignored
3907    /// target rejected.
3908    #[test]
3909    fn derived_from_ignored_type_is_the_shared_policy_decision() {
3910        let mut fx = Fixture::new();
3911        fx.config.ignored_types.push("secret".into());
3912        // An ignored-type record …
3913        fx.write(
3914            "records/secrets/s.md",
3915            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
3916        );
3917        // … and a non-ignored record.
3918        fx.write(
3919            "records/contacts/c.md",
3920            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
3921        );
3922        let store = fx.store();
3923
3924        // Positive: a wiki-page deriving from the ignored-type record matches,
3925        // and the hit carries both the target (as written) and its resolved type.
3926        let hit =
3927            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s"))
3928                .expect("wiki-page → ignored-type record must match");
3929        assert_eq!(hit.target, "records/secrets/s");
3930        assert_eq!(hit.target_type, "secret");
3931
3932        // Type gate: a non-`wiki-page` type never triggers, even with the same
3933        // ignored-type target.
3934        assert_eq!(
3935            derived_from_ignored_type(&store, "contact", std::iter::once("records/secrets/s")),
3936            None,
3937            "only wiki-page derivation is policed"
3938        );
3939
3940        // Target gate: a wiki-page deriving from a non-ignored record is fine.
3941        assert_eq!(
3942            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/contacts/c")),
3943            None,
3944            "deriving from a non-ignored type is allowed"
3945        );
3946
3947        // First match wins across multiple targets (here the second is the hit).
3948        let hit = derived_from_ignored_type(
3949            &store,
3950            "wiki-page",
3951            ["records/contacts/c", "records/secrets/s"],
3952        )
3953        .expect("a later ignored-type target must still be found");
3954        assert_eq!(hit.target, "records/secrets/s");
3955
3956        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
3957        fx.config.ignored_types.clear();
3958        let store = fx.store();
3959        assert_eq!(
3960            derived_from_ignored_type(&store, "wiki-page", std::iter::once("records/secrets/s")),
3961            None,
3962            "an empty ignored-types policy short-circuits"
3963        );
3964    }
3965
3966    // ── duplicates ───────────────────────────────────────────────────────────
3967
3968    #[test]
3969    fn dup_id_is_hard_error_with_related() {
3970        let fx = Fixture::new();
3971        fx.write(
3972            "records/contacts/a.md",
3973            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
3974        );
3975        fx.write(
3976            "records/contacts/b.md",
3977            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
3978        );
3979        let issues = fx.store_all();
3980        // Reporting rule #1: ONE issue per collision group, keyed on the
3981        // lexicographically smallest path (`a.md`), partner in `related`.
3982        assert_eq!(
3983            count(&issues, codes::DUP_ID),
3984            1,
3985            "one issue per group: {issues:#?}"
3986        );
3987        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
3988        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
3989        assert!(a.is_error());
3990        assert_eq!(a.key.as_deref(), Some("id"));
3991        assert_eq!(
3992            a.line,
3993            Some(3),
3994            "anchors to the `id` line on the reported file"
3995        );
3996        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
3997    }
3998
3999    #[test]
4000    fn dup_id_not_fired_in_working_set() {
4001        // DUP_* is an --all-only cross-file check; the working set must not run it.
4002        let fx = Fixture::new();
4003        fx.write(
4004            "records/contacts/a.md",
4005            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
4006        );
4007        fx.write(
4008            "records/contacts/b.md",
4009            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
4010        );
4011        // Log says both changed since epoch, so they're in the working set.
4012        fx.write(
4013            "log.md",
4014            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
4015        );
4016        let issues = validate_working_set(&fx.store(), None).unwrap();
4017        assert!(
4018            !has(&issues, codes::DUP_ID),
4019            "DUP_ID is --all only: {issues:#?}"
4020        );
4021    }
4022
4023    #[test]
4024    fn dup_unique_key_single_field_is_warning() {
4025        let mut fx = Fixture::new();
4026        // contact declares `- unique: email`.
4027        fx.config.schemas.insert(
4028            "contact".into(),
4029            Schema {
4030                unique_keys: vec![vec!["email".into()]],
4031                ..Default::default()
4032            },
4033        );
4034        for (f, name) in [("a", "A"), ("b", "B")] {
4035            fx.write(
4036                &format!("records/contacts/{f}.md"),
4037                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
4038            );
4039        }
4040        let issues = fx.store_all();
4041        // One issue per group (rule #1), keyed on the smallest path, anchored to
4042        // the single `email` field.
4043        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
4044        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4045        assert_eq!(dup.severity, Severity::Warning);
4046        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
4047        assert_eq!(dup.key.as_deref(), Some("email"));
4048        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
4049    }
4050
4051    #[test]
4052    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
4053        let mut fx = Fixture::new();
4054        // expense declares `- unique: date, amount, vendor` (a compound key).
4055        fx.config.schemas.insert(
4056            "expense".into(),
4057            Schema {
4058                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
4059                ..Default::default()
4060            },
4061        );
4062        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
4063        let exp = |f: &str, amount: &str| {
4064            format!(
4065            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
4066        )
4067        };
4068        fx.write("records/expenses/e1.md", &exp("e1", "100"));
4069        fx.write("records/expenses/e2.md", &exp("e2", "100"));
4070        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
4071        let issues = fx.store_all();
4072        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
4073        // (e1) with e2 in `related`; e3 differs on amount and never appears.
4074        assert_eq!(
4075            count(&issues, codes::DUP_UNIQUE_KEY),
4076            1,
4077            "only e1+e2 collide, one issue: {issues:#?}"
4078        );
4079        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4080        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
4081        assert_eq!(
4082            dup.line,
4083            Some(1),
4084            "compound-key collision anchors to line 1"
4085        );
4086        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
4087        assert!(
4088            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
4089                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
4090            "e3 differs on amount and must not collide: {issues:#?}"
4091        );
4092    }
4093
4094    #[test]
4095    fn dup_unique_key_list_field_is_order_independent() {
4096        let mut fx = Fixture::new();
4097        // meeting declares `- unique: date, attendees`; the list field is a set.
4098        fx.config.schemas.insert(
4099            "meeting".into(),
4100            Schema {
4101                unique_keys: vec![vec!["date".into(), "attendees".into()]],
4102                ..Default::default()
4103            },
4104        );
4105        fx.write("records/contacts/a.md", &valid_contact("a"));
4106        fx.write("records/contacts/b.md", &valid_contact("b"));
4107        let m = |f: &str, order: &str| {
4108            let attendees = if order == "ab" {
4109                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
4110            } else {
4111                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
4112            };
4113            format!(
4114                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
4115            )
4116        };
4117        fx.write("records/meetings/m1.md", &m("m1", "ab"));
4118        fx.write("records/meetings/m2.md", &m("m2", "ba"));
4119        let issues = fx.store_all();
4120        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
4121        // → a single issue on the smaller path.
4122        assert_eq!(
4123            count(&issues, codes::DUP_UNIQUE_KEY),
4124            1,
4125            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
4126        );
4127        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
4128        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
4129        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
4130    }
4131
4132    // ── indexes ───────────────────────────────────────────────────────────────
4133
4134    #[test]
4135    fn missing_indexes_at_all_three_levels() {
4136        let fx = Fixture::new();
4137        fx.write("records/contacts/a.md", &valid_contact("a"));
4138        let issues = fx.store_all();
4139        // root, layer (records), and type-folder (records/contacts) all missing.
4140        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
4141        // would-be index.md), per the field convention `EXPECTED` pins.
4142        let missing_files: BTreeSet<PathBuf> = issues
4143            .iter()
4144            .filter(|i| i.code == codes::INDEX_MISSING)
4145            .map(|i| i.file.clone())
4146            .collect();
4147        assert!(
4148            missing_files.contains(&PathBuf::from("index.md")),
4149            "{issues:#?}"
4150        );
4151        assert!(
4152            missing_files.contains(&PathBuf::from("records/index.md")),
4153            "{issues:#?}"
4154        );
4155        assert!(
4156            missing_files.contains(&PathBuf::from("records/contacts")),
4157            "{issues:#?}"
4158        );
4159        // When the index.md is entirely absent we do NOT additionally fire
4160        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
4161        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
4162    }
4163
4164    #[test]
4165    fn index_stale_entry_and_missing_entry() {
4166        let fx = Fixture::new();
4167        fx.write(
4168            "records/contacts/present.md",
4169            &valid_contact("present contact"),
4170        );
4171        // Indexes for the parents (root/layer) present so we isolate type-folder.
4172        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4173        fx.write(
4174            "records/index.md",
4175            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4176        );
4177        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
4178        fx.write(
4179            "records/contacts/index.md",
4180            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
4181        );
4182        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
4183        let issues = fx.store_all();
4184        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
4185        assert!(stale.message.contains("ghost"));
4186        assert!(stale.is_error());
4187        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
4188        assert!(
4189            missing.message.contains("present.md"),
4190            "{}",
4191            missing.message
4192        );
4193    }
4194
4195    #[test]
4196    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
4197        let fx = Fixture::new();
4198        fx.write("records/contacts/a.md", &valid_contact("a"));
4199        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4200        fx.write(
4201            "records/index.md",
4202            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4203        );
4204        fx.write(
4205            "records/contacts/index.md",
4206            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
4207        );
4208        fx.write(
4209            "records/contacts/index.jsonl",
4210            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4211        );
4212        let issues = fx.store_all();
4213        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
4214        assert!(stale.message.contains("not a safe store-relative path"));
4215    }
4216
4217    #[test]
4218    fn index_summary_mismatch() {
4219        let fx = Fixture::new();
4220        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
4221        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4222        fx.write(
4223            "records/index.md",
4224            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4225        );
4226        fx.write(
4227            "records/contacts/index.md",
4228            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
4229        );
4230        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
4231        let issues = fx.store_all();
4232        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
4233        assert!(issue.is_error());
4234        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
4235    }
4236
4237    #[test]
4238    fn index_summary_match_passes() {
4239        let fx = Fixture::new();
4240        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
4241        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4242        fx.write(
4243            "records/index.md",
4244            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4245        );
4246        fx.write(
4247            "records/contacts/index.md",
4248            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
4249        );
4250        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
4251        let issues = fx.store_all();
4252        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
4253    }
4254
4255    #[test]
4256    fn index_entry_with_tag_suffix_matches_summary() {
4257        let fx = Fixture::new();
4258        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
4259        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4260        fx.write(
4261            "records/index.md",
4262            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4263        );
4264        // Entry carries a ` · #tag` suffix which must be stripped before compare.
4265        fx.write(
4266            "records/contacts/index.md",
4267            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary · #customer\n",
4268        );
4269        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
4270        let issues = fx.store_all();
4271        assert!(
4272            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
4273            "tag suffix should be stripped: {issues:#?}"
4274        );
4275    }
4276
4277    #[test]
4278    fn index_jsonl_desync_missing_file_in_jsonl() {
4279        let fx = Fixture::new();
4280        fx.write("records/contacts/a.md", &valid_contact("a"));
4281        fx.write("records/contacts/b.md", &valid_contact("b"));
4282        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
4283        fx.write(
4284            "records/index.md",
4285            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4286        );
4287        fx.write(
4288            "records/contacts/index.md",
4289            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
4290        );
4291        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
4292        fx.write(
4293            "records/contacts/index.jsonl",
4294            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4295        );
4296        let issues = fx.store_all();
4297        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
4298        assert!(desync.message.contains("b.md"), "{}", desync.message);
4299    }
4300
4301    #[test]
4302    fn index_jsonl_desync_record_points_at_missing_file() {
4303        let fx = Fixture::new();
4304        fx.write("records/contacts/a.md", &valid_contact("a"));
4305        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4306        fx.write(
4307            "records/index.md",
4308            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4309        );
4310        fx.write(
4311            "records/contacts/index.md",
4312            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
4313        );
4314        fx.write(
4315            "records/contacts/index.jsonl",
4316            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
4317        );
4318        let issues = fx.store_all();
4319        assert!(
4320            issues
4321                .iter()
4322                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
4323            "{issues:#?}"
4324        );
4325    }
4326
4327    #[test]
4328    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
4329        let fx = Fixture::new();
4330        fx.write("records/contacts/a.md", &valid_contact("a"));
4331        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4332        fx.write(
4333            "records/index.md",
4334            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4335        );
4336        fx.write(
4337            "records/contacts/index.md",
4338            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
4339        );
4340        fx.write(
4341            "records/contacts/index.jsonl",
4342            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
4343        );
4344        let issues = fx.store_all();
4345        assert!(
4346            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
4347                && i.message.contains("not a safe store-relative path")),
4348            "{issues:#?}"
4349        );
4350    }
4351
4352    #[test]
4353    fn index_jsonl_stale_summary() {
4354        let fx = Fixture::new();
4355        fx.write("records/contacts/a.md", &valid_contact("real summary"));
4356        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4357        fx.write(
4358            "records/index.md",
4359            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4360        );
4361        fx.write(
4362            "records/contacts/index.md",
4363            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
4364        );
4365        // jsonl summary disagrees with the file frontmatter.
4366        fx.write(
4367            "records/contacts/index.jsonl",
4368            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
4369        );
4370        let issues = fx.store_all();
4371        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4372        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4373        assert!(stale.key.as_deref().unwrap().contains("summary"));
4374    }
4375
4376    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
4377    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
4378    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
4379    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
4380    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
4381    /// `email` validated clean and answered `--where email=…` with a phantom
4382    /// value present in no file. This is the direct regression guard.
4383    #[test]
4384    fn index_jsonl_stale_queryable_field_email() {
4385        let fx = Fixture::new();
4386        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
4387        fx.write("records/contacts/a.md", contact);
4388        // Start from the canonical, fully-correct sidecar set …
4389        fx.rebuild_indexes();
4390        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
4391        let good = fs::read_to_string(&jsonl_path).unwrap();
4392        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
4393        assert!(
4394            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4395            "freshly-rebuilt sidecar must not be stale"
4396        );
4397        // … then desync ONLY the email so it's the single differing field.
4398        assert!(
4399            good.contains("real@correct.com"),
4400            "sidecar projects email: {good}"
4401        );
4402        fx.write(
4403            "records/contacts/index.jsonl",
4404            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
4405        );
4406
4407        let issues = fx.store_all();
4408        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4409        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4410        // The mismatch is reported precisely on `email`, and summary/type — which
4411        // still match — are NOT named.
4412        let key = stale.key.as_deref().unwrap();
4413        assert!(
4414            key.contains("email"),
4415            "expected `email` in stale key, got {key:?}"
4416        );
4417        assert!(!key.contains("summary"), "summary still matches: {key:?}");
4418        assert!(!key.contains("type"), "type still matches: {key:?}");
4419    }
4420
4421    /// Broaden the guard across the typed/list/timestamp projections at once:
4422    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
4423    /// caught, with all three named in one issue.
4424    #[test]
4425    fn index_jsonl_stale_typed_and_list_fields() {
4426        let fx = Fixture::new();
4427        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
4428        fx.write("records/expenses/e.md", expense);
4429        fx.rebuild_indexes();
4430        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
4431        let good = fs::read_to_string(&jsonl_path).unwrap();
4432        assert!(
4433            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4434            "freshly-rebuilt sidecar must not be stale"
4435        );
4436        // Desync a list field (tags), a timestamp (updated), and a number (amount).
4437        let stale_line = good
4438            .replace("\"q2\"", "\"WRONG-TAG\"")
4439            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
4440            .replace("1299", "9999");
4441        fx.write("records/expenses/index.jsonl", &stale_line);
4442
4443        let issues = fx.store_all();
4444        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4445        let key = stale.key.as_deref().unwrap();
4446        for expected in ["amount", "tags", "updated"] {
4447            assert!(
4448                key.contains(expected),
4449                "expected `{expected}` in stale key, got {key:?}"
4450            );
4451        }
4452    }
4453
4454    #[test]
4455    fn index_orphan_in_noncanonical_folder() {
4456        let fx = Fixture::new();
4457        fx.write("records/contacts/a.md", &valid_contact("a"));
4458        // Build the canonical indexes so they aren't reported as orphans.
4459        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4460        fx.write(
4461            "records/index.md",
4462            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4463        );
4464        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4465        fx.write(
4466            "records/contacts/index.jsonl",
4467            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4468        );
4469        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
4470        fx.write(
4471            "records/contacts/subfolder/index.md",
4472            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
4473        );
4474        let issues = fx.store_all();
4475        let orphan = find(&issues, codes::INDEX_ORPHAN);
4476        assert_eq!(orphan.severity, Severity::Warning);
4477        assert_eq!(
4478            orphan.file,
4479            PathBuf::from("records/contacts/subfolder/index.md")
4480        );
4481    }
4482
4483    #[test]
4484    fn index_wrong_scope() {
4485        let fx = Fixture::new();
4486        fx.write("records/contacts/a.md", &valid_contact("a"));
4487        // Root index declares the wrong scope.
4488        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4489        fx.write(
4490            "records/index.md",
4491            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4492        );
4493        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4494        fx.write(
4495            "records/contacts/index.jsonl",
4496            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4497        );
4498        let issues = fx.store_all();
4499        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
4500        assert_eq!(issue.severity, Severity::Warning);
4501        assert_eq!(issue.file, PathBuf::from("index.md"));
4502    }
4503
4504    #[test]
4505    fn capped_type_folder_index_does_not_flag_missing_entries() {
4506        // Over the 500-entry cap, omitted entries are expected, not an error.
4507        let fx = Fixture::new();
4508        for i in 0..501 {
4509            fx.write(
4510                &format!("records/contacts/c{i:04}.md"),
4511                &valid_contact(&format!("contact {i}")),
4512            );
4513        }
4514        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
4515        fx.write(
4516            "records/index.md",
4517            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4518        );
4519        // Type-folder index lists only ONE entry + a More footer.
4520        fx.write(
4521            "records/contacts/index.md",
4522            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
4523        );
4524        // jsonl must still be complete — write all 501 lines.
4525        let mut jsonl = String::new();
4526        for i in 0..501 {
4527            jsonl.push_str(&format!(
4528                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
4529            ));
4530        }
4531        fx.write("records/contacts/index.jsonl", &jsonl);
4532        let issues = fx.store_all();
4533        assert!(
4534            !has(&issues, codes::INDEX_MISSING_ENTRY),
4535            "over the cap, missing browse entries are expected: {issues:#?}"
4536        );
4537        // But the jsonl is complete → no desync.
4538        assert!(
4539            !has(&issues, codes::INDEX_JSONL_DESYNC),
4540            "{:#?}",
4541            issues
4542                .iter()
4543                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
4544                .collect::<Vec<_>>()
4545        );
4546    }
4547
4548    // ── log ────────────────────────────────────────────────────────────────
4549
4550    #[test]
4551    fn log_bad_timestamp_unknown_kind_out_of_order() {
4552        let fx = Fixture::new();
4553        fx.write(
4554            "log.md",
4555            concat!(
4556                "---\ntype: log\n---\n\n# Log\n\n",
4557                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4558                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
4559                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
4560                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
4561            ),
4562        );
4563        let issues = fx.store_all();
4564        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4565        assert_eq!(
4566            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
4567            Severity::Warning
4568        );
4569        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
4570        assert_eq!(unknown.severity, Severity::Warning);
4571        assert!(unknown.message.contains("frobnicate"));
4572        assert!(unknown
4573            .suggestion
4574            .as_deref()
4575            .is_some_and(|s| s.contains("create")));
4576        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
4577        assert!(bad.is_error());
4578    }
4579
4580    #[test]
4581    fn log_validate_entry_without_object_is_well_formed() {
4582        let fx = Fixture::new();
4583        fx.write(
4584            "log.md",
4585            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
4586        );
4587        let issues = fx.store_all();
4588        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
4589        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
4590    }
4591
4592    #[test]
4593    fn log_in_order_is_clean() {
4594        let fx = Fixture::new();
4595        fx.write(
4596            "log.md",
4597            concat!(
4598                "---\ntype: log\n---\n\n",
4599                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4600                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
4601            ),
4602        );
4603        let issues = fx.store_all();
4604        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4605    }
4606
4607    #[test]
4608    fn log_not_checked_in_working_set() {
4609        // log.md ordering is an --all-only check.
4610        let fx = Fixture::new();
4611        fx.write(
4612            "log.md",
4613            concat!(
4614                "---\ntype: log\n---\n\n",
4615                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4616                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
4617            ),
4618        );
4619        let issues = validate_working_set(&fx.store(), None).unwrap();
4620        assert!(
4621            !has(&issues, codes::LOG_OUT_OF_ORDER),
4622            "log ordering is --all only: {issues:#?}"
4623        );
4624    }
4625
4626    // ── working-set scoping ───────────────────────────────────────────────────
4627
4628    #[test]
4629    fn working_set_validates_only_changed_files() {
4630        let fx = Fixture::new();
4631        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
4632        // in the log → working set must skip it.
4633        fx.write(
4634            "records/contacts/dirty.md",
4635            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4636        );
4637        fx.write(
4638            "records/contacts/unlogged.md",
4639            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4640        );
4641        fx.write(
4642            "log.md",
4643            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
4644        );
4645        let issues = validate_working_set(&fx.store(), None).unwrap();
4646        assert!(
4647            issues.iter().any(
4648                |i| i.code == codes::FM_BAD_TIMESTAMP && i.file == *"records/contacts/dirty.md"
4649            ),
4650            "{issues:#?}"
4651        );
4652        assert!(
4653            !issues
4654                .iter()
4655                .any(|i| i.file == *"records/contacts/unlogged.md"),
4656            "unlogged file must not be in the working set: {issues:#?}"
4657        );
4658    }
4659
4660    #[test]
4661    fn working_set_includes_incoming_linkers_to_changed_path() {
4662        let fx = Fixture::new();
4663        // `changed` was renamed/removed (logged). `linker` points at it with a
4664        // now-broken link and was NOT itself logged — but must be pulled in.
4665        fx.write(
4666            "wiki/people/linker.md",
4667            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
4668        );
4669        // `changed.md` does NOT exist on disk (removed).
4670        fx.write(
4671            "log.md",
4672            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
4673        );
4674        let issues = validate_working_set(&fx.store(), None).unwrap();
4675        assert!(
4676            issues
4677                .iter()
4678                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.file == *"wiki/people/linker.md"),
4679            "incoming linker to a removed path must be validated: {issues:#?}"
4680        );
4681    }
4682
4683    #[test]
4684    fn working_set_respects_explicit_since_cutoff() {
4685        let fx = Fixture::new();
4686        fx.write(
4687            "records/contacts/old.md",
4688            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4689        );
4690        fx.write(
4691            "records/contacts/new.md",
4692            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4693        );
4694        fx.write(
4695            "log.md",
4696            concat!(
4697                "---\ntype: log\n---\n\n",
4698                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
4699                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
4700            ),
4701        );
4702        // Cutoff after `old` but before `new`.
4703        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
4704        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
4705        assert!(
4706            issues.iter().any(|i| i.file == *"records/contacts/new.md"),
4707            "{issues:#?}"
4708        );
4709        assert!(
4710            !issues.iter().any(|i| i.file == *"records/contacts/old.md"),
4711            "old change is before the cutoff: {issues:#?}"
4712        );
4713    }
4714
4715    #[test]
4716    fn working_set_default_since_is_last_validate_entry() {
4717        let fx = Fixture::new();
4718        // `before` changed before the last validate; `after` changed after.
4719        fx.write(
4720            "records/contacts/before.md",
4721            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4722        );
4723        fx.write(
4724            "records/contacts/after.md",
4725            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4726        );
4727        fx.write(
4728            "log.md",
4729            concat!(
4730                "---\ntype: log\n---\n\n",
4731                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
4732                "## [2026-05-21 10:00] validate\nPASS\n\n",
4733                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
4734            ),
4735        );
4736        let issues = validate_working_set(&fx.store(), None).unwrap();
4737        assert!(
4738            issues
4739                .iter()
4740                .any(|i| i.file == *"records/contacts/after.md"),
4741            "{issues:#?}"
4742        );
4743        assert!(
4744            !issues
4745                .iter()
4746                .any(|i| i.file == *"records/contacts/before.md"),
4747            "change before the last validate entry is outside the default window: {issues:#?}"
4748        );
4749    }
4750
4751    // ── ordering / determinism ────────────────────────────────────────────────
4752
4753    #[test]
4754    fn issues_are_sorted_by_file_then_line() {
4755        let fx = Fixture::new();
4756        fx.write("wiki/people/z.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4757        fx.write("wiki/people/a.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4758        let issues = fx.store_all();
4759        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
4760        let mut sorted = files.clone();
4761        sorted.sort();
4762        assert_eq!(
4763            files, sorted,
4764            "issues must be emitted in a stable file order"
4765        );
4766    }
4767
4768    // ── boundaries: codes validate must NOT emit ──────────────────────────────
4769
4770    #[test]
4771    fn frozen_page_is_not_a_validate_error() {
4772        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
4773        // A clean file listed in `### Frozen pages` must validate clean.
4774        let mut fx = Fixture::new();
4775        fx.config
4776            .frozen_pages
4777            .push(PathBuf::from("records/decisions/d.md"));
4778        fx.write(
4779            "records/decisions/d.md",
4780            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
4781        );
4782        let issues = fx.store_all();
4783        assert!(
4784            !has(&issues, codes::POLICY_FROZEN_PAGE),
4785            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
4786        );
4787    }
4788
4789    #[test]
4790    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
4791        // The full-path doctrine makes ambiguity impossible; the defensive code
4792        // must never fire on a normal store.
4793        let fx = Fixture::new();
4794        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
4795        let mut body = valid_contact("links to sarah");
4796        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
4797        fx.write("wiki/people/p.md", &body);
4798        let issues = fx.store_all();
4799        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
4800    }
4801
4802    // ── unknown-type / unknown-field passthrough ──────────────────────────────
4803
4804    #[test]
4805    fn unknown_type_passes_through() {
4806        // A custom type is ambient context: it has a `type`, so no
4807        // FM_MISSING_TYPE, and with no matching schema there are no schema
4808        // errors. Only the universal contract (summary, timestamps) applies.
4809        let fx = Fixture::new();
4810        fx.write(
4811            "records/proposals/x.md",
4812            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
4813        );
4814        let issues = fx.store_all();
4815        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4816        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
4817        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
4818        // The unknown fields don't trip anything.
4819        assert!(
4820            !issues
4821                .iter()
4822                .any(|i| i.key.as_deref() == Some("custom_field")
4823                    || i.key.as_deref() == Some("budget")),
4824            "unknown fields are ambient context: {issues:#?}"
4825        );
4826    }
4827
4828    // ── find_links_to prefix-collision safety (working set) ───────────────────
4829
4830    #[test]
4831    fn incoming_linker_scan_does_not_prefix_match() {
4832        // A changed `records/contacts/sarah` must NOT pull in a file that only
4833        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
4834        let fx = Fixture::new();
4835        fx.write(
4836            "wiki/people/only-sarah-chen.md",
4837            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
4838        );
4839        // The log says `records/contacts/sarah` (the shorter path) changed.
4840        fx.write(
4841            "log.md",
4842            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
4843        );
4844        let issues = validate_working_set(&fx.store(), None).unwrap();
4845        assert!(
4846            !issues
4847                .iter()
4848                .any(|i| i.file == *"wiki/people/only-sarah-chen.md"),
4849            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
4850        );
4851    }
4852
4853    #[test]
4854    fn incoming_linker_scan_pulls_in_catalog_index_md() {
4855        // CONTRACT: the working-set incoming-linker scan rides the embedded-
4856        // ripgrep `Store::find_links_to`, which scans EVERY `.md` (including
4857        // `index.md` catalogs) — NOT the walk-and-read over `walk_content_files`,
4858        // which excludes `index.md`. A type-folder `index.md` that lists a now-
4859        // deleted target must be pulled into the working set so its dangling
4860        // catalog entry is flagged `WIKI_LINK_BROKEN`. The old walk-and-read
4861        // implementation skipped `index.md` and let this broken link survive the
4862        // loop silently; this test fails if anyone reverts to that path.
4863        let fx = Fixture::new();
4864        // A catalog that still lists the deleted contact (a real, common stale
4865        // state after a `delete`). No other file references the target, so the
4866        // catalog is the ONLY incoming linker — if it isn't scanned, nothing is.
4867        fx.write(
4868            "records/contacts/index.md",
4869            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
4870        );
4871        // The log says `records/contacts/sarah-chen` was deleted.
4872        fx.write(
4873            "log.md",
4874            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
4875        );
4876        let issues = validate_working_set(&fx.store(), None).unwrap();
4877        assert!(
4878            issues.iter().any(
4879                |i| i.file == *"records/contacts/index.md" && i.code == codes::WIKI_LINK_BROKEN
4880            ),
4881            "the catalog `index.md` linking to the deleted target must be pulled \
4882             into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
4883             uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
4884             walk-and-read): {issues:#?}"
4885        );
4886    }
4887
4888    #[test]
4889    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
4890        // CONTRACT (the O(changed × store) fix): the working-set scan finds
4891        // incoming linkers for EVERY changed object, and does so via the single
4892        // batch pass `Store::find_links_to_any` — not one full store read per
4893        // changed object. This test pins the behavior that makes the single-pass
4894        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
4895        // into the working set and flagged. A regression that scanned for only the
4896        // first/last changed object, or that dropped the batch union, would leave
4897        // one of the two broken links unreported and fail here.
4898        let fx = Fixture::new();
4899        // Linker A → deleted target #1 (in the body).
4900        fx.write(
4901            "wiki/people/refers-sarah.md",
4902            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
4903        );
4904        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
4905        // sidecar `links` projection would miss, which is why this must be a
4906        // content scan, not a sidecar read).
4907        fx.write(
4908            "records/meetings/2026/05/kickoff.md",
4909            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
4910        );
4911        // The log says BOTH targets were deleted in this window.
4912        fx.write(
4913            "log.md",
4914            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
4915        );
4916
4917        let issues = validate_working_set(&fx.store(), None).unwrap();
4918        assert!(
4919            issues
4920                .iter()
4921                .any(|i| i.file == *"wiki/people/refers-sarah.md"
4922                    && i.code == codes::WIKI_LINK_BROKEN),
4923            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
4924        );
4925        assert!(
4926            issues
4927                .iter()
4928                .any(|i| i.file == *"records/meetings/2026/05/kickoff.md"
4929                    && i.code == codes::WIKI_LINK_BROKEN),
4930            "linker to the SECOND deleted target (typed-field edge) must also be \
4931             pulled in and flagged — proves the scan covers the whole changed set, \
4932             not just one object: {issues:#?}"
4933        );
4934    }
4935
4936    #[test]
4937    fn frontmatter_block_sequence_links_each_get_their_own_line() {
4938        // Each block-sequence wiki-link reports on its own source line.
4939        let fx = Fixture::new();
4940        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
4941        fx.write(
4942            "records/meetings/m.md",
4943            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
4944        );
4945        let issues = fx.store_all();
4946        let broken_lines: BTreeSet<Option<u32>> = issues
4947            .iter()
4948            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
4949            .map(|i| i.line)
4950            .collect();
4951        assert_eq!(
4952            broken_lines.len(),
4953            2,
4954            "two distinct broken-link lines: {issues:#?}"
4955        );
4956    }
4957
4958    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
4959    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
4960    /// and the module doc-comment promises this code implements "exactly those
4961    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
4962    /// new validation code is added to the engine but never documented.
4963    #[test]
4964    fn every_code_constant_is_documented_in_spec() {
4965        // Parse the canonical constant *values* straight out of this module's
4966        // source, so a future `pub const X: &str = "X";` is covered with no test
4967        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
4968        let this_src = include_str!("validate.rs");
4969        let mut codes_in_module: Vec<String> = Vec::new();
4970        let mut in_codes_mod = false;
4971        for line in this_src.lines() {
4972            let t = line.trim();
4973            if t.starts_with("pub mod codes") {
4974                in_codes_mod = true;
4975                continue;
4976            }
4977            // The `mod codes` block ends at its closing brace at column 0.
4978            if in_codes_mod && line == "}" {
4979                break;
4980            }
4981            if in_codes_mod {
4982                if let Some(rest) = t.strip_prefix("pub const ") {
4983                    // rest = `NAME: &str = "VALUE";`
4984                    let value = rest
4985                        .split_once('=')
4986                        .map(|(_, v)| v.trim())
4987                        .and_then(|v| v.strip_prefix('"'))
4988                        .and_then(|v| v.strip_suffix("\";"))
4989                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
4990                    codes_in_module.push(value.to_string());
4991                }
4992            }
4993        }
4994        assert!(
4995            codes_in_module.len() >= 36,
4996            "parsed only {} code constants from `mod codes`; the parser likely \
4997             broke against a source-format change",
4998            codes_in_module.len()
4999        );
5000
5001        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
5002        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
5003        let spec = fs::read_to_string(&spec_path)
5004            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
5005
5006        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
5007        let missing: Vec<&String> = codes_in_module
5008            .iter()
5009            .filter(|code| !spec.contains(&format!("| `{code}` |")))
5010            .collect();
5011        assert!(
5012            missing.is_empty(),
5013            "validation codes emitted by the engine but absent from SPEC.md \
5014             § Validation (the declared complete vocabulary): {missing:?}"
5015        );
5016    }
5017}