Skip to main content

dbmd_core/
validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
//! per changed object, and not the parse-the-tree walk `--all` does). Outside
//! its no-logged-changes fallback (a per-file content sweep that does call
//! [`Store::walk`]), it never walks the store and never builds the global
//! cross-file state.
16//! [`validate_all`] is the full SWEEP: it adds the checks that need that global
17//! state — entity-dedup `DUP_*`, every-index sync, and `log.md` ordering.
18//!
19//! ## Why this module is self-contained
20//!
21//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
22//! log-header parse, and file walk here, reading only the two public,
23//! caller-populated fields of a [`Store`]: [`Store::root`] and
24//! [`Store::config`] — rather than routing through the sibling modules
25//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
26//! Keeping the checks local lets the validator report precise, per-issue
27//! diagnostics (exact codes, file, and context) without coupling its output to
28//! incidental behavior of the shared readers; the public surface and the
29//! emitted issue vocabulary are the contract.
30
31use std::collections::{BTreeMap, BTreeSet, HashMap};
32use std::path::{Component, Path, PathBuf};
33
34use chrono::{DateTime, FixedOffset, NaiveDateTime};
35use serde_norway::Value;
36
37use crate::parser::{Schema, Shape};
38use crate::store::Store;
39
/// How serious a validation [`Issue`] is. Validation fails (non-zero exit) as
/// soon as any issue is a [`Severity::Error`]; warnings and info never fail it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; this one blocks.
    Error,
    /// A decision point the agent resolves at its own discretion.
    Warning,
    /// Reported for visibility only; it never affects exit status.
    Info,
}
51
52/// A single structured validation finding. Agent-primary and machine-parseable
53/// via `--json`; `suggestion` is a deterministic remediation hint the agent
54/// applies without guessing.
55#[derive(Debug, Clone, PartialEq, Eq)]
56pub struct Issue {
57    /// The severity; only [`Severity::Error`] fails validation.
58    pub severity: Severity,
59    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
60    pub code: &'static str,
61    /// The file the issue is about.
62    pub file: PathBuf,
63    /// The 1-based line, when applicable.
64    pub line: Option<u32>,
65    /// The frontmatter key, when the issue is about a specific field.
66    pub key: Option<String>,
67    /// A human-readable message.
68    pub message: String,
69    /// A deterministic remediation hint, when one exists.
70    pub suggestion: Option<String>,
71    /// Other files involved (e.g. the duplicate partner in a collision).
72    pub related: Vec<PathBuf>,
73}
74
75impl Issue {
76    /// True if this issue fails validation (i.e. its severity is
77    /// [`Severity::Error`]).
78    pub fn is_error(&self) -> bool {
79        matches!(self.severity, Severity::Error)
80    }
81}
82
/// The canonical validation issue codes: one constant for each row of the
/// SPEC.md § Validation table. Call sites name these constants rather than
/// spelling bare string literals, so the code and the SPEC table cannot drift
/// apart silently.
pub mod codes {
    // ── store identity (`DB.md`) ────────────────────────────────────────────
    /// The path has no `DB.md`, so it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter is missing `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` contains an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";

    // ── frontmatter ─────────────────────────────────────────────────────────
    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// The frontmatter block is not valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` is not ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";

    // ── summary ─────────────────────────────────────────────────────────────
    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";

    // ── wiki-links ──────────────────────────────────────────────────────────
    /// A wiki-link target is not a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file does not exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension, which should be dropped.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses inline `[[[a]], [[b]]]`; block form is required.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";

    // ── entity dedup ────────────────────────────────────────────────────────
    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of one type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";

    // ── schema enforcement ──────────────────────────────────────────────────
    /// A `DB.md` schema requires a field that is absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value does not match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field holds a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value is not in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";

    // ── policies ────────────────────────────────────────────────────────────
    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file whose type is listed under `### Ignored types` exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `wiki-page` derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";

    // ── log.md ──────────────────────────────────────────────────────────────
    /// A `log.md` entry header timestamp cannot be parsed.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind is not recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries are not in non-decreasing time order (possible
    /// rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";

    // ── indexes (md + jsonl) ────────────────────────────────────────────────
    /// A non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file is not listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` does not match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text does not match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file is absent from the `index.jsonl`, or a jsonl record points at a
    /// missing file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields do not match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";

    // ── tags ────────────────────────────────────────────────────────────────
    /// `tags` is not a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
}
169
/// Upper bound, in chars, that the SPEC places on `summary`. A longer summary
/// is reported as `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
172
/// The `log.md` entry kinds the SPEC (§ `log.md`) recognizes. Any other kind
/// is reported as `LOG_UNKNOWN_KIND`, which is a warning rather than an error.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
186
187// ─────────────────────────────────────────────────────────────────────────────
188//  Public entrypoints
189// ─────────────────────────────────────────────────────────────────────────────
190
191/// **Loop default.** Validate the working set: content files changed since
192/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
193/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
194/// none of the cross-file global passes (entity-dedup, every-index sync,
195/// `log.md` ordering) that `--all` adds. If the default call finds no logged
196/// changed objects, it falls back to a per-file content sweep so an externally
197/// edited or freshly copied store cannot pass vacuously.
198///
199/// **Cost.** The changed set is read from `log.md` — O(changed): every
200/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
201/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
202/// over that set plus its incoming linkers — also O(changed). The one part that
203/// is *not* O(changed) is discovering those incoming linkers: a link to a
204/// changed path can live in the body or a typed frontmatter field of any file,
205/// so it is found by a **single** embedded-ripgrep pass over the store
206/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
207/// scan, flat in the changed-set size. (It was previously a full store read
208/// *per* changed object — `O(changed × store)`; that is the blow-up this path
209/// no longer pays.) The unavoidable single content scan is the same shape as
210/// free-text `dbmd search`; the sidecar `links` projection can't replace it
211/// because it omits body/typed-field edges.
212pub fn validate_working_set(
213    store: &Store,
214    since: Option<DateTime<FixedOffset>>,
215) -> crate::Result<Vec<Issue>> {
216    if !store_marker_present(store) {
217        return Ok(vec![not_a_store_issue(store)]);
218    }
219
220    let cutoff = match since {
221        Some(ts) => Some(ts),
222        None => last_validate_at(store),
223    };
224
225    // 1. Changed objects, straight from the log (O(changed) — never a walk).
226    let changed = changed_objects_since(store, cutoff);
227    if changed.is_empty() && since.is_none() {
228        return validate_content_sweep(store);
229    }
230
231    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
232    //    path (the linker may now be stale even though it didn't change). The
233    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
234    //    pass over the store for the WHOLE changed set (one `.md` walk, one
235    //    presence-only/early-exit scan per file), not one walk per object. This
236    //    is the fix for the `O(changed × store)` blow-up that calling
237    //    `find_links_to` in a loop produced (a full store read per changed
238    //    object); the cost is now a single store scan regardless of how many
239    //    objects changed. A returned self-link is harmlessly deduped by the set
240    //    (the object is already inserted below).
241    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
242    let mut working: BTreeSet<PathBuf> = changed;
243    for linker in store.find_links_to_any(&changed_targets)? {
244        working.insert(linker);
245    }
246
247    let mut issues = Vec::new();
248    for rel in &working {
249        let abs = store.root.join(rel);
250        // A changed path can be a *deletion* — skip files that no longer exist;
251        // the incoming-linker scan above already flagged links into them.
252        if !abs.is_file() {
253            continue;
254        }
255        // `None` basename index: the working-set pass does not build the
256        // store-wide basename map (that is a `--all`-only structure), so a bare
257        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
258        // `--all` sweep does the ambiguity upgrade.
259        check_content_file(store, rel, &abs, None, &mut issues);
260    }
261    issues.sort_by(issue_order);
262    Ok(issues)
263}
264
265fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
266    let mut issues = Vec::new();
267    for rel in store.walk()? {
268        let abs = store.root.join(&rel);
269        check_content_file(store, &rel, &abs, None, &mut issues);
270    }
271    issues.sort_by(issue_order);
272    Ok(issues)
273}
274
275/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
276/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
277/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
278/// loop.
279pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
280    if !store_marker_present(store) {
281        return Ok(vec![not_a_store_issue(store)]);
282    }
283
284    let mut issues = Vec::new();
285
286    // Store-identity file: `DB.md` shape (type / required fields / section
287    // headers). A single root file, checked once in the sweep — not a content
288    // file (it carries no `summary`), so it is not part of `walk_content_files`.
289    check_db_md(store, &mut issues);
290
291    let files = walk_content_files(&store.root);
292
293    // The basename index makes the short-form wiki-link check able to upgrade a
294    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
295    // Built once from the already-gathered sweep list (no extra walk); only the
296    // `--all` path has it (the working-set path stays O(changed)).
297    let basenames = build_basename_index(&files);
298
299    // Per-file checks over the whole store.
300    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
301    for rel in &files {
302        let abs = store.root.join(rel);
303        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
304            parsed.push((rel.clone(), p));
305        }
306    }
307
308    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
309    check_duplicates(store, &parsed, &mut issues);
310
311    // Cross-file: hierarchical index.md + index.jsonl sync.
312    check_indexes(store, &files, &mut issues);
313
314    // Cross-file: log.md well-formedness + ordering.
315    check_log(store, &mut issues);
316
317    issues.sort_by(issue_order);
318    Ok(issues)
319}
320
321// ─────────────────────────────────────────────────────────────────────────────
322//  Per-file content checks (shared by both scopes)
323// ─────────────────────────────────────────────────────────────────────────────
324
325/// What `validate_all`'s cross-file pass needs from a per-file parse: the
326/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
327/// text-based wiki-link extraction). The body and fence-line are consumed
328/// inline during the per-file pass and not carried here.
329struct Parsed {
330    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
331    /// YAML (a `FM_MALFORMED_YAML` was already emitted).
332    fm: Option<BTreeMap<String, Value>>,
333    /// The raw frontmatter YAML text (between the fences) — the source for
334    /// text-based wiki-link extraction in dedup.
335    fm_yaml: String,
336}
337
338/// Run every per-file check on one content file, pushing issues. Returns the
339/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
340/// `None` only when the file is unreadable or has no frontmatter block at all
341/// (which for a content file is itself reported).
342fn check_content_file(
343    store: &Store,
344    rel: &Path,
345    abs: &Path,
346    basenames: Option<&BasenameIndex>,
347    issues: &mut Vec<Issue>,
348) -> Option<Parsed> {
349    let text = match std::fs::read_to_string(abs) {
350        Ok(t) => t,
351        Err(_) => return None,
352    };
353
354    let is_content = is_content_file(rel);
355
356    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
357        Some(split) => split,
358        None => {
359            // No frontmatter at all. For a content file that means there's no
360            // `type:` and no `summary:` — report both the way a parsed-but-empty
361            // file would, so the agent gets the same actionable codes.
362            if is_content {
363                push(
364                    issues,
365                    Severity::Error,
366                    codes::FM_MISSING_TYPE,
367                    rel,
368                    None,
369                    Some("type".into()),
370                    "content file has no frontmatter `type:`".into(),
371                    Some("add a YAML frontmatter block with `type:`".into()),
372                    vec![],
373                );
374                push(
375                    issues,
376                    Severity::Error,
377                    codes::SUMMARY_MISSING,
378                    rel,
379                    None,
380                    Some("summary".into()),
381                    "content file has no `summary`".into(),
382                    Some("run `dbmd fm init`".into()),
383                    vec![],
384                );
385            }
386            return None;
387        }
388    };
389
390    // Parse the YAML block.
391    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
392        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
393        // An empty frontmatter block parses as Null; treat as an empty mapping.
394        Ok(Value::Null) => Some(BTreeMap::new()),
395        Ok(_) => {
396            // A scalar / sequence at the top level isn't a frontmatter mapping.
397            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
398            // block is opaque, so there is no single offending field line.
399            push(
400                issues,
401                Severity::Error,
402                codes::FM_MALFORMED_YAML,
403                rel,
404                Some(1),
405                None,
406                "frontmatter is not a YAML mapping".into(),
407                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
408                vec![],
409            );
410            None
411        }
412        Err(e) => {
413            // Anchor to line 1 (the opening `---`): an unparseable block has no
414            // single offending field line; the agent re-reads the whole block.
415            push(
416                issues,
417                Severity::Error,
418                codes::FM_MALFORMED_YAML,
419                rel,
420                Some(1),
421                None,
422                format!("frontmatter block isn't valid YAML: {e}"),
423                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
424                vec![],
425            );
426            None
427        }
428    };
429
430    if let Some(map) = &fm {
431        // The detailed frontmatter checks only run when the YAML parsed.
432        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
433    }
434
435    // Wiki-link doctrine checks run on the body of every content file (and
436    // also on index/log meta files, whose entries are wiki-links too).
437    check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
438
439    Some(Parsed { fm, fm_yaml })
440}
441
442/// All frontmatter-level checks for a content file with valid YAML.
443fn check_frontmatter(
444    store: &Store,
445    rel: &Path,
446    fm: &BTreeMap<String, Value>,
447    fm_yaml: &str,
448    basenames: Option<&BasenameIndex>,
449    issues: &mut Vec<Issue>,
450    is_content: bool,
451) {
452    let type_ = fm.get("type").and_then(scalar_string);
453
454    // ── type ────────────────────────────────────────────────────────────────
455    if is_content && type_.is_none() {
456        push(
457            issues,
458            Severity::Error,
459            codes::FM_MISSING_TYPE,
460            rel,
461            fm_key_line_or_top(fm_yaml, "type"),
462            Some("type".into()),
463            "content file has no `type:`".into(),
464            Some("add a `type:` field (e.g. `type: contact`)".into()),
465            vec![],
466        );
467    }
468
469    // ── summary (universal on content files) ──────────────────────────────────
470    if is_content {
471        check_summary(rel, fm, fm_yaml, issues);
472    }
473
474    // ── timestamps: created / updated ─────────────────────────────────────────
475    for (key, missing_code) in [
476        ("created", codes::FM_MISSING_CREATED),
477        ("updated", codes::FM_MISSING_UPDATED),
478    ] {
479        if is_content && !fm.contains_key(key) {
480            push(
481                issues,
482                Severity::Error,
483                missing_code,
484                rel,
485                fm_key_line_or_top(fm_yaml, key),
486                Some(key.into()),
487                format!("content file has no `{key}:` timestamp"),
488                Some(format!(
489                    "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
490                )),
491                vec![],
492            );
493        } else if let Some(v) = fm.get(key) {
494            if let Some(s) = scalar_string(v) {
495                if !is_iso8601(&s) {
496                    push(
497                        issues,
498                        Severity::Error,
499                        codes::FM_BAD_TIMESTAMP,
500                        rel,
501                        fm_key_line(fm_yaml, key),
502                        Some(key.into()),
503                        format!("`{key}` is not ISO-8601: {s:?}"),
504                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
505                        vec![],
506                    );
507                }
508            }
509        }
510    }
511    // ── tags shape ────────────────────────────────────────────────────────────
512    if let Some(tags) = fm.get("tags") {
513        if !is_flat_scalar_list(tags) {
514            push(
515                issues,
516                Severity::Warning,
517                codes::TAGS_MALFORMED,
518                rel,
519                fm_key_line(fm_yaml, "tags"),
520                Some("tags".into()),
521                "`tags` must be a flat YAML list of short scalar labels".into(),
522                Some("use block form: one `- <tag>` per line".into()),
523                vec![],
524            );
525        }
526    }
527
528    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
529    for key in detect_flow_form_link_lists(fm_yaml) {
530        push(
531            issues,
532            Severity::Error,
533            codes::WIKI_LINK_FLOW_FORM_LIST,
534            rel,
535            fm_key_line(fm_yaml, &key),
536            Some(key.clone()),
537            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
538            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
539            vec![],
540        );
541    }
542
543    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
544    // Skip keys that have an explicit `link to` schema spec — those are checked
545    // (with prefix enforcement) in `check_schema`, and double-reporting the same
546    // link via two paths would be noise.
547    let schema_link_keys: BTreeSet<String> =
548        effective_schema(store, type_.as_deref().unwrap_or(""))
549            .map(|s| {
550                s.fields
551                    .iter()
552                    .filter(|f| f.link_prefix.is_some())
553                    .map(|f| f.name.clone())
554                    .collect()
555            })
556            .unwrap_or_default();
557    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
558        if schema_link_keys.contains(&key) {
559            continue;
560        }
561        check_wiki_link(
562            store,
563            rel,
564            &link,
565            Some(link.line),
566            Some(&key),
567            basenames,
568            issues,
569        );
570    }
571
572    // ── policies: ignored types ──────────────────────────────────────────────
573    if let Some(t) = &type_ {
574        if store.config.ignored_types.iter().any(|it| it == t) {
575            push(
576                issues,
577                Severity::Info,
578                codes::POLICY_IGNORED_TYPE_PRESENT,
579                rel,
580                fm_key_line(fm_yaml, "type"),
581                Some("type".into()),
582                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
583                Some(
584                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
585                        .into(),
586                ),
587                // The policy source: `DB.md` declares the ignored type.
588                vec![PathBuf::from("DB.md")],
589            );
590        }
591        // A wiki-page deriving from an ignored-type record → warning. The
592        // decision lives in the shared `derived_from_ignored_type` entry point;
593        // this side only supplies the `derived_from` targets (with their line,
594        // which the issue carries) and renders the finding.
595        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
596            if let Some(hit) =
597                derived_from_ignored_type(store, t, std::iter::once(link.target.as_str()))
598            {
599                push(
600                    issues,
601                    Severity::Warning,
602                    codes::POLICY_IGNORED_TYPE_DERIVED,
603                    rel,
604                    Some(link.line),
605                    Some("derived_from".into()),
606                    format!(
607                        "wiki-page derives from ignored-type record `{}` (type `{}`)",
608                        hit.target, hit.target_type
609                    ),
610                    Some(
611                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
612                            .into(),
613                    ),
614                    // The ignored-type source record, plus `DB.md` (the policy
615                    // source that lists the ignored type).
616                    vec![
617                        PathBuf::from(format!("{}.md", hit.target)),
618                        PathBuf::from("DB.md"),
619                    ],
620                );
621            }
622        }
623    }
624
625    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
626    if let Some(t) = &type_ {
627        if let Some(schema) = effective_schema(store, t) {
628            check_schema(store, rel, fm, fm_yaml, &schema, issues);
629        }
630    }
631}
632
633/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
634fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
635    let line = fm_key_line(fm_yaml, "summary");
636    match fm.get("summary") {
637        None => push(
638            issues,
639            Severity::Error,
640            codes::SUMMARY_MISSING,
641            rel,
642            // A missing `summary` key has no line of its own → anchor to the
643            // frontmatter block top (line 1), the EXPECTED field-absence rule.
644            fm_key_line_or_top(fm_yaml, "summary"),
645            Some("summary".into()),
646            "content file has no `summary`".into(),
647            Some("run `dbmd fm init`".into()),
648            vec![],
649        ),
650        Some(v) => {
651            let s = scalar_string(v).unwrap_or_default();
652            if s.trim().is_empty() {
653                push(
654                    issues,
655                    Severity::Error,
656                    codes::SUMMARY_EMPTY,
657                    rel,
658                    line,
659                    Some("summary".into()),
660                    "`summary` is present but empty".into(),
661                    Some("write a one-line summary, or run `dbmd fm init`".into()),
662                    vec![],
663                );
664            } else if s.contains('\n') {
665                push(
666                    issues,
667                    Severity::Error,
668                    codes::SUMMARY_MULTILINE,
669                    rel,
670                    line,
671                    Some("summary".into()),
672                    "`summary` must be one line (contains a newline)".into(),
673                    Some("collapse the summary to a single line".into()),
674                    vec![],
675                );
676            } else if s.chars().count() > MAX_SUMMARY_LEN {
677                push(
678                    issues,
679                    Severity::Warning,
680                    codes::SUMMARY_TOO_LONG,
681                    rel,
682                    line,
683                    Some("summary".into()),
684                    format!(
685                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
686                        s.chars().count()
687                    ),
688                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
689                    vec![],
690                );
691            }
692        }
693    }
694}
695
696/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
697fn check_body_wiki_links(
698    store: &Store,
699    rel: &Path,
700    body: &str,
701    fm_end_line: u32,
702    basenames: Option<&BasenameIndex>,
703    issues: &mut Vec<Issue>,
704) {
705    for link in extract_wiki_links(body) {
706        // Body lines are offset past the frontmatter block. `link.line` is
707        // 1-based within `body`; the body starts at `fm_end_line + 1`.
708        let abs_line = fm_end_line + link.line;
709        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
710    }
711}
712
/// Maps a file's bare basename (its stem, without `.md`) to every
/// store-relative path that carries it. One is built per `validate --all`
/// sweep, letting the short-form wiki-link check tell a merely short-form
/// target (`WIKI_LINK_SHORT_FORM`) apart from an *ambiguous* one whose bare
/// basename matches two or more files (`WIKI_LINK_AMBIGUOUS`, the defensive
/// code). The working-set path passes `None` instead and reports the plain
/// short-form error without this map.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the swept file list (already gathered by `validate_all`, so no extra
/// walk happens here) by file stem into a [`BasenameIndex`]. Paths are kept in
/// input order within each stem; a path with no stem, or with a stem that is
/// not valid UTF-8, is left out.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files.iter().fold(BasenameIndex::new(), |mut by_stem, path| {
        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
            by_stem
                .entry(stem.to_owned())
                .or_default()
                .push(path.clone());
        }
        by_stem
    })
}
733
734/// The shared per-wiki-link doctrine + integrity check used by both body links
735/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
736/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
737/// when its bare basename matches ≥2 files.
738fn check_wiki_link(
739    store: &Store,
740    rel: &Path,
741    link: &Link,
742    line: Option<u32>,
743    key: Option<&str>,
744    basenames: Option<&BasenameIndex>,
745    issues: &mut Vec<Issue>,
746) {
747    let bare = link.target.trim_end_matches(".md");
748
749    // Short-form: not a full store-relative path (no `/`, or first segment isn't
750    // a known layer).
751    if !is_full_store_path(bare) {
752        // Ambiguous (defensive) takes precedence over plain short-form when the
753        // target is a bare basename (no `/`) that matches ≥2 files in the store.
754        // Only computable in the sweep (where `basenames` is populated); the
755        // working-set path falls through to the plain short-form error.
756        if !bare.contains('/') {
757            if let Some(idx) = basenames {
758                if let Some(matches) = idx.get(bare) {
759                    if matches.len() >= 2 {
760                        let mut related = matches.clone();
761                        related.sort();
762                        push(
763                            issues,
764                            Severity::Error,
765                            codes::WIKI_LINK_AMBIGUOUS,
766                            rel,
767                            line,
768                            key.map(str::to_string),
769                            format!(
770                                "short-form wiki-link `[[{}]]` matches multiple files",
771                                link.target
772                            ),
773                            Some("use the full store-relative path to disambiguate".into()),
774                            related,
775                        );
776                        return;
777                    }
778                }
779            }
780        }
781        push(
782            issues,
783            Severity::Error,
784            codes::WIKI_LINK_SHORT_FORM,
785            rel,
786            line,
787            key.map(str::to_string),
788            format!(
789                "wiki-link `[[{}]]` is not a full store-relative path",
790                link.target
791            ),
792            short_form_suggestion(bare),
793            vec![],
794        );
795        // Don't also report broken; the agent must fix the form first.
796        return;
797    }
798
799    // `.md` extension → warning, then still check existence.
800    if link.target.ends_with(".md") {
801        push(
802            issues,
803            Severity::Warning,
804            codes::WIKI_LINK_HAS_EXTENSION,
805            rel,
806            line,
807            key.map(str::to_string),
808            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
809            Some(format!("drop the extension: [[{bare}]]")),
810            vec![],
811        );
812    }
813
814    let Some(target_rel) = safe_md_target_rel(bare) else {
815        push(
816            issues,
817            Severity::Error,
818            codes::WIKI_LINK_BROKEN,
819            rel,
820            line,
821            key.map(str::to_string),
822            format!("wiki-link target `{bare}` is not a safe store-relative path"),
823            Some("use a full store-relative path under sources/, records/, or wiki/".into()),
824            vec![],
825        );
826        return;
827    };
828
829    // Broken: target file doesn't exist (O(1) stat).
830    let target_abs = store.root.join(target_rel);
831    if !target_abs.is_file() {
832        push(
833            issues,
834            Severity::Error,
835            codes::WIKI_LINK_BROKEN,
836            rel,
837            line,
838            key.map(str::to_string),
839            format!("wiki-link target `{bare}` doesn't exist"),
840            Some(format!(
841                "create `{bare}.md`, or point the link at an existing file"
842            )),
843            vec![],
844        );
845    }
846}
847
848// ─────────────────────────────────────────────────────────────────────────────
849//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
850// ─────────────────────────────────────────────────────────────────────────────
851
852/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
853/// block, or `None`. This is the **only** source of schema enforcement — the
854/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
855/// store that wants its `contact` / `expense` / etc. fields enforced declares
856/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
857/// copy-in starting point.
858fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
859    store.config.schemas.get(type_).cloned()
860}
861
862/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
863fn check_schema(
864    store: &Store,
865    rel: &Path,
866    fm: &BTreeMap<String, Value>,
867    fm_yaml: &str,
868    schema: &Schema,
869    issues: &mut Vec<Issue>,
870) {
871    for spec in &schema.fields {
872        let present = fm.get(&spec.name);
873        let line = fm_key_line(fm_yaml, &spec.name);
874
875        // Required.
876        let is_empty = match present {
877            None => true,
878            Some(v) => scalar_string(v)
879                .map(|s| s.trim().is_empty())
880                .unwrap_or(false),
881        };
882        if spec.required && is_empty {
883            push(
884                issues,
885                Severity::Error,
886                codes::SCHEMA_MISSING_REQUIRED,
887                rel,
888                // Absent key → anchor to the frontmatter top (line 1); a
889                // present-but-empty value keeps its own line.
890                fm_key_line_or_top(fm_yaml, &spec.name),
891                Some(spec.name.clone()),
892                format!("required field `{}` is absent or empty", spec.name),
893                Some(format!("set `{}` to a non-empty value", spec.name)),
894                vec![],
895            );
896            continue;
897        }
898        let Some(value) = present else { continue };
899
900        // An OPTIONAL field that is `null` or empty is simply unset — there is
901        // no value to shape/enum/link-check. (The required+empty case already
902        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
903        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
904        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
905        let value_empty = value.is_null()
906            || scalar_string(value)
907                .map(|s| s.trim().is_empty())
908                .unwrap_or(false);
909        if !spec.required && value_empty {
910            continue;
911        }
912
913        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
914        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
915        if let Some(prefix) = &spec.link_prefix {
916            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
917            continue; // a link field is never also shape/enum-checked
918        }
919
920        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
921        // or mapping satisfies neither, and would otherwise slip through both
922        // checks (`scalar_string` returns `None` for non-scalars, so the enum
923        // and shape bodies silently no-op). Flag it as a shape mismatch rather
924        // than let a structurally-wrong value validate clean. (Link fields,
925        // which legitimately take block-form sequences, already `continue`d.)
926        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
927            push(
928                issues,
929                Severity::Error,
930                codes::SCHEMA_SHAPE_MISMATCH,
931                rel,
932                line,
933                Some(spec.name.clone()),
934                format!(
935                    "`{}` must be a scalar value, found a list or mapping",
936                    spec.name
937                ),
938                Some(format!("set `{}` to a single scalar value", spec.name)),
939                vec![],
940            );
941            continue;
942        }
943
944        // enum
945        if let Some(allowed) = &spec.enum_values {
946            if let Some(s) = scalar_string(value) {
947                if !allowed.iter().any(|a| a == &s) {
948                    push(
949                        issues,
950                        Severity::Error,
951                        codes::SCHEMA_ENUM_VIOLATION,
952                        rel,
953                        line,
954                        Some(spec.name.clone()),
955                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
956                        Some(format!("use one of: {}", allowed.join(", "))),
957                        vec![],
958                    );
959                }
960            }
961            continue;
962        }
963
964        // shape
965        if let Some(shape) = spec.shape {
966            check_schema_shape(rel, &spec.name, value, shape, line, issues);
967        }
968    }
969}
970
971/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
972/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
973/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
974/// recognized exactly like the quoted form.
975fn check_schema_link(
976    store: &Store,
977    rel: &Path,
978    field: &str,
979    fm_yaml: &str,
980    prefix: &Path,
981    line: Option<u32>,
982    issues: &mut Vec<Issue>,
983) {
984    let prefix_str = prefix.to_string_lossy();
985    let prefix_str = prefix_str.trim_end_matches('/');
986    let suggestion = |target_leaf: &str| {
987        Some(format!(
988            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
989        ))
990    };
991
992    let links = frontmatter_links_for_key(fm_yaml, field, 2);
993    if links.is_empty() {
994        // No wiki-link in the field's value → it's a plain string.
995        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
996        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
997        let leaf = slugish(raw);
998        push(
999            issues,
1000            Severity::Error,
1001            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1002            rel,
1003            line,
1004            Some(field.to_string()),
1005            format!(
1006                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1007            ),
1008            suggestion(&leaf),
1009            vec![],
1010        );
1011        return;
1012    }
1013
1014    for link in links {
1015        if link.target.ends_with(".md") {
1016            let bare = link.target.trim_end_matches(".md");
1017            push(
1018                issues,
1019                Severity::Warning,
1020                codes::WIKI_LINK_HAS_EXTENSION,
1021                rel,
1022                Some(link.line),
1023                Some(field.to_string()),
1024                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1025                Some(format!("drop the extension: [[{bare}]]")),
1026                vec![],
1027            );
1028        }
1029        let bare = link.target.trim_end_matches(".md");
1030        if !path_under_prefix(bare, prefix_str) {
1031            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1032            push(
1033                issues,
1034                Severity::Error,
1035                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1036                rel,
1037                line,
1038                Some(field.to_string()),
1039                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1040                suggestion(leaf),
1041                vec![],
1042            );
1043        } else {
1044            let Some(target_rel) = safe_md_target_rel(bare) else {
1045                push(
1046                    issues,
1047                    Severity::Error,
1048                    codes::WIKI_LINK_BROKEN,
1049                    rel,
1050                    line,
1051                    Some(field.to_string()),
1052                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1053                    Some(
1054                        "use a full store-relative path under sources/, records/, or wiki/".into(),
1055                    ),
1056                    vec![],
1057                );
1058                continue;
1059            };
1060            // Correct prefix — still surface a broken target so the agent sees
1061            // one consistent vocabulary.
1062            let target_abs = store.root.join(target_rel);
1063            if !target_abs.is_file() {
1064                push(
1065                    issues,
1066                    Severity::Error,
1067                    codes::WIKI_LINK_BROKEN,
1068                    rel,
1069                    line,
1070                    Some(field.to_string()),
1071                    format!("wiki-link target `{bare}` doesn't exist"),
1072                    Some(format!(
1073                        "create `{bare}.md`, or point the link at an existing file"
1074                    )),
1075                    vec![],
1076                );
1077            }
1078        }
1079    }
1080}
1081
1082/// Shape enforcement for a non-link, non-enum schema field.
1083fn check_schema_shape(
1084    rel: &Path,
1085    field: &str,
1086    value: &Value,
1087    shape: Shape,
1088    line: Option<u32>,
1089    issues: &mut Vec<Issue>,
1090) {
1091    let s = scalar_string(value).unwrap_or_default();
1092    let ok = match shape {
1093        Shape::String => true, // any scalar string
1094        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1095        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1096        Shape::Date => is_iso8601_date_or_datetime(&s),
1097        Shape::Email => is_email(&s),
1098        Shape::Currency => is_currency(&s),
1099        Shape::Url => is_url(&s),
1100    };
1101    if !ok {
1102        push(
1103            issues,
1104            Severity::Error,
1105            codes::SCHEMA_SHAPE_MISMATCH,
1106            rel,
1107            line,
1108            Some(field.to_string()),
1109            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1110            Some(shape_suggestion(shape)),
1111            vec![],
1112        );
1113    }
1114}
1115
1116// ─────────────────────────────────────────────────────────────────────────────
1117//  Cross-file: entity-dedup collisions (validate_all only)
1118// ─────────────────────────────────────────────────────────────────────────────
1119
1120/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1121///
1122/// `DUP_ID` is universal (two files with the same explicit `id`).
1123/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1124/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1125/// uniqueness constraint, and two records of that type whose declared values
1126/// collide warn. No type carries a built-in dedup key — the store opts in.
1127///
1128/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1129/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1130/// lexicographically smallest store-relative path in the group (a total order →
1131/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1132/// that field's line on the reported file and carries it as `key`; a multi-field
1133/// key anchors to line 1 with a null key.
1134fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1135    // Path → frontmatter YAML, for resolving the anchor field's line on the
1136    // reported (smallest-path) member.
1137    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1138        .iter()
1139        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1140        .collect();
1141
1142    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1143    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1144    for (rel, p) in parsed {
1145        if let Some(map) = &p.fm {
1146            if let Some(id) = map.get("id").and_then(scalar_string) {
1147                if !id.trim().is_empty() {
1148                    by_id.entry(id).or_default().push(rel.clone());
1149                }
1150            }
1151        }
1152    }
1153    for (id, files) in &by_id {
1154        if files.len() > 1 {
1155            let (reported, related) = canonical_and_related(files);
1156            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1157            push(
1158                issues,
1159                Severity::Error,
1160                codes::DUP_ID,
1161                &reported,
1162                line,
1163                Some("id".into()),
1164                format!("id {id:?} is declared by more than one file"),
1165                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1166                related,
1167            );
1168        }
1169    }
1170
1171    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1172    // Every constraint comes from the store's `## Schemas`; a type with no
1173    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1174    // key-ordered, so emitted issues are deterministic across runs.
1175    for (type_name, schema) in &store.config.schemas {
1176        for key_fields in &schema.unique_keys {
1177            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1178        }
1179    }
1180}
1181
1182/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1183/// declared `key_fields` render to the same token tuple. Files missing any key
1184/// field are skipped — an incomplete key is never a collision.
1185///
1186/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1187/// store-relative path; `related` is the rest. A single-field key anchors to
1188/// that field's line on the reported file and carries it as `key`; a multi-field
1189/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1190fn soft_dup(
1191    parsed: &[(PathBuf, Parsed)],
1192    issues: &mut Vec<Issue>,
1193    type_: &str,
1194    key_fields: &[String],
1195    fm_yaml_of: &HashMap<&PathBuf, &str>,
1196) {
1197    if key_fields.is_empty() {
1198        return;
1199    }
1200    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1201    for (rel, p) in parsed {
1202        let is_type =
1203            p.fm.as_ref()
1204                .and_then(|m| m.get("type"))
1205                .and_then(scalar_string)
1206                .map(|t| t == type_)
1207                .unwrap_or(false);
1208        if !is_type {
1209            continue;
1210        }
1211        if let Some(key) = dedup_key(p, key_fields) {
1212            groups.entry(key).or_default().push(rel.clone());
1213        }
1214    }
1215    // HashMap iteration is nondeterministic; sort by reported member so the
1216    // emitted issue order is stable across runs.
1217    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1218        .values()
1219        .filter(|files| files.len() > 1)
1220        .map(|files| canonical_and_related(files))
1221        .collect();
1222    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1223
1224    let fields_disp = key_fields.join(", ");
1225    for (reported, related) in collisions {
1226        // Single-field keys anchor to the field's line + carry the key; multi-
1227        // field keys anchor to line 1 with a null key.
1228        let (line, key) = if key_fields.len() == 1 {
1229            (
1230                fm_yaml_of
1231                    .get(&reported)
1232                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1233                Some(key_fields[0].clone()),
1234            )
1235        } else {
1236            (Some(1), None)
1237        };
1238        let n = related.len();
1239        push(
1240            issues,
1241            Severity::Warning,
1242            codes::DUP_UNIQUE_KEY,
1243            &reported,
1244            line,
1245            key,
1246            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1247            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1248            related,
1249        );
1250    }
1251}
1252
1253/// Render a type's `unique:` key for one file: each field's dedup token in
1254/// order, or `None` if any field is absent/empty (an incomplete key never
1255/// collides).
1256fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1257    let mut out = Vec::with_capacity(key_fields.len());
1258    for f in key_fields {
1259        out.push(dedup_token(p, f)?);
1260    }
1261    Some(out)
1262}
1263
1264/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1265/// values (single or block-sequence list) reduce to their lower-cased target
1266/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1267/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1268fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1269    // Wiki-links first — read from the raw frontmatter text so the unquoted
1270    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1271    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1272    if !links.is_empty() {
1273        let set: BTreeSet<String> = links
1274            .into_iter()
1275            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1276            .filter(|t| !t.is_empty())
1277            .collect();
1278        return if set.is_empty() {
1279            None
1280        } else {
1281            Some(set.into_iter().collect::<Vec<_>>().join(","))
1282        };
1283    }
1284    match p.fm.as_ref()?.get(field) {
1285        Some(Value::Sequence(items)) => {
1286            let set: BTreeSet<String> = items
1287                .iter()
1288                .filter_map(scalar_string)
1289                .map(|s| s.trim().to_lowercase())
1290                .filter(|t| !t.is_empty())
1291                .collect();
1292            if set.is_empty() {
1293                None
1294            } else {
1295                Some(set.into_iter().collect::<Vec<_>>().join(","))
1296            }
1297        }
1298        Some(v) => {
1299            let s = scalar_string(v)?.trim().to_lowercase();
1300            if s.is_empty() {
1301                None
1302            } else {
1303                Some(s)
1304            }
1305        }
1306        None => None,
1307    }
1308}
1309
/// Split a collision group into `(reported, related)`.
///
/// The group is sorted ascending; the first (lexicographically smallest)
/// store-relative path becomes the reported member and everything after it,
/// still in sorted order, is `related`. Because path ordering is total, the
/// split is deterministic — the property reporting rule #1 relies on.
///
/// # Panics
///
/// Panics if `files` is empty; callers only pass groups of two or more.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut related = files.to_vec();
    related.sort();
    // Pull the smallest path off the front; the tail is already sorted.
    let reported = related.remove(0);
    (reported, related)
}
1321
1322// ─────────────────────────────────────────────────────────────────────────────
1323//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1324// ─────────────────────────────────────────────────────────────────────────────
1325
1326/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1327fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1328    // Group content files by their immediate parent folder (the type-folder,
1329    // *across date shards* — a sharded file's "type folder" is the folder right
1330    // under the layer). We key on the type-folder so shards roll up correctly.
1331    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1332    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1333    for rel in files {
1334        // The layer is the first path component — recorded independently of the
1335        // type-folder so a layer containing only loose files still requires an
1336        // `index.md`.
1337        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1338            match layer {
1339                "sources" => layers_present.insert("sources"),
1340                "records" => layers_present.insert("records"),
1341                "wiki" => layers_present.insert("wiki"),
1342                _ => false,
1343            };
1344        }
1345        if let Some(tf) = type_folder_of(rel) {
1346            type_folders.entry(tf).or_default().push(rel.clone());
1347        }
1348    }
1349
1350    // ── Root index.md ─────────────────────────────────────────────────────────
1351    if !files.is_empty() {
1352        let root_index = store.root.join("index.md");
1353        if !root_index.is_file() {
1354            push(
1355                issues,
1356                Severity::Error,
1357                codes::INDEX_MISSING,
1358                Path::new("index.md"),
1359                None,
1360                None,
1361                "store has files but no root `index.md`".into(),
1362                Some("run `dbmd index rebuild`".into()),
1363                vec![],
1364            );
1365        } else {
1366            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1367        }
1368    }
1369
1370    // ── Layer index.md ────────────────────────────────────────────────────────
1371    for layer in &layers_present {
1372        let layer_index_rel = PathBuf::from(layer).join("index.md");
1373        let abs = store.root.join(&layer_index_rel);
1374        if !abs.is_file() {
1375            push(
1376                issues,
1377                Severity::Error,
1378                codes::INDEX_MISSING,
1379                &layer_index_rel,
1380                None,
1381                None,
1382                format!("layer `{layer}/` has files but no `index.md`"),
1383                Some("run `dbmd index rebuild`".into()),
1384                vec![],
1385            );
1386        } else {
1387            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1388        }
1389    }
1390
1391    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1392    for (tf, members) in &type_folders {
1393        let index_md_rel = tf.join("index.md");
1394        let index_md_abs = store.root.join(&index_md_rel);
1395        let index_md_present = index_md_abs.is_file();
1396        if !index_md_present {
1397            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1398            // on the FOLDER (not the would-be `index.md` path). When the index is
1399            // entirely missing we do NOT additionally evaluate per-entry
1400            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1401            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1402            push(
1403                issues,
1404                Severity::Error,
1405                codes::INDEX_MISSING,
1406                tf,
1407                None,
1408                None,
1409                format!("non-empty folder `{}` has no index.md", tf.display()),
1410                Some(format!(
1411                    "run `dbmd index rebuild --folder {}`",
1412                    tf.display()
1413                )),
1414                vec![],
1415            );
1416            continue;
1417        }
1418
1419        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1420        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1421
1422        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1423        // when the `index.md` is present (above): a folder whose entire index is
1424        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1425        let jsonl_rel = tf.join("index.jsonl");
1426        let jsonl_abs = store.root.join(&jsonl_rel);
1427        if !jsonl_abs.is_file() {
1428            push(
1429                issues,
1430                Severity::Error,
1431                codes::INDEX_JSONL_MISSING,
1432                &jsonl_rel,
1433                None,
1434                None,
1435                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1436                Some("run `dbmd index rebuild`".into()),
1437                vec![],
1438            );
1439        } else {
1440            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1441        }
1442    }
1443
1444    // ── Orphan index.md: an index file in a folder with no content. ──────────
1445    for rel in walk_index_files(&store.root) {
1446        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1447        let parent_str = parent.to_string_lossy().to_string();
1448        let is_canonical = parent_str.is_empty() // root
1449            || matches!(parent_str.as_str(), "sources" | "records" | "wiki")
1450            || type_folders.contains_key(&parent);
1451        if !is_canonical {
1452            push(
1453                issues,
1454                Severity::Warning,
1455                codes::INDEX_ORPHAN,
1456                &rel,
1457                None,
1458                None,
1459                format!(
1460                    "`{}` sits in an empty or non-canonical folder",
1461                    rel.display()
1462                ),
1463                Some("remove it, or run `dbmd index rebuild`".into()),
1464                vec![],
1465            );
1466        }
1467    }
1468}
1469
1470/// Check a type-folder `index.md`'s entries against the folder's actual files:
1471/// stale entries (target gone), missing entries (file not listed), and
1472/// summary mismatches.
1473fn check_type_folder_index_md(
1474    store: &Store,
1475    tf: &Path,
1476    index_rel: &Path,
1477    members: &[PathBuf],
1478    issues: &mut Vec<Issue>,
1479) {
1480    let abs = store.root.join(index_rel);
1481    let Ok(text) = std::fs::read_to_string(&abs) else {
1482        return;
1483    };
1484    let entries = parse_index_entries(&text);
1485
1486    let listed: BTreeSet<PathBuf> = entries
1487        .iter()
1488        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1489        .collect();
1490
1491    // Stale entries + summary mismatch.
1492    for entry in &entries {
1493        let bare = entry.target.trim_end_matches(".md");
1494        let Some(target_rel) = safe_md_target_rel(bare) else {
1495            push(
1496                issues,
1497                Severity::Error,
1498                codes::INDEX_STALE_ENTRY,
1499                index_rel,
1500                Some(entry.line),
1501                None,
1502                format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1503                Some("run `dbmd index rebuild`".into()),
1504                vec![],
1505            );
1506            continue;
1507        };
1508        let target_abs = store.root.join(target_rel);
1509        if !target_abs.is_file() {
1510            push(
1511                issues,
1512                Severity::Error,
1513                codes::INDEX_STALE_ENTRY,
1514                index_rel,
1515                Some(entry.line),
1516                None,
1517                format!("index entry `[[{bare}]]` points at a missing file"),
1518                Some("run `dbmd index rebuild`".into()),
1519                // The stale target the entry names (the file that no longer
1520                // exists) — so the agent can locate the dangling reference.
1521                vec![PathBuf::from(format!("{bare}.md"))],
1522            );
1523            continue;
1524        }
1525        // Summary mismatch: the entry text must equal the file's `summary`. A
1526        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1527        // summary is also a mismatch — the SPEC requires every type-folder index
1528        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1529        // missing quote can't validate clean just because there's nothing to
1530        // compare.
1531        if let Some(expected) = read_summary(&target_abs) {
1532            match &entry.summary_text {
1533                Some(text_part) if text_part.trim() != expected.trim() => {
1534                    push(
1535                        issues,
1536                        Severity::Error,
1537                        codes::INDEX_SUMMARY_MISMATCH,
1538                        index_rel,
1539                        Some(entry.line),
1540                        None,
1541                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1542                        Some("run `dbmd index rebuild`".into()),
1543                        vec![PathBuf::from(format!("{bare}.md"))],
1544                    );
1545                }
1546                None if !expected.trim().is_empty() => {
1547                    push(
1548                        issues,
1549                        Severity::Error,
1550                        codes::INDEX_SUMMARY_MISMATCH,
1551                        index_rel,
1552                        Some(entry.line),
1553                        None,
1554                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1555                        Some("run `dbmd index rebuild`".into()),
1556                        vec![PathBuf::from(format!("{bare}.md"))],
1557                    );
1558                }
1559                _ => {}
1560            }
1561        }
1562    }
1563
1564    // Missing entries: a member file not listed. Skip the index/log meta files.
1565    // The browse view caps at 500; only flag a missing entry when the folder is
1566    // under the cap (a capped folder legitimately omits older files).
1567    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1568    if content_members.len() <= 500 {
1569        for m in content_members {
1570            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1571            if !listed.contains(&bare) {
1572                push(
1573                    issues,
1574                    Severity::Error,
1575                    codes::INDEX_MISSING_ENTRY,
1576                    index_rel,
1577                    None,
1578                    None,
1579                    format!(
1580                        "file `{}` is not listed in its folder's `index.md`",
1581                        m.display()
1582                    ),
1583                    Some("run `dbmd index rebuild`".into()),
1584                    vec![(*m).clone()],
1585                );
1586            }
1587        }
1588    }
1589    let _ = tf;
1590}
1591
1592/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1593/// folder (uncapped), every record must point at a real file, and each record's
1594/// fields must match the file's frontmatter.
1595fn check_type_folder_index_jsonl(
1596    store: &Store,
1597    tf: &Path,
1598    jsonl_rel: &Path,
1599    members: &[PathBuf],
1600    issues: &mut Vec<Issue>,
1601) {
1602    let abs = store.root.join(jsonl_rel);
1603    let Ok(text) = std::fs::read_to_string(&abs) else {
1604        return;
1605    };
1606
1607    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1608    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1609    for (i, line) in text.lines().enumerate() {
1610        let line = line.trim();
1611        if line.is_empty() {
1612            continue;
1613        }
1614        let rec: serde_json::Value = match serde_json::from_str(line) {
1615            Ok(v) => v,
1616            Err(e) => {
1617                push(
1618                    issues,
1619                    Severity::Error,
1620                    codes::INDEX_JSONL_DESYNC,
1621                    jsonl_rel,
1622                    Some((i + 1) as u32),
1623                    None,
1624                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1625                    Some("run `dbmd index rebuild`".into()),
1626                    vec![],
1627                );
1628                continue;
1629            }
1630        };
1631        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1632            if !is_safe_store_relative_path(Path::new(path)) {
1633                push(
1634                    issues,
1635                    Severity::Error,
1636                    codes::INDEX_JSONL_DESYNC,
1637                    jsonl_rel,
1638                    Some((i + 1) as u32),
1639                    None,
1640                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1641                    Some("run `dbmd index rebuild`".into()),
1642                    vec![],
1643                );
1644                continue;
1645            }
1646            records.insert(PathBuf::from(path), rec);
1647        }
1648    }
1649
1650    let member_set: BTreeSet<PathBuf> = members
1651        .iter()
1652        .filter(|m| is_content_file(m))
1653        .cloned()
1654        .collect();
1655
1656    // jsonl record → missing file = desync.
1657    for path in records.keys() {
1658        let target_abs = store.root.join(path);
1659        if !target_abs.is_file() {
1660            push(
1661                issues,
1662                Severity::Error,
1663                codes::INDEX_JSONL_DESYNC,
1664                jsonl_rel,
1665                None,
1666                None,
1667                format!(
1668                    "`index.jsonl` record points at missing file `{}`",
1669                    path.display()
1670                ),
1671                Some("run `dbmd index rebuild`".into()),
1672                vec![],
1673            );
1674        }
1675    }
1676
1677    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1678    for m in &member_set {
1679        if !records.contains_key(m) {
1680            push(
1681                issues,
1682                Severity::Error,
1683                codes::INDEX_JSONL_DESYNC,
1684                jsonl_rel,
1685                None,
1686                None,
1687                format!(
1688                    "file `{}` is missing from the complete `index.jsonl`",
1689                    m.display()
1690                ),
1691                Some("run `dbmd index rebuild`".into()),
1692                vec![m.clone()],
1693            );
1694        }
1695    }
1696
1697    // Record fields stale vs. frontmatter. SPEC § Validation defines
1698    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1699    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1700    // search paths read every field straight from these sidecars (`tags`,
1701    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1702    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1703    // a stale value answer queries with data that exists in no `.md` file.
1704    //
1705    // Rather than re-list (and drift from) every projected key, rebuild the
1706    // record the canonical projection would write for this file
1707    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1708    // and diff the two as flat JSON maps. Every key the projection emits is
1709    // covered automatically; `path` is the join key and is skipped.
1710    for (path, rec) in &records {
1711        let target_abs = store.root.join(path);
1712        if !target_abs.is_file() {
1713            continue;
1714        }
1715        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1716        else {
1717            continue; // unreadable / unparseable frontmatter is reported elsewhere
1718        };
1719        let Ok(expected_json) = serde_json::to_value(&expected) else {
1720            continue;
1721        };
1722        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1723            continue;
1724        };
1725
1726        // Compare the union of keys present on either side; a key the file
1727        // projects but the sidecar omits is just as stale as a wrong value.
1728        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1729        for key in have.keys().chain(want.keys()) {
1730            if key == "path" {
1731                continue;
1732            }
1733            if have.get(key) != want.get(key) {
1734                mismatched_keys.insert(key);
1735            }
1736        }
1737
1738        if !mismatched_keys.is_empty() {
1739            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1740            push(
1741                issues,
1742                Severity::Error,
1743                codes::INDEX_JSONL_STALE,
1744                jsonl_rel,
1745                None,
1746                Some(keys.join(",")),
1747                format!(
1748                    "`index.jsonl` record for `{}` is stale ({})",
1749                    path.display(),
1750                    keys.join(", ")
1751                ),
1752                Some("run `dbmd index rebuild`".into()),
1753                vec![path.clone()],
1754            );
1755        }
1756    }
1757    let _ = tf;
1758}
1759
1760/// Check an index's `scope:` frontmatter against its filesystem location.
1761fn check_index_scope(
1762    store: &Store,
1763    index_rel: &Path,
1764    expected_scope: &str,
1765    expected_folder: Option<&str>,
1766    issues: &mut Vec<Issue>,
1767) {
1768    let abs = store.root.join(index_rel);
1769    let Ok(text) = std::fs::read_to_string(&abs) else {
1770        return;
1771    };
1772    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1773        return;
1774    };
1775    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1776        return;
1777    };
1778    let fm = yaml_map_to_btree(&map);
1779
1780    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1781        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1782        let scope_ok =
1783            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1784        if !scope_ok {
1785            push(
1786                issues,
1787                Severity::Warning,
1788                codes::INDEX_WRONG_SCOPE,
1789                index_rel,
1790                fm_key_line(&yaml, "scope"),
1791                Some("scope".into()),
1792                format!(
1793                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1794                ),
1795                Some(format!("set `scope: {expected_scope}`")),
1796                vec![],
1797            );
1798        }
1799    }
1800    // folder: must match for layer/type-folder indexes.
1801    if let Some(expected) = expected_folder {
1802        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1803            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1804                push(
1805                    issues,
1806                    Severity::Warning,
1807                    codes::INDEX_WRONG_SCOPE,
1808                    index_rel,
1809                    fm_key_line(&yaml, "folder"),
1810                    Some("folder".into()),
1811                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1812                    Some(format!("set `folder: {expected}`")),
1813                    vec![],
1814                );
1815            }
1816        }
1817    }
1818}
1819
1820// ─────────────────────────────────────────────────────────────────────────────
1821//  Cross-file: log.md well-formedness + ordering (validate_all only)
1822// ─────────────────────────────────────────────────────────────────────────────
1823
1824/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries.
1825fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1826    let log_rel = Path::new("log.md");
1827    let abs = store.root.join(log_rel);
1828    let Ok(text) = std::fs::read_to_string(&abs) else {
1829        return;
1830    };
1831
1832    let mut prev: Option<DateTime<FixedOffset>> = None;
1833    for (i, line) in text.lines().enumerate() {
1834        if !line.starts_with("## [") {
1835            continue;
1836        }
1837        let line_no = (i + 1) as u32;
1838        match parse_log_header(line) {
1839            None => push(
1840                issues,
1841                Severity::Error,
1842                codes::LOG_BAD_TIMESTAMP,
1843                log_rel,
1844                Some(line_no),
1845                None,
1846                format!("log entry header has an unparseable timestamp: {line:?}"),
1847                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
1848                vec![],
1849            ),
1850            Some((ts, kind, _object)) => {
1851                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
1852                    push(
1853                        issues,
1854                        Severity::Warning,
1855                        codes::LOG_UNKNOWN_KIND,
1856                        log_rel,
1857                        Some(line_no),
1858                        None,
1859                        format!("log entry kind `{kind}` is not recognized"),
1860                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
1861                        vec![],
1862                    );
1863                }
1864                if let Some(p) = prev {
1865                    if ts < p {
1866                        push(
1867                            issues,
1868                            Severity::Warning,
1869                            codes::LOG_OUT_OF_ORDER,
1870                            log_rel,
1871                            Some(line_no),
1872                            None,
1873                            "log entry is older than the entry above it (possible rewrite)".into(),
1874                            Some("append corrective entries; never reorder past ones".into()),
1875                            vec![],
1876                        );
1877                    }
1878                }
1879                prev = Some(ts);
1880            }
1881        }
1882    }
1883}
1884
1885// ─────────────────────────────────────────────────────────────────────────────
1886//  Self-contained primitives (collapse onto sibling modules once they land)
1887// ─────────────────────────────────────────────────────────────────────────────
1888
/// A minimal wiki-link occurrence: the link's target and the 1-based line it
/// was found on. No display text is kept.
struct Link {
    /// The link's target.
    target: String,
    /// 1-based line number of the link. The frontmatter scanners in this file
    /// document it as the absolute file line.
    line: u32,
}
1894
1895/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
1896/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
1897/// exact-cased directory entry to be present.
1898fn store_marker_present(store: &Store) -> bool {
1899    let want = store.root.join("DB.md");
1900    if !want.is_file() {
1901        return false;
1902    }
1903    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
1904    match std::fs::read_dir(&store.root) {
1905        Ok(entries) => entries
1906            .flatten()
1907            .any(|e| e.file_name().to_str() == Some("DB.md")),
1908        Err(_) => true, // can't enumerate; trust the is_file() above
1909    }
1910}
1911
1912/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
1913/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
1914/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
1915/// `Schemas`).
1916///
1917/// `DB.md` is not a content file (no `summary`), so it is checked here rather
1918/// than through `check_content_file`. The marker presence is established by the
1919/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
1920/// as a store (the marker is the filename), so we report its shape rather than
1921/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
1922fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
1923    let rel = Path::new("DB.md");
1924    let abs = store.root.join("DB.md");
1925    let Ok(text) = std::fs::read_to_string(&abs) else {
1926        return; // marker present but unreadable: nothing more to say.
1927    };
1928
1929    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
1930        // No frontmatter block at all → it cannot declare `type: db-md` and has
1931        // neither required field. Report the type and both missing fields,
1932        // anchored to line 1 (the would-be opening fence).
1933        push(
1934            issues,
1935            Severity::Error,
1936            codes::DB_MD_BAD_TYPE,
1937            rel,
1938            Some(1),
1939            Some("type".into()),
1940            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
1941            Some("add a `---` frontmatter block with `type: db-md`".into()),
1942            vec![],
1943        );
1944        for field in ["scope", "owner"] {
1945            push(
1946                issues,
1947                Severity::Error,
1948                codes::DB_MD_MISSING_FIELD,
1949                rel,
1950                Some(1),
1951                Some(field.into()),
1952                format!("DB.md frontmatter is missing required field `{field}`"),
1953                Some(format!("add `{field}:` to the DB.md frontmatter")),
1954                vec![],
1955            );
1956        }
1957        return;
1958    };
1959
1960    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
1961    // identity contract is unmet (no provable `type: db-md`, no provable fields).
1962    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
1963        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
1964        Ok(Value::Null) => Some(BTreeMap::new()),
1965        _ => None,
1966    };
1967
1968    match &fm {
1969        Some(map) => {
1970            // ── type: db-md ──────────────────────────────────────────────────
1971            let type_ = map.get("type").and_then(scalar_string);
1972            if type_.as_deref() != Some("db-md") {
1973                let (line, msg) = match &type_ {
1974                    Some(t) => (
1975                        fm_key_line(&fm_yaml, "type"),
1976                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
1977                    ),
1978                    None => (
1979                        Some(1),
1980                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
1981                    ),
1982                };
1983                push(
1984                    issues,
1985                    Severity::Error,
1986                    codes::DB_MD_BAD_TYPE,
1987                    rel,
1988                    line,
1989                    Some("type".into()),
1990                    msg,
1991                    Some("set `type: db-md` in the DB.md frontmatter".into()),
1992                    vec![],
1993                );
1994            }
1995
1996            // ── required fields: scope + owner ───────────────────────────────
1997            for field in ["scope", "owner"] {
1998                let present = map
1999                    .get(field)
2000                    .and_then(scalar_string)
2001                    .map(|s| !s.trim().is_empty())
2002                    .unwrap_or(false);
2003                if !present {
2004                    push(
2005                        issues,
2006                        Severity::Error,
2007                        codes::DB_MD_MISSING_FIELD,
2008                        rel,
2009                        // A present-but-empty field anchors to its line; a fully
2010                        // absent one to the block top.
2011                        fm_key_line_or_top(&fm_yaml, field),
2012                        Some(field.into()),
2013                        format!("DB.md frontmatter is missing required field `{field}`"),
2014                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2015                        vec![],
2016                    );
2017                }
2018            }
2019        }
2020        None => {
2021            // Unparseable frontmatter: the identity contract is unprovable. Emit
2022            // the type error and both field errors, anchored to the block top.
2023            push(
2024                issues,
2025                Severity::Error,
2026                codes::DB_MD_BAD_TYPE,
2027                rel,
2028                Some(1),
2029                Some("type".into()),
2030                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2031                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2032                vec![],
2033            );
2034            for field in ["scope", "owner"] {
2035                push(
2036                    issues,
2037                    Severity::Error,
2038                    codes::DB_MD_MISSING_FIELD,
2039                    rel,
2040                    Some(1),
2041                    Some(field.into()),
2042                    format!("DB.md frontmatter is missing required field `{field}`"),
2043                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2044                    vec![],
2045                );
2046            }
2047        }
2048    }
2049
2050    // ── recognized `##` section headers only ─────────────────────────────────
2051    // The body's H2 headings must be one of the three the toolkit reads; any
2052    // other is a likely typo / misplacement (warning — the parser ignores it,
2053    // so the config is not corrupted, but the operator wrote a section that will
2054    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2055    // schema blocks) live under their H2 and are not flagged here.
2056    for section in crate::parser::extract_sections(&body) {
2057        if section.level != 2 {
2058            continue;
2059        }
2060        let name = section.heading.trim().to_ascii_lowercase();
2061        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2062            continue;
2063        }
2064        // `Section::line` is 1-based within the body; the body begins at file
2065        // line `fm_end_line + 1`.
2066        let file_line = fm_end_line + section.line;
2067        push(
2068            issues,
2069            Severity::Warning,
2070            codes::DB_MD_UNKNOWN_SECTION,
2071            rel,
2072            Some(file_line),
2073            None,
2074            format!(
2075                "DB.md has an unrecognized `## {}` section",
2076                section.heading.trim()
2077            ),
2078            Some(
2079                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2080                 remove or rename this heading"
2081                    .into(),
2082            ),
2083            vec![],
2084        );
2085    }
2086}
2087
2088/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2089fn not_a_store_issue(store: &Store) -> Issue {
2090    Issue {
2091        severity: Severity::Error,
2092        code: codes::NOT_A_STORE,
2093        file: store.root.clone(),
2094        line: None,
2095        key: None,
2096        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2097        suggestion: Some("create a `DB.md` at the store root".into()),
2098        related: vec![],
2099    }
2100}
2101
/// Decide whether a store-relative path names a content file.
///
/// A content file lives under one of the three layers (`sources/`, `records/`,
/// `wiki/`), has a name ending in `.md`, and is not one of the derived catalog
/// twins (`index.md` / `index.jsonl`).
///
/// `DB.md` and `log.md` are reserved only at the store root, which the layer
/// test already rules out; a file with one of those names *inside* a layer
/// (e.g. `records/docs/log.md`) is ordinary content, consistent with
/// `Store::walk`.
fn is_content_file(rel: &Path) -> bool {
    let in_layer = rel
        .iter()
        .next()
        .and_then(|segment| segment.to_str())
        .map_or(false, |layer| {
            matches!(layer, "sources" | "records" | "wiki")
        });
    if !in_layer {
        return false;
    }

    match rel.file_name().and_then(|segment| segment.to_str()) {
        // No usable file name, or one of the catalog twins.
        None | Some("index.md") | Some("index.jsonl") => false,
        Some(name) => name.ends_with(".md"),
    }
}
2122
/// Split a file's text into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open on the very first line with `---` and close
/// at the next line that is `---`; trailing whitespace on either fence is
/// ignored, leading whitespace is not. Returns `None` when the text is empty,
/// the first line is not a fence, or the block is never closed.
///
/// * `frontmatter_yaml` — the lines between the fences, each terminated by
///   `\n`.
/// * `body` — the lines after the closing fence joined with `\n` (no trailing
///   newline).
/// * `closing_fence_line` — 1-based file line of the closing fence.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    let mut rows = text.lines();
    if rows.next()?.trim_end() != "---" {
        return None;
    }

    // The opening fence is line 1, so the first YAML line is line 2.
    let mut fence_line = 1u32;
    let mut yaml = String::new();
    let mut closed = false;
    for row in rows.by_ref() {
        fence_line += 1;
        if row.trim_end() == "---" {
            closed = true;
            break;
        }
        yaml.push_str(row);
        yaml.push('\n');
    }
    if !closed {
        return None;
    }

    // Whatever the iterator has left is the body.
    let body = rows.collect::<Vec<_>>().join("\n");
    Some((yaml, body, fence_line))
}
2154
2155/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2156fn read_summary(abs: &Path) -> Option<String> {
2157    let text = std::fs::read_to_string(abs).ok()?;
2158    let (yaml, _, _) = split_frontmatter(&text)?;
2159    let value: Value = serde_norway::from_str(&yaml).ok()?;
2160    if let Value::Mapping(m) = value {
2161        m.get(Value::String("summary".into()))
2162            .and_then(scalar_string)
2163    } else {
2164        None
2165    }
2166}
2167
2168/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2169/// non-string keys (frontmatter keys are always strings).
2170fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2171    let mut out = BTreeMap::new();
2172    for (k, v) in map {
2173        if let Value::String(s) = k {
2174            out.insert(s.clone(), v.clone());
2175        }
2176    }
2177    out
2178}
2179
2180/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2181/// sequences/mappings/null.
2182fn scalar_string(v: &Value) -> Option<String> {
2183    match v {
2184        Value::String(s) => Some(s.clone()),
2185        Value::Number(n) => Some(n.to_string()),
2186        Value::Bool(b) => Some(b.to_string()),
2187        _ => None,
2188    }
2189}
2190
2191/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2192/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2193fn is_flat_scalar_list(v: &Value) -> bool {
2194    match v {
2195        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2196        _ => false,
2197    }
2198}
2199
2200/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2201/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2202/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2203/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2204/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2205/// both forms uniformly, the way the link textually appears — the doctrine view.
2206///
2207/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2208/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2209fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2210    let mut out = Vec::new();
2211    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2212        for link in links {
2213            out.push((key.clone(), link));
2214        }
2215    }
2216    out
2217}
2218
2219/// The wiki-link targets declared under a single top-level frontmatter key
2220/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2221/// carries no `[[...]]`.
2222fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2223    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2224        if k == key {
2225            return links;
2226        }
2227    }
2228    Vec::new()
2229}
2230
2231/// The raw value text under a single top-level frontmatter key (the remainder of
2232/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2233/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2234fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2235    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2236        if k == key {
2237            return Some(value_text);
2238        }
2239    }
2240    None
2241}
2242
2243/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2244/// each top-level key. A top-level key is a line with no leading indentation in
2245/// `name:` form; its value spans the rest of that line plus any deeper-indented
2246/// continuation lines (block scalars, block sequences) until the next top-level
2247/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2248/// absolute file line.
2249fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2250    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2251    let mut current: Option<(String, String, Vec<Link>)> = None;
2252
2253    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2254        let file_line = fm_start_line + idx as u32;
2255        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2256        let trimmed = raw_line.trim();
2257
2258        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2259        // comment. (Indented or dash lines belong to the current key's value.)
2260        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2261            top_level_key(raw_line)
2262        } else {
2263            None
2264        };
2265
2266        if let Some((key, after)) = new_key {
2267            if let Some(done) = current.take() {
2268                blocks.push(done);
2269            }
2270            let mut links = Vec::new();
2271            collect_line_links(after, file_line, &mut links);
2272            current = Some((key, after.trim().to_string(), links));
2273        } else if let Some((_k, value_text, links)) = current.as_mut() {
2274            // Continuation of the current key's value (indented or dash line).
2275            if !value_text.is_empty() {
2276                value_text.push('\n');
2277            }
2278            value_text.push_str(trimmed);
2279            collect_line_links(raw_line, file_line, links);
2280        }
2281    }
2282    if let Some(done) = current.take() {
2283        blocks.push(done);
2284    }
2285    blocks
2286}
2287
/// Split a `name: value` line at its first `:` into `(key, value_after_colon)`.
///
/// The key is returned trimmed; the value is the untouched remainder of the
/// line after the colon. Returns `None` when the line has no `:`, or when the
/// trimmed key is empty or contains anything other than alphanumerics, `_`,
/// or `-`.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let is_key_char = |c: char| c.is_alphanumeric() || matches!(c, '_' | '-');

    if name.is_empty() || !name.chars().all(is_key_char) {
        return None;
    }
    // `:` is one byte, so `colon + 1` is the start of the value text.
    Some((name.to_string(), &line[colon + 1..]))
}
2302
2303/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2304/// each tagged with `file_line`.
2305fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2306    let bytes = s.as_bytes();
2307    let mut i = 0;
2308    while i + 1 < bytes.len() {
2309        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2310            if let Some(close) = s[i + 2..].find("]]") {
2311                let inner = &s[i + 2..i + 2 + close];
2312                // Guard against `[[[` (nested) double-counting: the inner must
2313                // not itself open another `[[`.
2314                let target = inner
2315                    .trim_start_matches('[')
2316                    .split('|')
2317                    .next()
2318                    .unwrap_or(inner)
2319                    .trim()
2320                    .to_string();
2321                if !target.is_empty() {
2322                    links.push(Link {
2323                        target,
2324                        line: file_line,
2325                    });
2326                }
2327                i = i + 2 + close + 2;
2328                continue;
2329            }
2330        }
2331        i += 1;
2332    }
2333}
2334
2335/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2336/// Skips fenced code blocks (```), so example links in docs don't trip the
2337/// validator.
2338fn extract_wiki_links(body: &str) -> Vec<Link> {
2339    let mut out = Vec::new();
2340    let mut in_fence = false;
2341    for (idx, line) in body.lines().enumerate() {
2342        let trimmed = line.trim_start();
2343        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2344            in_fence = !in_fence;
2345            continue;
2346        }
2347        if in_fence {
2348            continue;
2349        }
2350        let line_no = (idx + 1) as u32;
2351        let bytes = line.as_bytes();
2352        let mut i = 0;
2353        while i + 1 < bytes.len() {
2354            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2355                if let Some(close) = line[i + 2..].find("]]") {
2356                    let inner = &line[i + 2..i + 2 + close];
2357                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2358                    // Skip a triple-bracket `[[[…` opening: the inner content
2359                    // starts with `[`, so this is the rejected flow-form list
2360                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2361                    // legitimate target never starts with `[`. The frontmatter
2362                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2363                    // extracting a bogus body link here would double-report it as
2364                    // a spurious `WIKI_LINK_SHORT_FORM`.
2365                    if !target.is_empty() && !target.starts_with('[') {
2366                        out.push(Link {
2367                            target,
2368                            line: line_no,
2369                        });
2370                    }
2371                    i = i + 2 + close + 2;
2372                    continue;
2373                }
2374            }
2375            i += 1;
2376        }
2377    }
2378    out
2379}
2380
/// Find frontmatter keys written as the wiki-link-list mis-encoding: a YAML
/// flow sequence whose items are themselves sequences
/// (`attendees: [[[a]], [[b]]]`). Returns the offending keys in document order.
///
/// Each line is split at its first `:`. The trimmed left side is the key
/// (lines whose key is empty or starts with `#` or `-` are ignored), and the
/// line is reported when the trimmed right side starts with `[[[`. The
/// canonical block-sequence form never matches, so it is not flagged.
fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
    fm_yaml
        .lines()
        .filter_map(|line| {
            let (raw_key, value) = line.split_once(':')?;
            let key = raw_key.trim();
            let ignorable = key.is_empty() || key.starts_with(['#', '-']);
            // `[[[` means the first element of the flow list is itself a list.
            let nested_flow_list = value.trim().starts_with("[[[");
            (!ignorable && nested_flow_list).then(|| key.to_string())
        })
        .collect()
}
2403
/// True if a bare target (no `.md`) is a full store-relative path: its first
/// `/`-separated segment is one of the layers `sources`, `records`, or `wiki`,
/// and something non-empty follows that first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, below)) => {
            !below.is_empty() && matches!(layer, "sources" | "records" | "wiki")
        }
        // No `/` at all: a short-form target.
        None => false,
    }
}
2412
/// True if `path` is made only of normal relative components (with `.`
/// tolerated) and contains at least one normal component.
///
/// Validator inputs come from user-authored markdown/JSON sidecars, so an
/// absolute path, a platform prefix, or a `..` must never turn a validation
/// probe into a filesystem escape — any of those makes the path unsafe. An
/// empty path, or one consisting solely of `.`, is rejected as well.
fn is_safe_store_relative_path(path: &Path) -> bool {
    let mut has_normal = false;
    let only_safe_parts = path.components().all(|part| match part {
        Component::Normal(_) => {
            has_normal = true;
            true
        }
        Component::CurDir => true,
        Component::ParentDir | Component::RootDir | Component::Prefix(_) => false,
    });
    only_safe_parts && has_normal
}
2427
2428fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2429    let path = Path::new(bare);
2430    if !is_safe_store_relative_path(path) {
2431        return None;
2432    }
2433    Some(PathBuf::from(format!("{bare}.md")))
2434}
2435
/// True if a bare target path equals `prefix` or lies beneath it (both
/// `.md`-stripped). Trailing `/` characters on `prefix` are ignored, and the
/// match is on whole segments: `records/contactsx` is not under
/// `records/contacts`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let base = prefix.trim_end_matches('/');
    match bare.strip_prefix(base) {
        // Either the exact folder, or the remainder starts a new segment.
        Some(tail) => tail.is_empty() || tail.starts_with('/'),
        None => false,
    }
}
2441
/// The type-folder of a store-relative content path: `<layer>/<type-folder>`,
/// i.e. the folder directly under the layer (deeper date-shards roll up to
/// it). `None` when the path has fewer than three segments — a file sitting
/// directly in a layer folder — or when the first segment is not one of
/// `sources`, `records`, `wiki`. Segments that are not valid UTF-8 are ignored
/// when counting.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut segments = rel.iter().filter_map(|seg| seg.to_str());
    let layer = segments.next()?;
    let folder = segments.next()?;
    // There must be at least a file name below the type-folder.
    segments.next()?;

    if matches!(layer, "sources" | "records" | "wiki") {
        Some(Path::new(layer).join(folder))
    } else {
        None
    }
}
2455
2456/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`/`wiki/`,
2457/// returning store-relative paths to be parsed in full. Skips hidden dirs,
2458/// `log/`, and the index twin (`index.jsonl`). Used only by `validate_all`; the
2459/// working-set incoming-linker scan rides the embedded-ripgrep
2460/// `Store::find_links_to_any` (a single presence-only pass), so the loop default
2461/// never walks-and-*parses* the whole content tree.
2462fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2463    let mut out = Vec::new();
2464    for layer in ["sources", "records", "wiki"] {
2465        let base = root.join(layer);
2466        if !base.is_dir() {
2467            continue;
2468        }
2469        for entry in walkdir::WalkDir::new(&base)
2470            .into_iter()
2471            .filter_entry(|e| {
2472                let name = e.file_name().to_str().unwrap_or("");
2473                !name.starts_with('.') && name != "log"
2474            })
2475            .flatten()
2476        {
2477            if !entry.file_type().is_file() {
2478                continue;
2479            }
2480            let name = entry.file_name().to_str().unwrap_or("");
2481            if name.ends_with(".md") && name != "index.md" {
2482                if let Ok(rel) = entry.path().strip_prefix(root) {
2483                    out.push(rel.to_path_buf());
2484                }
2485            }
2486        }
2487    }
2488    out.sort();
2489    out
2490}
2491
2492/// Every `index.md` under the store (root + layers + type-folders), as
2493/// store-relative paths. Used to detect orphan indexes.
2494fn walk_index_files(root: &Path) -> Vec<PathBuf> {
2495    let mut out = Vec::new();
2496    if root.join("index.md").is_file() {
2497        out.push(PathBuf::from("index.md"));
2498    }
2499    for layer in ["sources", "records", "wiki"] {
2500        let base = root.join(layer);
2501        if !base.is_dir() {
2502            continue;
2503        }
2504        for entry in walkdir::WalkDir::new(&base)
2505            .into_iter()
2506            .filter_entry(|e| {
2507                let name = e.file_name().to_str().unwrap_or("");
2508                !name.starts_with('.') && name != "log"
2509            })
2510            .flatten()
2511        {
2512            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
2513                if let Ok(rel) = entry.path().strip_prefix(root) {
2514                    out.push(rel.to_path_buf());
2515                }
2516            }
2517        }
2518    }
2519    out.sort();
2520    out
2521}
2522
/// One entry line parsed out of an `index.md`.
struct IndexEntry {
    /// The wiki-link target: the text inside `[[...]]` before any `|display`
    /// segment, trimmed.
    target: String,
    /// The summary text that follows the separator after the link, or `None`
    /// when the line carries no summary.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the index file.
    line: u32,
}
2530
2531/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
2532/// `## More` footer (those lines aren't file entries). Root/layer entries with a
2533/// `|display` segment and a `(N)` count are parsed too — the target is the bare
2534/// path, the summary text is whatever follows the em dash.
2535fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
2536    let mut out = Vec::new();
2537    let mut in_more = false;
2538    for (idx, line) in text.lines().enumerate() {
2539        let trimmed = line.trim_start();
2540        if trimmed.starts_with("## More") {
2541            in_more = true;
2542            continue;
2543        }
2544        if in_more {
2545            continue;
2546        }
2547        if !trimmed.starts_with("- ") {
2548            continue;
2549        }
2550        // Find the first `[[...]]`.
2551        let Some(open) = trimmed.find("[[") else {
2552            continue;
2553        };
2554        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
2555            continue;
2556        };
2557        let inner = &trimmed[open + 2..open + 2 + close_rel];
2558        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2559
2560        // Summary text: whatever follows the first em dash (`—`) or ` - `.
2561        let after = &trimmed[open + 2 + close_rel + 2..];
2562        let summary_text = extract_index_entry_summary(after);
2563
2564        out.push(IndexEntry {
2565            target,
2566            summary_text,
2567            line: (idx + 1) as u32,
2568        });
2569    }
2570    out
2571}
2572
/// Pull the summary out of the text that trails an index entry's wiki-link.
///
/// Steps, in order: trim; drop a leading `(...)` count segment if one is
/// present and closed; require an em dash (`—`) or a hyphen (`-`) separator;
/// take what follows, trimmed; finally cut everything from the first ` · `
/// onward (the `#tag` suffix). Returns `None` when there is no separator or
/// nothing follows it.
fn extract_index_entry_summary(after: &str) -> Option<String> {
    let trimmed = after.trim();

    // "(N files) — summary" → "— summary". An unclosed `(` is left untouched.
    let past_count = trimmed
        .strip_prefix('(')
        .and_then(|inside| inside.split_once(')'))
        .map_or(trimmed, |(_, rest)| rest.trim_start());

    let body = past_count
        .strip_prefix('—')
        .or_else(|| past_count.strip_prefix('-'))?
        .trim();
    if body.is_empty() {
        return None;
    }

    let summary = match body.split_once(" · ") {
        Some((head, _tags)) => head.trim(),
        None => body,
    };
    Some(summary.to_string())
}
2602
2603/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
2604/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
2605/// or the header isn't well-formed.
2606fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
2607    let rest = line.strip_prefix("## [")?;
2608    let close = rest.find(']')?;
2609    let ts_str = &rest[..close];
2610    let tail = rest[close + 1..].trim();
2611
2612    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
2613    // attach a zero offset — the log header carries minute precision, no zone.
2614    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
2615    let offset = FixedOffset::east_opt(0)?;
2616    let ts = naive.and_local_timezone(offset).single()?;
2617
2618    // kind | object
2619    let (kind, object) = match tail.split_once('|') {
2620        Some((k, o)) => {
2621            let o = o.trim();
2622            (
2623                k.trim().to_string(),
2624                if o.is_empty() {
2625                    None
2626                } else {
2627                    Some(o.to_string())
2628                },
2629            )
2630        }
2631        None => (tail.to_string(), None),
2632    };
2633    if kind.is_empty() {
2634        return None;
2635    }
2636    Some((ts, kind, object))
2637}
2638
2639/// The timestamp of the most recent `validate` entry across `log.md` (active)
2640/// — the default working-set cutoff. Reads only headers; never the whole store.
2641fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
2642    let text = std::fs::read_to_string(store.root.join("log.md")).ok()?;
2643    let mut latest: Option<DateTime<FixedOffset>> = None;
2644    for line in text.lines() {
2645        if !line.starts_with("## [") {
2646            continue;
2647        }
2648        if let Some((ts, kind, _)) = parse_log_header(line) {
2649            if kind == "validate" {
2650                latest = Some(match latest {
2651                    Some(p) if p >= ts => p,
2652                    _ => ts,
2653                });
2654            }
2655        }
2656    }
2657    latest
2658}
2659
2660/// The set of content objects changed since `cutoff`, read from `log.md`
2661/// entries whose kind mutates a file. When `cutoff` is `None`, every mutating
2662/// entry counts (no prior validate window). Returns store-relative `.md` paths.
2663fn changed_objects_since(
2664    store: &Store,
2665    cutoff: Option<DateTime<FixedOffset>>,
2666) -> BTreeSet<PathBuf> {
2667    let mut out = BTreeSet::new();
2668    let Ok(text) = std::fs::read_to_string(store.root.join("log.md")) else {
2669        return out;
2670    };
2671    for line in text.lines() {
2672        if !line.starts_with("## [") {
2673            continue;
2674        }
2675        let Some((ts, kind, object)) = parse_log_header(line) else {
2676            continue;
2677        };
2678        if let Some(c) = cutoff {
2679            if ts < c {
2680                continue;
2681            }
2682        }
2683        if !matches!(
2684            kind.as_str(),
2685            "create" | "update" | "ingest" | "rename" | "delete" | "link"
2686        ) {
2687            continue;
2688        }
2689        if let Some(obj) = object {
2690            // The object slot is a store-relative path (or a wiki-link target).
2691            let bare = obj
2692                .trim()
2693                .trim_start_matches("[[")
2694                .trim_end_matches("]]")
2695                .split('|')
2696                .next()
2697                .unwrap_or("")
2698                .trim()
2699                .trim_end_matches(".md")
2700                .to_string();
2701            if bare.is_empty() {
2702                continue;
2703            }
2704            out.insert(PathBuf::from(format!("{bare}.md")));
2705        }
2706    }
2707    out
2708}
2709
/// Outcome of the [`derived_from_ignored_type`] policy check: which
/// `derived_from` target resolved to an ignored-type record, and what that
/// record's type is. It holds exactly what the validate finding and the
/// write-time warning both need to render their message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target exactly as it was handed to the
    /// check (a bare store-relative path, no `.md`).
    pub target: String,
    /// The resolved `type` of that target — a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
2723
2724/// **The single authoritative `### Ignored types` derivation check.** Decides
2725/// whether a `wiki-page` derives from an ignored-type record: the type must be
2726/// `wiki-page`, `### Ignored types` must be non-empty, and some `derived_from`
2727/// target must resolve to a record whose `type` is in `ignored_types`. Returns
2728/// the first such target (and its type), or `None`.
2729///
2730/// Both surfaces call this so the policy lives in exactly one place:
2731/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
2732/// `derived_from` targets it scanned from the raw frontmatter, and the write
2733/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
2734/// The link *extraction* differs per surface (text-scan with line numbers vs.
2735/// the parsed `Frontmatter`); the *decision* — type gate, target-type
2736/// resolution, and `ignored_types` membership — does not.
2737pub fn derived_from_ignored_type<I, S>(
2738    store: &Store,
2739    type_: &str,
2740    derived_from_targets: I,
2741) -> Option<DerivedFromIgnored>
2742where
2743    I: IntoIterator<Item = S>,
2744    S: AsRef<str>,
2745{
2746    if type_ != "wiki-page" || store.config.ignored_types.is_empty() {
2747        return None;
2748    }
2749    for target in derived_from_targets {
2750        let target = target.as_ref();
2751        if let Some(target_type) = link_target_type(store, target) {
2752            if store.config.ignored_types.contains(&target_type) {
2753                return Some(DerivedFromIgnored {
2754                    target: target.to_string(),
2755                    target_type,
2756                });
2757            }
2758        }
2759    }
2760    None
2761}
2762
2763/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
2764fn link_target_type(store: &Store, target: &str) -> Option<String> {
2765    let bare = target.trim_end_matches(".md");
2766    let abs = store.root.join(safe_md_target_rel(bare)?);
2767    let text = std::fs::read_to_string(&abs).ok()?;
2768    let (yaml, _, _) = split_frontmatter(&text)?;
2769    let value: Value = serde_norway::from_str(&yaml).ok()?;
2770    if let Value::Mapping(m) = value {
2771        m.get(Value::String("type".into())).and_then(scalar_string)
2772    } else {
2773        None
2774    }
2775}
2776
2777// ── Shape validators ─────────────────────────────────────────────────────────
2778
2779/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
2780/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
2781fn is_iso8601(s: &str) -> bool {
2782    DateTime::parse_from_rfc3339(s.trim()).is_ok()
2783}
2784
2785/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
2786/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
2787/// accept the date-only form per the SPEC's worked example.
2788fn is_iso8601_date_or_datetime(s: &str) -> bool {
2789    let s = s.trim();
2790    if DateTime::parse_from_rfc3339(s).is_ok() {
2791        return true;
2792    }
2793    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
2794}
2795
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Surrounding whitespace is ignored. After trimming, the address must:
/// - contain no whitespace of any kind (spaces, tabs, …);
/// - contain exactly one `@`, with a non-empty local part before it;
/// - have a domain with at least one `.` and no empty label, which rules out a
///   leading or trailing dot as well as `..`.
///
/// This is a shape check, not a full RFC 5322 parser: quoted local parts and
/// other exotic forms are not recognised.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    if s.contains(char::is_whitespace) {
        return false;
    }
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    !local.is_empty()
        // The split is at the first `@`; a second one can only be in `domain`.
        && !domain.contains('@')
        && domain.contains('.')
        && domain.split('.').all(|label| !label.is_empty())
}
2809
2810/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
2811/// plain decimal number with optional thousands separators and ≤ 2 decimals.
2812///
2813/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
2814/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
2815/// rejected, and the ≤ 2-decimal rule is actually enforced.
2816fn is_currency(s: &str) -> bool {
2817    let mut t = s.trim();
2818    // Strip a leading currency symbol …
2819    for sym in ["$", "€", "£", "¥"] {
2820        if let Some(rest) = t.strip_prefix(sym) {
2821            t = rest.trim_start();
2822            break;
2823        }
2824    }
2825    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
2826    // code must be exactly three ASCII letters and separated from the number by
2827    // whitespace, so a bare `USD` with no amount still fails.
2828    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
2829        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
2830            t = rest.trim_start();
2831        }
2832    }
2833
2834    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
2835    is_plain_amount(cleaned.trim())
2836}
2837
/// True for a bare decimal amount: one optional sign (`+` or `-`), at least
/// one ASCII digit, then optionally a `.` followed by exactly one or two ASCII
/// digits. Exponents, `inf`/`NaN`, a missing integer part (`.5`), a dangling
/// dot (`5.`), and the empty string are all rejected.
fn is_plain_amount(s: &str) -> bool {
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let mut pieces = unsigned.splitn(2, '.');
    let whole = pieces.next().unwrap_or("");
    let fraction = pieces.next();

    if whole.is_empty() || !all_digits(whole) {
        return false;
    }
    match fraction {
        Some(digits) => matches!(digits.len(), 1 | 2) && all_digits(digits),
        None => true,
    }
}
2854
/// True for an http(s) URL: after trimming, the string starts with `http://`
/// or `https://` and has at least one character after the scheme.
///
/// The scheme is stripped before the emptiness check so both schemes get the
/// same rule; comparing the total length against `"https://".len()` would
/// wrongly reject the shortest `http://` URLs such as `http://a`.
fn is_url(s: &str) -> bool {
    let s = s.trim();
    let after_scheme = s
        .strip_prefix("https://")
        .or_else(|| s.strip_prefix("http://"));
    matches!(after_scheme, Some(rest) if !rest.is_empty())
}
2860
2861/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
2862fn shape_suggestion(shape: Shape) -> String {
2863    match shape {
2864        Shape::String => "use a scalar string".into(),
2865        Shape::Int => "use an integer".into(),
2866        Shape::Bool => "use `true` or `false`".into(),
2867        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
2868        Shape::Email => "use a `<local>@<domain>` address".into(),
2869        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
2870        Shape::Url => "use an http(s) URL".into(),
2871    }
2872}
2873
2874/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
2875/// can't know the folder, so the suggestion is generic but actionable.
2876fn short_form_suggestion(bare: &str) -> Option<String> {
2877    Some(format!(
2878        "use a full store-relative path, e.g. [[records/contacts/{}]]",
2879        slugish(bare)
2880    ))
2881}
2882
/// A filesystem-ish leaf for a plain string: trimmed and lowercased, every
/// whitespace character turned into `-`, and anything that is not
/// alphanumeric, `-`, `/`, or `_` dropped.
fn slugish(s: &str) -> String {
    s.trim()
        .to_lowercase()
        .chars()
        .filter_map(|c| {
            let c = if c.is_whitespace() { '-' } else { c };
            (c.is_alphanumeric() || matches!(c, '-' | '/' | '_')).then_some(c)
        })
        .collect()
}
2892
2893/// Push a fully-formed [`Issue`].
2894#[allow(clippy::too_many_arguments)]
2895fn push(
2896    issues: &mut Vec<Issue>,
2897    severity: Severity,
2898    code: &'static str,
2899    file: &Path,
2900    line: Option<u32>,
2901    key: Option<String>,
2902    message: String,
2903    suggestion: Option<String>,
2904    related: Vec<PathBuf>,
2905) {
2906    issues.push(Issue {
2907        severity,
2908        code,
2909        file: file.to_path_buf(),
2910        line,
2911        key,
2912        message,
2913        suggestion,
2914        related,
2915    });
2916}
2917
/// File line (1-based) of the first unindented `key:` line inside the
/// frontmatter YAML, or `None` when no line matches.
///
/// The result is the line's index within `fm_yaml` plus 2: file line 1 is the
/// opening `---`, so YAML line 0 sits on file line 2. A line matches only when
/// it starts with `key` at column 0 and a `:` follows immediately, so indented
/// keys, list-dash entries, and longer keys sharing the prefix do not match.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let is_key_line = |line: &str| {
        line.starts_with(key)
            && matches!(
                line.trim_start().strip_prefix(key),
                Some(rest) if rest.starts_with(':')
            )
    };
    fm_yaml
        .lines()
        .position(is_key_line)
        .map(|idx| idx as u32 + 2)
}
2933
2934/// The line a *field-absence* issue (a required key that is missing entirely)
2935/// anchors to: the key's line when present, else line `1` — the frontmatter
2936/// block's opening `---`. A missing key has no line of its own; anchoring it to
2937/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
2938/// line to point at instead of an unhelpful `null`.
2939fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
2940    fm_key_line(fm_yaml, key).or(Some(1))
2941}
2942
2943/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
2944/// output deterministic across runs.
2945fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
2946    a.file
2947        .cmp(&b.file)
2948        .then(a.line.cmp(&b.line))
2949        .then(a.code.cmp(b.code))
2950        .then(a.key.cmp(&b.key))
2951}
2952
2953// ═════════════════════════════════════════════════════════════════════════════
2954//  Tests
2955// ═════════════════════════════════════════════════════════════════════════════
2956
2957#[cfg(test)]
2958mod tests {
2959    use super::*;
2960    use crate::parser::{Config, FieldSpec};
2961    use std::fs;
2962    use tempfile::TempDir;
2963
2964    /// A test store builder over a real tempdir. Every helper writes real files
2965    /// so the assertions exercise real behavior, not mocks.
2966    struct Fixture {
2967        dir: TempDir,
2968        config: Config,
2969    }
2970
2971    impl Fixture {
2972        /// A fresh store with a **valid** `DB.md` (the identity contract:
2973        /// `type: db-md` + `scope` + `owner`) and the three layer dirs. A valid
2974        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
2975        /// clean; tests that want a broken DB.md write their own via `write`.
2976        fn new() -> Self {
2977            let dir = TempDir::new().unwrap();
2978            fs::write(
2979                dir.path().join("DB.md"),
2980                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
2981            )
2982            .unwrap();
2983            for layer in ["sources", "records", "wiki"] {
2984                fs::create_dir_all(dir.path().join(layer)).unwrap();
2985            }
2986            Fixture {
2987                dir,
2988                config: Config::default(),
2989            }
2990        }
2991
2992        /// A store with no `DB.md` marker.
2993        fn bare() -> Self {
2994            let dir = TempDir::new().unwrap();
2995            Fixture {
2996                dir,
2997                config: Config::default(),
2998            }
2999        }
3000
3001        /// Write a file at a store-relative path, creating parent dirs.
3002        fn write(&self, rel: &str, contents: &str) {
3003            let abs = self.dir.path().join(rel);
3004            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3005            fs::write(abs, contents).unwrap();
3006        }
3007
3008        fn store(&self) -> Store {
3009            Store {
3010                root: self.dir.path().to_path_buf(),
3011                config: self.config.clone(),
3012            }
3013        }
3014
3015        fn store_all(&self) -> Vec<Issue> {
3016            validate_all(&self.store()).unwrap()
3017        }
3018
3019        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3020        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3021        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3022        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3023        /// the sidecar carries the COMPLETE per-field projection and the fixture
3024        /// can't silently drift from what the index writer emits.
3025        fn rebuild_indexes(&self) {
3026            crate::index::Index::rebuild_all(&self.store()).unwrap();
3027        }
3028    }
3029
3030    /// True if any issue has this code.
3031    fn has(issues: &[Issue], code: &str) -> bool {
3032        issues.iter().any(|i| i.code == code)
3033    }
3034
3035    /// Count issues with a code.
3036    fn count(issues: &[Issue], code: &str) -> usize {
3037        issues.iter().filter(|i| i.code == code).count()
3038    }
3039
3040    /// The first issue with a code, or panic.
3041    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3042        issues
3043            .iter()
3044            .find(|i| i.code == code)
3045            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3046    }
3047
3048    /// A minimal valid `contact` body for reuse.
3049    fn valid_contact(summary: &str) -> String {
3050        format!(
3051            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3052        )
3053    }
3054
3055    // ── store marker ──────────────────────────────────────────────────────────
3056
3057    #[test]
3058    fn not_a_store_when_db_md_absent() {
3059        let fx = Fixture::bare();
3060        let issues = fx.store_all();
3061        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3062        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3063        assert!(issues[0].is_error());
3064    }
3065
3066    #[test]
3067    fn working_set_also_reports_not_a_store() {
3068        let fx = Fixture::bare();
3069        let issues = validate_working_set(&fx.store(), None).unwrap();
3070        assert!(has(&issues, codes::NOT_A_STORE));
3071    }
3072
3073    #[test]
3074    fn clean_store_has_no_issues() {
3075        let fx = Fixture::new();
3076        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3077        // Build the canonical indexes (complete per-field jsonl included) the
3078        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3079        // proven clean across every projected field, not just summary/type.
3080        fx.rebuild_indexes();
3081        let issues = fx.store_all();
3082        assert!(
3083            issues.is_empty(),
3084            "expected a clean store, got: {issues:#?}"
3085        );
3086    }
3087
3088    // ── DB.md structure ───────────────────────────────────────────────────────
3089
3090    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3091    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3092    /// would fail here).
3093    #[test]
3094    fn valid_db_md_emits_no_structure_issue() {
3095        let fx = Fixture::new();
3096        let issues = fx.store_all();
3097        assert!(
3098            !has(&issues, codes::DB_MD_BAD_TYPE)
3099                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3100                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3101            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3102        );
3103    }
3104
3105    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3106    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3107    /// accepting a non-`db-md` type, breaks this.
3108    #[test]
3109    fn db_md_wrong_type_is_error() {
3110        let fx = Fixture::new();
3111        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3112        let issues = fx.store_all();
3113        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3114        assert!(i.is_error());
3115        assert_eq!(i.file, PathBuf::from("DB.md"));
3116        assert_eq!(i.key.as_deref(), Some("type"));
3117        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3118    }
3119
3120    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3121    /// absent field, each keyed on its field name, anchored to the block top.
3122    #[test]
3123    fn db_md_missing_scope_and_owner_each_report() {
3124        let fx = Fixture::new();
3125        fx.write("DB.md", "---\ntype: db-md\n---\n");
3126        let issues = fx.store_all();
3127        assert_eq!(
3128            count(&issues, codes::DB_MD_MISSING_FIELD),
3129            2,
3130            "both scope and owner absent → two issues: {issues:#?}"
3131        );
3132        let keys: BTreeSet<Option<String>> = issues
3133            .iter()
3134            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3135            .map(|i| i.key.clone())
3136            .collect();
3137        assert_eq!(
3138            keys,
3139            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3140            "one issue keyed on each missing field"
3141        );
3142        for i in issues
3143            .iter()
3144            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3145        {
3146            assert!(i.is_error());
3147            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3148        }
3149    }
3150
3151    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3152    /// anchored to its own line — guarding against an "is the key textually
3153    /// present?" shortcut that would miss `owner:` with an empty value.
3154    #[test]
3155    fn db_md_blank_required_field_is_missing() {
3156        let fx = Fixture::new();
3157        fx.write(
3158            "DB.md",
3159            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3160        );
3161        let issues = fx.store_all();
3162        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3163        assert_eq!(i.key.as_deref(), Some("owner"));
3164        assert_eq!(
3165            i.line,
3166            Some(4),
3167            "a present-but-empty field anchors to its line"
3168        );
3169        assert!(
3170            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3171            "scope is present and non-empty → only owner reported"
3172        );
3173    }
3174
3175    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
3176    /// to the heading's file line; the three recognized sections stay silent.
3177    #[test]
3178    fn db_md_unknown_section_is_warning() {
3179        let fx = Fixture::new();
3180        fx.write(
3181            "DB.md",
3182            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
3183            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
3184            // 11 `## Glossary`.
3185            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
3186        );
3187        let issues = fx.store_all();
3188        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
3189        assert!(!i.is_error(), "unknown section is a warning, not an error");
3190        assert_eq!(i.severity, Severity::Warning);
3191        assert_eq!(
3192            i.line,
3193            Some(11),
3194            "anchors to the `## Glossary` heading line"
3195        );
3196        assert!(
3197            i.message.contains("Glossary"),
3198            "the message names the offending section: {}",
3199            i.message
3200        );
3201        // The recognized `## Agent instructions` section did NOT fire.
3202        assert_eq!(
3203            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
3204            1,
3205            "only the unrecognized section is flagged: {issues:#?}"
3206        );
3207    }
3208
3209    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
3210    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
3211    #[test]
3212    fn db_md_no_frontmatter_reports_type_and_both_fields() {
3213        let fx = Fixture::new();
3214        fx.write("DB.md", "# just a heading, no frontmatter\n");
3215        let issues = fx.store_all();
3216        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
3217        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
3218    }
3219
3220    // ── frontmatter ─────────────────────────────────────────────────────────
3221
3222    #[test]
3223    fn missing_type_is_error() {
3224        let fx = Fixture::new();
3225        fx.write(
3226            "records/contacts/a.md",
3227            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
3228        );
3229        let issues = fx.store_all();
3230        assert!(has(&issues, codes::FM_MISSING_TYPE));
3231        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
3232    }
3233
3234    #[test]
3235    fn missing_universal_timestamps_are_errors_on_content_files() {
3236        let fx = Fixture::new();
3237        fx.write(
3238            "records/contacts/a.md",
3239            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
3240        );
3241        let issues = fx.store_all();
3242
3243        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
3244        assert_eq!(missing_created.key.as_deref(), Some("created"));
3245        assert!(missing_created.is_error());
3246
3247        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
3248        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
3249        assert!(missing_updated.is_error());
3250    }
3251
3252    #[test]
3253    fn meta_files_do_not_require_universal_timestamps() {
3254        let fx = Fixture::new();
3255        let issues = fx.store_all();
3256
3257        assert!(
3258            !has(&issues, codes::FM_MISSING_CREATED),
3259            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3260        );
3261        assert!(
3262            !has(&issues, codes::FM_MISSING_UPDATED),
3263            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3264        );
3265    }
3266
3267    #[test]
3268    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
3269        let fx = Fixture::new();
3270        fx.write(
3271            "wiki/people/a.md",
3272            "# Just a heading\n\nNo frontmatter here.\n",
3273        );
3274        let issues = fx.store_all();
3275        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3276        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3277    }
3278
3279    #[test]
3280    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
3281        let fx = Fixture::new();
3282        fx.write("wiki/people/a.md", "---\n---\n\nbody\n");
3283        let issues = fx.store_all();
3284        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3285        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3286    }
3287
3288    #[test]
3289    fn malformed_yaml_is_error_and_suppresses_field_checks() {
3290        let fx = Fixture::new();
3291        // A tab inside a mapping value is invalid YAML.
3292        fx.write(
3293            "records/contacts/a.md",
3294            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
3295        );
3296        let issues = fx.store_all();
3297        let issue = find(&issues, codes::FM_MALFORMED_YAML);
3298        assert!(issue.is_error());
3299        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3300        // When YAML doesn't parse we don't *also* claim the summary is missing;
3301        // the agent fixes the YAML first.
3302        assert!(
3303            !has(&issues, codes::SUMMARY_MISSING),
3304            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
3305        );
3306    }
3307
3308    #[test]
3309    fn bad_created_timestamp_is_error() {
3310        let fx = Fixture::new();
3311        fx.write(
3312            "records/contacts/a.md",
3313            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
3314        );
3315        let issues = fx.store_all();
3316        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
3317        assert_eq!(issue.key.as_deref(), Some("created"));
3318        assert!(issue.is_error());
3319    }
3320
3321    #[test]
3322    fn date_only_created_is_rejected_but_type_date_field_accepted() {
3323        let fx = Fixture::new();
3324        // `created` must be a full RFC3339 datetime → a date-only value is bad.
3325        // `last_touch` is a type-specific date field → date-only is fine.
3326        fx.write(
3327            "records/contacts/a.md",
3328            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
3329        );
3330        let issues = fx.store_all();
3331        let created_issues: Vec<_> = issues
3332            .iter()
3333            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
3334            .collect();
3335        assert_eq!(
3336            created_issues.len(),
3337            1,
3338            "date-only `created` must fail: {issues:#?}"
3339        );
3340        assert!(
3341            !issues.iter().any(
3342                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
3343            ),
3344            "date-only `last_touch` is valid: {issues:#?}"
3345        );
3346    }
3347
3348    // ── summary ─────────────────────────────────────────────────────────────
3349
3350    #[test]
3351    fn summary_missing_empty_multiline_toolong() {
3352        let fx = Fixture::new();
3353        fx.write(
3354            "wiki/people/missing.md",
3355            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
3356        );
3357        fx.write(
3358            "wiki/people/empty.md",
3359            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
3360        );
3361        let long = "x".repeat(201);
3362        fx.write(
3363            "wiki/people/long.md",
3364            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
3365        );
3366        let issues = fx.store_all();
3367        assert!(has(&issues, codes::SUMMARY_MISSING));
3368        assert_eq!(
3369            find(&issues, codes::SUMMARY_MISSING).file,
3370            PathBuf::from("wiki/people/missing.md")
3371        );
3372        assert!(has(&issues, codes::SUMMARY_EMPTY));
3373        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
3374        assert_eq!(
3375            find(&issues, codes::SUMMARY_TOO_LONG).severity,
3376            Severity::Warning
3377        );
3378    }
3379
3380    #[test]
3381    fn summary_multiline_via_yaml_block_scalar() {
3382        let fx = Fixture::new();
3383        // A literal block scalar produces a value with a newline.
3384        fx.write(
3385            "wiki/people/a.md",
3386            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
3387        );
3388        let issues = fx.store_all();
3389        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
3390    }
3391
3392    #[test]
3393    fn summary_exactly_200_chars_is_ok() {
3394        let fx = Fixture::new();
3395        let s = "y".repeat(200);
3396        fx.write(
3397            "wiki/people/a.md",
3398            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
3399        );
3400        let issues = fx.store_all();
3401        assert!(
3402            !has(&issues, codes::SUMMARY_TOO_LONG),
3403            "200 is the bound, inclusive: {issues:#?}"
3404        );
3405    }
3406
3407    #[test]
3408    fn meta_files_need_no_summary() {
3409        let fx = Fixture::new();
3410        // The root/layer/type indexes + log carry no summary and must not be
3411        // flagged. (A lone DB.md store with one contact and full indexes.)
3412        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3413        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
3414        fx.write(
3415            "records/index.md",
3416            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
3417        );
3418        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
3419        fx.write(
3420            "records/contacts/index.jsonl",
3421            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
3422        );
3423        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
3424        let issues = fx.store_all();
3425        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3426    }
3427
3428    // ── tags ────────────────────────────────────────────────────────────────
3429
3430    #[test]
3431    fn nested_tags_warns_flat_tags_ok() {
3432        let fx = Fixture::new();
3433        fx.write(
3434            "records/contacts/nested.md",
3435            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
3436        );
3437        fx.write(
3438            "records/contacts/flat.md",
3439            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
3440        );
3441        let issues = fx.store_all();
3442        let tag_issues: Vec<_> = issues
3443            .iter()
3444            .filter(|i| i.code == codes::TAGS_MALFORMED)
3445            .collect();
3446        assert_eq!(
3447            tag_issues.len(),
3448            1,
3449            "only the nested-tags file should warn: {issues:#?}"
3450        );
3451        assert_eq!(
3452            tag_issues[0].file,
3453            PathBuf::from("records/contacts/nested.md")
3454        );
3455        assert_eq!(tag_issues[0].severity, Severity::Warning);
3456    }
3457
3458    // ── wiki-links ────────────────────────────────────────────────────────────
3459
3460    #[test]
3461    fn short_form_wiki_link_is_error() {
3462        let fx = Fixture::new();
3463        let mut body = valid_contact("links to a short form");
3464        body.push_str("\nSee [[sarah-chen]] for details.\n");
3465        fx.write("wiki/people/a.md", &body);
3466        let issues = fx.store_all();
3467        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3468        assert!(issue.is_error());
3469        assert!(issue.message.contains("sarah-chen"));
3470        // A short-form link must NOT also be reported broken — fix the form first.
3471        assert!(
3472            !issues
3473                .iter()
3474                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
3475            "short-form should suppress broken: {issues:#?}"
3476        );
3477    }
3478
3479    #[test]
3480    fn broken_full_path_wiki_link_is_error() {
3481        let fx = Fixture::new();
3482        let mut body = valid_contact("links to a missing file");
3483        body.push_str("\nSee [[records/contacts/ghost]].\n");
3484        fx.write("wiki/people/a.md", &body);
3485        let issues = fx.store_all();
3486        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
3487        assert!(issue.is_error());
3488        assert!(issue.message.contains("records/contacts/ghost"));
3489        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3490    }
3491
3492    #[test]
3493    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
3494        let fx = Fixture::new();
3495        let mut body = valid_contact("links with traversal");
3496        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
3497        fx.write("wiki/people/a.md", &body);
3498        let issues = fx.store_all();
3499        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
3500        assert!(issue.message.contains("not a safe store-relative path"));
3501        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3502    }
3503
3504    #[test]
3505    fn valid_full_path_wiki_link_passes() {
3506        let fx = Fixture::new();
3507        fx.write("records/contacts/target.md", &valid_contact("target"));
3508        let mut body = valid_contact("links to target");
3509        body.push_str("\nSee [[records/contacts/target]].\n");
3510        fx.write("wiki/people/a.md", &body);
3511        let issues = fx.store_all();
3512        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3513        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
3514    }
3515
3516    #[test]
3517    fn md_extension_wiki_link_warns_and_resolves() {
3518        let fx = Fixture::new();
3519        fx.write("records/contacts/target.md", &valid_contact("target"));
3520        let mut body = valid_contact("links with extension");
3521        body.push_str("\nSee [[records/contacts/target.md]].\n");
3522        fx.write("wiki/people/a.md", &body);
3523        let issues = fx.store_all();
3524        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
3525        assert_eq!(issue.severity, Severity::Warning);
3526        assert_eq!(
3527            issue.suggestion.as_deref(),
3528            Some("drop the extension: [[records/contacts/target]]")
3529        );
3530        // The target exists once `.md` is stripped → not broken.
3531        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3532    }
3533
3534    #[test]
3535    fn wiki_links_in_code_fences_are_ignored() {
3536        let fx = Fixture::new();
3537        let mut body = valid_contact("has a fenced example");
3538        body.push_str("\n```\n[[sarah-chen]]\n```\n");
3539        fx.write("wiki/people/a.md", &body);
3540        let issues = fx.store_all();
3541        assert!(
3542            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
3543            "fenced wiki-links must be ignored: {issues:#?}"
3544        );
3545    }
3546
3547    #[test]
3548    fn flow_form_link_list_in_frontmatter_is_error() {
3549        let fx = Fixture::new();
3550        fx.write(
3551            "records/meetings/m.md",
3552            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
3553        );
3554        let issues = fx.store_all();
3555        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
3556        assert!(issue.is_error());
3557        assert_eq!(issue.key.as_deref(), Some("attendees"));
3558    }
3559
3560    #[test]
3561    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
3562        let fx = Fixture::new();
3563        fx.write("records/contacts/a.md", &valid_contact("a"));
3564        fx.write("records/contacts/b.md", &valid_contact("b"));
3565        fx.write(
3566            "records/meetings/m.md",
3567            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
3568        );
3569        let issues = fx.store_all();
3570        assert!(
3571            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
3572            "{issues:#?}"
3573        );
3574        // Block-form link targets are still integrity-checked (both exist here).
3575        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3576    }
3577
3578    #[test]
3579    fn frontmatter_short_form_link_field_is_error() {
3580        let fx = Fixture::new();
3581        // `related` is a *custom* (non-schema) wiki-link field, so it goes
3582        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
3583        fx.write(
3584            "wiki/people/a.md",
3585            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
3586        );
3587        let issues = fx.store_all();
3588        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3589        assert!(issue.is_error());
3590        assert_eq!(issue.key.as_deref(), Some("related"));
3591    }
3592
3593    #[test]
3594    fn unquoted_frontmatter_link_is_recognized() {
3595        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
3596        // string. The validator must still see it as a wiki-link (text-based
3597        // extraction). A short-form custom field must report SHORT_FORM, and a
3598        // full-path one with a missing target must report BROKEN.
3599        let fx = Fixture::new();
3600        fx.write(
3601            "wiki/people/short.md",
3602            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
3603        );
3604        fx.write(
3605            "wiki/people/broken.md",
3606            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
3607        );
3608        let issues = fx.store_all();
3609        assert!(
3610            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
3611                && i.file == *"wiki/people/short.md"
3612                && i.key.as_deref() == Some("related")),
3613            "unquoted short-form frontmatter link must be caught: {issues:#?}"
3614        );
3615        assert!(
3616            issues
3617                .iter()
3618                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.file == *"wiki/people/broken.md"),
3619            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
3620        );
3621    }
3622
3623    #[test]
3624    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
3625        // A short-form value in a *declared* link field (a `### contact` schema
3626        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
3627        // (the target isn't under the prefix), and must NOT also be reported as a
3628        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
3629        let mut fx = Fixture::new();
3630        fx.config.schemas.insert(
3631            "contact".into(),
3632            Schema {
3633                fields: vec![FieldSpec {
3634                    name: "company".into(),
3635                    link_prefix: Some(PathBuf::from("records/companies")),
3636                    ..Default::default()
3637                }],
3638                ..Default::default()
3639            },
3640        );
3641        fx.write(
3642            "records/contacts/a.md",
3643            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
3644        );
3645        let issues = fx.store_all();
3646        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
3647        assert_eq!(issue.key.as_deref(), Some("company"));
3648        // The same link must NOT also be double-reported via the generic path.
3649        assert!(
3650            !issues
3651                .iter()
3652                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
3653                    && i.key.as_deref() == Some("company")),
3654            "schema link fields are checked once, by the schema path: {issues:#?}"
3655        );
3656    }
3657
3658    #[test]
3659    fn schema_link_field_with_md_extension_still_warns() {
3660        let mut fx = Fixture::new();
3661        fx.config.schemas.insert(
3662            "contact".into(),
3663            Schema {
3664                fields: vec![FieldSpec {
3665                    name: "company".into(),
3666                    link_prefix: Some(PathBuf::from("records/companies")),
3667                    ..Default::default()
3668                }],
3669                ..Default::default()
3670            },
3671        );
3672        fx.write(
3673            "records/companies/acme.md",
3674            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
3675        );
3676        fx.write(
3677            "records/contacts/a.md",
3678            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
3679        );
3680        let issues = fx.store_all();
3681        let issue = issues
3682            .iter()
3683            .find(|i| {
3684                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
3685            })
3686            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
3687        assert_eq!(issue.severity, Severity::Warning);
3688        assert!(
3689            !issues
3690                .iter()
3691                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
3692            "extensionless existence check should still find acme.md: {issues:#?}"
3693        );
3694    }
3695
3696    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
3697
3698    #[test]
3699    fn explicit_schema_required_shape_enum() {
3700        let fx = {
3701            let mut fx = Fixture::new();
3702            // contact schema: name required, email required+email shape,
3703            // status enum: active|inactive
3704            let schema = Schema {
3705                fields: vec![
3706                    FieldSpec {
3707                        name: "name".into(),
3708                        required: true,
3709                        ..Default::default()
3710                    },
3711                    FieldSpec {
3712                        name: "email".into(),
3713                        required: true,
3714                        shape: Some(Shape::Email),
3715                        ..Default::default()
3716                    },
3717                    FieldSpec {
3718                        name: "status".into(),
3719                        enum_values: Some(vec!["active".into(), "inactive".into()]),
3720                        ..Default::default()
3721                    },
3722                ],
3723                ..Default::default()
3724            };
3725            fx.config.schemas.insert("contact".into(), schema);
3726            fx
3727        };
3728        fx.write(
3729            "records/contacts/a.md",
3730            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
3731        );
3732        let issues = fx.store_all();
3733        // name absent → MISSING_REQUIRED
3734        assert!(
3735            issues
3736                .iter()
3737                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
3738                    && i.key.as_deref() == Some("name")),
3739            "{issues:#?}"
3740        );
3741        // email malformed → SHAPE_MISMATCH
3742        assert!(
3743            issues.iter().any(
3744                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
3745            ),
3746            "{issues:#?}"
3747        );
3748        // status archived not in enum → ENUM_VIOLATION
3749        assert!(
3750            issues
3751                .iter()
3752                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
3753                    && i.key.as_deref() == Some("status")),
3754            "{issues:#?}"
3755        );
3756    }
3757
#[test]
fn schema_without_link_field_allows_plain_value() {
    // Enforcement covers only what the store declares: this `contact` schema
    // has no `company` link field, so a plain-string `company` must not be
    // reported as a link-prefix mismatch.
    let name_only = Schema {
        fields: vec![FieldSpec {
            name: "name".into(),
            required: true,
            ..Default::default()
        }],
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), name_only);

    fx.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
    );

    let issues = fx.store_all();
    let flagged = has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert!(
        !flagged,
        "no declared link field for `company` → a plain value is fine: {issues:#?}"
    );
}
3785
#[test]
fn schema_link_field_plain_value_is_prefix_mismatch() {
    // When `company` IS declared as a link into `records/companies`, a plain
    // string value is reported as SCHEMA_LINK_PREFIX_MISMATCH and the
    // suggestion names the expected prefix.
    let company_link = FieldSpec {
        name: "company".into(),
        link_prefix: Some(PathBuf::from("records/companies")),
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "contact".into(),
        Schema {
            fields: vec![company_link],
            ..Default::default()
        },
    );

    fx.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
    );

    let issues = fx.store_all();
    let mismatch = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert_eq!(mismatch.key.as_deref(), Some("company"));
    let suggestion = mismatch.suggestion.as_deref().unwrap();
    assert!(suggestion.contains("records/companies/"));
}
3815
#[test]
fn schema_shape_int_and_url_and_currency() {
    // A `widget` schema with one field per shape under test.
    let qty = FieldSpec {
        name: "qty".into(),
        shape: Some(Shape::Int),
        ..Default::default()
    };
    let site = FieldSpec {
        name: "site".into(),
        shape: Some(Shape::Url),
        ..Default::default()
    };
    let price = FieldSpec {
        name: "price".into(),
        shape: Some(Shape::Currency),
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "widget".into(),
        Schema {
            fields: vec![qty, site, price],
            ..Default::default()
        },
    );

    // Well-formed record: integer qty, https URL, and an ISO-code-prefixed,
    // thousands-separated amount (`USD 1,234.50`) — none of these may fire.
    fx.write(
        "records/widgets/ok.md",
        "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
    );
    // Malformed record: non-numeric qty, `ftp://` site, and `price: inf`
    // (a spelling float parsing would accept, which must still be rejected).
    fx.write(
        "records/widgets/bad.md",
        "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
    );

    let issues = fx.store_all();

    // Keys of every shape mismatch reported against the bad record.
    let mut bad_shape: Vec<String> = Vec::new();
    for issue in issues.iter() {
        if issue.code == codes::SCHEMA_SHAPE_MISMATCH && issue.file == *"records/widgets/bad.md" {
            bad_shape.push(issue.key.clone().unwrap_or_default());
        }
    }
    assert!(bad_shape.iter().any(|k| k.as_str() == "qty"), "{issues:#?}");
    assert!(bad_shape.iter().any(|k| k.as_str() == "site"), "{issues:#?}");
    assert!(
        bad_shape.iter().any(|k| k.as_str() == "price"),
        "inf must be rejected as currency: {issues:#?}"
    );

    // The good record must carry no shape mismatch at all.
    assert!(
        issues
            .iter()
            .all(|i| i.code != codes::SCHEMA_SHAPE_MISMATCH
                || i.file != *"records/widgets/ok.md"),
        "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
    );
}
3877
#[test]
fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
    let email_field = FieldSpec {
        name: "email".into(),
        required: true,
        shape: Some(Shape::Email),
        ..Default::default()
    };
    let status_field = FieldSpec {
        name: "status".into(),
        enum_values: Some(vec!["active".into(), "inactive".into()]),
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "contact".into(),
        Schema {
            fields: vec![email_field, status_field],
            ..Default::default()
        },
    );

    // Both the required email-shaped field and the enum field hold a YAML
    // LIST rather than a scalar; each must be flagged SCHEMA_SHAPE_MISMATCH
    // instead of being silently skipped.
    fx.write(
        "records/contacts/bad.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
    );

    let issues = fx.store_all();
    let mut mismatched: Vec<String> = Vec::new();
    for issue in issues.iter() {
        if issue.code == codes::SCHEMA_SHAPE_MISMATCH {
            mismatched.push(issue.key.clone().unwrap_or_default());
        }
    }
    assert!(
        mismatched.iter().any(|k| k.as_str() == "email"),
        "list-valued required email must flag: {issues:#?}"
    );
    assert!(
        mismatched.iter().any(|k| k.as_str() == "status"),
        "list-valued enum must flag: {issues:#?}"
    );
}
3922
#[test]
fn is_currency_accepts_codes_and_rejects_non_numeric() {
    // Accepted: bare numbers, currency symbols, and 3-letter codes in either
    // case, with optional sign and comma thousands separators.
    let accepted = [
        "100",
        "1234.56",
        "$1,234.50",
        "USD 100", // ISO code + amount
        "usd 100", // lower-case code
        "EUR 9.50",
        "£12",
        "¥1000",
        "-5.00", // negative amount
        "+5",    // explicit plus sign
        "1,000,000",
    ];
    for ok in accepted {
        assert!(is_currency(ok), "expected currency: {ok:?}");
    }

    // Rejected: float spellings that are not amounts, over-precise decimals,
    // code/symbol without an amount, and malformed digit groupings.
    let rejected = [
        "inf",
        "-inf",
        "infinity",
        "NaN",
        "nan",
        "12.999",   // three decimals
        "1.2345",   // four decimals
        "USD",      // code with no amount
        "$",        // symbol with no amount
        "free",
        "",
        " ",
        "1e3",      // exponent notation
        "1.",       // dot with no fractional digits
        ".5",       // dot with no integer digits
        "1 000",    // space used as a separator
        "USDD 100", // four-letter prefix is not a code
    ];
    for bad in rejected {
        assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
    }
}
3958
3959    // ── policies ───────────────────────────────────────────────────────────
3960
#[test]
fn ignored_type_present_is_info() {
    // A record whose type is on the ignored list is reported at Info
    // severity — never as an error — and comes with a non-empty suggestion.
    let mut fx = Fixture::new();
    fx.config.ignored_types.push("temp".into());
    fx.write(
        "records/temps/x.md",
        "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
    );

    let issues = fx.store_all();
    let present = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
    assert_eq!(present.severity, Severity::Info);
    assert!(!present.is_error());
    assert!(matches!(present.suggestion.as_deref(), Some(s) if !s.is_empty()));
}
3975
#[test]
fn wiki_page_derived_from_ignored_type_warns() {
    let mut fx = Fixture::new();
    fx.config.ignored_types.push("temp".into());

    // An ignored-type record, and a wiki page whose `derived_from` links it.
    let ignored_record = "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n";
    let deriving_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n";
    fx.write("records/temps/x.md", ignored_record);
    fx.write("wiki/themes/t.md", deriving_page);

    let issues = fx.store_all();
    let derived = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
    assert_eq!(derived.severity, Severity::Warning);
    assert_eq!(derived.key.as_deref(), Some("derived_from"));
    assert!(matches!(derived.suggestion.as_deref(), Some(s) if !s.is_empty()));
}
3994
/// Pins the contract of `derived_from_ignored_type` by calling it directly:
/// a `wiki-page` deriving from an ignored-type record matches and reports
/// the target plus its resolved type; a non-`wiki-page` source never
/// matches; a non-ignored target never matches; a later ignored target in a
/// multi-target list is still found; and an empty ignored-types list
/// disables the check altogether.
#[test]
fn derived_from_ignored_type_is_the_shared_policy_decision() {
    let secret_target = "records/secrets/s";
    let contact_target = "records/contacts/c";

    let mut fx = Fixture::new();
    fx.config.ignored_types.push("secret".into());
    // One record of the ignored type …
    fx.write(
        "records/secrets/s.md",
        "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
    );
    // … and one record of an ordinary type.
    fx.write(
        "records/contacts/c.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
    );
    let policed = fx.store();

    // Positive case: the hit echoes the target as written and carries the
    // type resolved from the target file.
    let positive = derived_from_ignored_type(&policed, "wiki-page", std::iter::once(secret_target))
        .expect("wiki-page → ignored-type record must match");
    assert_eq!(positive.target, "records/secrets/s");
    assert_eq!(positive.target_type, "secret");

    // Source-type gate: the same target from a non-`wiki-page` source is
    // not a match.
    let from_contact = derived_from_ignored_type(&policed, "contact", std::iter::once(secret_target));
    assert_eq!(from_contact, None, "only wiki-page derivation is policed");

    // Target gate: deriving from a record whose type is not ignored is not
    // a match.
    let from_allowed =
        derived_from_ignored_type(&policed, "wiki-page", std::iter::once(contact_target));
    assert_eq!(
        from_allowed, None,
        "deriving from a non-ignored type is allowed"
    );

    // Multiple targets: the ignored one is found even when it comes second.
    let later = derived_from_ignored_type(&policed, "wiki-page", [contact_target, secret_target])
        .expect("a later ignored-type target must still be found");
    assert_eq!(later.target, "records/secrets/s");

    // Empty ignored-types list: nothing can match.
    fx.config.ignored_types.clear();
    let unpoliced = fx.store();
    let with_empty_policy =
        derived_from_ignored_type(&unpoliced, "wiki-page", std::iter::once(secret_target));
    assert_eq!(
        with_empty_policy, None,
        "an empty ignored-types policy short-circuits"
    );
}
4058
4059    // ── duplicates ───────────────────────────────────────────────────────────
4060
#[test]
fn dup_id_is_hard_error_with_related() {
    let fx = Fixture::new();
    // Two contacts that both declare `id: shared`.
    for (stem, name) in [("a", "A"), ("b", "B")] {
        fx.write(
            &format!("records/contacts/{stem}.md"),
            &format!("---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: {stem}\nname: {name}\n---\n\n# {name}\n"),
        );
    }
    let issues = fx.store_all();

    // Exactly one issue for the collision group: reported on the
    // lexicographically smallest path (`a.md`) with the partner in `related`.
    let dup_count = count(&issues, codes::DUP_ID);
    assert_eq!(dup_count, 1, "one issue per group: {issues:#?}");

    let dup = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
    assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
    assert!(dup.is_error());
    assert_eq!(dup.key.as_deref(), Some("id"));
    // `id:` is the third line of the fixture file.
    assert_eq!(
        dup.line,
        Some(3),
        "anchors to the `id` line on the reported file"
    );
    assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
}
4091
#[test]
fn dup_id_not_fired_in_working_set() {
    // Duplicate detection is a cross-file check; this pins that
    // `validate_working_set` does not report DUP_ID even when both colliding
    // files are named in the log.
    let fx = Fixture::new();
    for (stem, name) in [("a", "A"), ("b", "B")] {
        fx.write(
            &format!("records/contacts/{stem}.md"),
            &format!("---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: {stem}\nname: {name}\n---\n\n# {name}\n"),
        );
    }
    // The log records a `create` entry for each of the two contacts.
    let log = "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n";
    fx.write("log.md", log);

    let store = fx.store();
    let issues = validate_working_set(&store, None).unwrap();
    let fired = has(&issues, codes::DUP_ID);
    assert!(!fired, "DUP_ID is --all only: {issues:#?}");
}
4115
#[test]
fn dup_unique_key_single_field_is_warning() {
    // `contact` declares a single-field unique key on `email`.
    let unique_email = Schema {
        unique_keys: vec![vec!["email".into()]],
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), unique_email);

    // Two contacts that share the same email address.
    let contact = |name: &str| {
        format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n")
    };
    fx.write("records/contacts/a.md", &contact("A"));
    fx.write("records/contacts/b.md", &contact("B"));

    let issues = fx.store_all();
    // One warning for the group, reported on the smallest path and keyed on
    // the `email` field, with the other file in `related`.
    assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
    let collision = find(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collision.severity, Severity::Warning);
    assert_eq!(collision.file, PathBuf::from("records/contacts/a.md"));
    assert_eq!(collision.key.as_deref(), Some("email"));
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/contacts/b.md")]
    );
}
4143
#[test]
fn dup_unique_key_compound_and_clean_when_one_field_differs() {
    // `expense` declares the compound unique key (date, amount, vendor).
    let compound_key = Schema {
        unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert("expense".into(), compound_key);

    // The vendor every expense links to.
    fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");

    // e1 and e2 agree on all three key fields; e3 differs only in amount.
    for (f, amount) in [("e1", "100"), ("e2", "100"), ("e3", "200")] {
        fx.write(
            &format!("records/expenses/{f}.md"),
            &format!("---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"),
        );
    }

    let issues = fx.store_all();
    // A single issue covers the e1+e2 group, reported on e1 with e2 related.
    let collisions = count(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collisions, 1, "only e1+e2 collide, one issue: {issues:#?}");

    let collision = find(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collision.file, PathBuf::from("records/expenses/e1.md"));
    assert_eq!(
        collision.line,
        Some(1),
        "compound-key collision anchors to line 1"
    );
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/expenses/e2.md")]
    );

    // e3 must not show up as a partner in any unique-key issue.
    let e3 = PathBuf::from("records/expenses/e3.md");
    assert!(
        issues
            .iter()
            .all(|i| i.code != codes::DUP_UNIQUE_KEY || !i.related.contains(&e3)),
        "e3 differs on amount and must not collide: {issues:#?}"
    );
}
4186
#[test]
fn dup_unique_key_list_field_is_order_independent() {
    // `meeting` declares the unique key (date, attendees); `attendees` is a
    // list whose order must not matter for the comparison.
    let meeting_key = Schema {
        unique_keys: vec![vec!["date".into(), "attendees".into()]],
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert("meeting".into(), meeting_key);

    fx.write("records/contacts/a.md", &valid_contact("a"));
    fx.write("records/contacts/b.md", &valid_contact("b"));

    // The same two attendees, listed in opposite orders.
    let a_then_b = "  - [[records/contacts/a]]\n  - [[records/contacts/b]]";
    let b_then_a = "  - [[records/contacts/b]]\n  - [[records/contacts/a]]";
    let meeting = |f: &str, attendees: &str| {
        format!(
            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
        )
    };
    fx.write("records/meetings/m1.md", &meeting("m1", a_then_b));
    fx.write("records/meetings/m2.md", &meeting("m2", b_then_a));

    let issues = fx.store_all();
    // Same date and same attendee set → one collision, reported on m1 with
    // m2 as the related file.
    let collisions = count(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(
        collisions, 1,
        "same date + same attendee set (any order) collide as one issue: {issues:#?}"
    );
    let collision = find(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collision.file, PathBuf::from("records/meetings/m1.md"));
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/meetings/m2.md")]
    );
}
4224
4225    // ── indexes ───────────────────────────────────────────────────────────────
4226
#[test]
fn missing_indexes_at_all_three_levels() {
    // A store with one contact and no index files anywhere.
    let fx = Fixture::new();
    fx.write("records/contacts/a.md", &valid_contact("a"));
    let issues = fx.store_all();

    // Gather the `file` of every INDEX_MISSING issue.
    let mut missing_files: BTreeSet<PathBuf> = BTreeSet::new();
    for issue in issues.iter() {
        if issue.code == codes::INDEX_MISSING {
            missing_files.insert(issue.file.clone());
        }
    }

    // The root and layer indexes are reported on their `index.md` path; the
    // type-folder one is reported on the FOLDER path (`records/contacts`),
    // not on its would-be `index.md`.
    for expected in ["index.md", "records/index.md", "records/contacts"] {
        assert!(
            missing_files.contains(&PathBuf::from(expected)),
            "{issues:#?}"
        );
    }

    // With `index.md` absent, no separate INDEX_JSONL_MISSING is raised for
    // the same folder.
    let jsonl_missing = has(&issues, codes::INDEX_JSONL_MISSING);
    assert!(!jsonl_missing, "{issues:#?}");
}
4256
#[test]
fn index_stale_entry_and_missing_entry() {
    let fx = Fixture::new();
    let present = valid_contact("present contact");
    fx.write("records/contacts/present.md", &present);

    // Root and layer indexes are supplied so only the type-folder index is
    // under test. That index lists a nonexistent `ghost` entry (stale) and
    // leaves out `present` (missing); the jsonl twin lists `present`.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
        ),
        ("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n"),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();

    let stale_entry = find(&issues, codes::INDEX_STALE_ENTRY);
    assert!(stale_entry.message.contains("ghost"));
    assert!(stale_entry.is_error());

    let missing_entry = find(&issues, codes::INDEX_MISSING_ENTRY);
    assert!(
        missing_entry.message.contains("present.md"),
        "{}",
        missing_entry.message
    );
}
4287
#[test]
fn index_md_entry_with_traversal_path_is_stale_not_probe() {
    let fx = Fixture::new();
    let contact = valid_contact("a");
    fx.write("records/contacts/a.md", &contact);

    // The type-folder index holds an entry whose target contains `..`
    // segments; it must be reported as a stale entry with the
    // unsafe-path message.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
        ),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();
    let stale_entry = find(&issues, codes::INDEX_STALE_ENTRY);
    assert!(stale_entry
        .message
        .contains("not a safe store-relative path"));
}
4309
#[test]
fn index_summary_mismatch() {
    let fx = Fixture::new();
    let contact = valid_contact("the real summary");
    fx.write("records/contacts/a.md", &contact);

    // The type-folder index entry carries a summary that differs from the
    // file's own; the jsonl twin carries the correct one.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
        ),
        ("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n"),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();
    let mismatch = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
    assert!(mismatch.is_error());
    assert_eq!(
        mismatch.related,
        vec![PathBuf::from("records/contacts/a.md")]
    );
}
4329
#[test]
fn index_summary_match_passes() {
    let fx = Fixture::new();
    let contact = valid_contact("matching summary");
    fx.write("records/contacts/a.md", &contact);

    // Index entry, jsonl record, and file all carry the same summary.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
        ),
        ("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n"),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();
    let mismatch = has(&issues, codes::INDEX_SUMMARY_MISMATCH);
    assert!(!mismatch, "{issues:#?}");
}
4347
#[test]
fn index_entry_with_tag_suffix_matches_summary() {
    let fx = Fixture::new();
    let contact = valid_contact("clean summary");
    fx.write("records/contacts/a.md", &contact);

    // The index entry ends in a ` · #tag` suffix after the summary; the
    // summary comparison must still treat it as matching.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary · #customer\n",
        ),
        ("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n"),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();
    let mismatch = has(&issues, codes::INDEX_SUMMARY_MISMATCH);
    assert!(!mismatch, "tag suffix should be stripped: {issues:#?}");
}
4369
#[test]
fn index_jsonl_desync_missing_file_in_jsonl() {
    let fx = Fixture::new();
    let contact_a = valid_contact("a");
    let contact_b = valid_contact("b");
    fx.write("records/contacts/a.md", &contact_a);
    fx.write("records/contacts/b.md", &contact_b);

    // `index.md` lists both contacts, but the jsonl twin only has a record
    // for `a` — the absent `b` record must be reported as a desync.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
        ),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();
    let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
    let names_b = desync.message.contains("b.md");
    assert!(names_b, "{}", desync.message);
}
4393
#[test]
fn index_jsonl_desync_record_points_at_missing_file() {
    let fx = Fixture::new();
    let contact = valid_contact("a");
    fx.write("records/contacts/a.md", &contact);

    // The jsonl twin has a second record for `ghost.md`, a file that does
    // not exist in the store.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
        ),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();
    let mut ghost_desyncs = issues
        .iter()
        .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
        .filter(|i| i.message.contains("ghost.md"));
    assert!(ghost_desyncs.next().is_some(), "{issues:#?}");
}
4419
#[test]
fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
    let fx = Fixture::new();
    let contact = valid_contact("a");
    fx.write("records/contacts/a.md", &contact);

    // The jsonl twin has a second record whose `path` contains `..`
    // segments; it must be reported as a desync with the unsafe-path
    // message.
    for (path, body) in [
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
        ),
    ] {
        fx.write(path, body);
    }

    let issues = fx.store_all();
    let mut unsafe_desyncs = issues
        .iter()
        .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
        .filter(|i| i.message.contains("not a safe store-relative path"));
    assert!(unsafe_desyncs.next().is_some(), "{issues:#?}");
}
4444
4445    #[test]
4446    fn index_jsonl_stale_summary() {
4447        let fx = Fixture::new();
4448        fx.write("records/contacts/a.md", &valid_contact("real summary"));
4449        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4450        fx.write(
4451            "records/index.md",
4452            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4453        );
4454        fx.write(
4455            "records/contacts/index.md",
4456            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
4457        );
4458        // jsonl summary disagrees with the file frontmatter.
4459        fx.write(
4460            "records/contacts/index.jsonl",
4461            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
4462        );
4463        let issues = fx.store_all();
4464        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4465        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4466        assert!(stale.key.as_deref().unwrap().contains("summary"));
4467    }
4468
4469    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
4470    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
4471    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
4472    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
4473    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
4474    /// `email` validated clean and answered `--where email=…` with a phantom
4475    /// value present in no file. This is the direct regression guard.
4476    #[test]
4477    fn index_jsonl_stale_queryable_field_email() {
4478        let fx = Fixture::new();
4479        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
4480        fx.write("records/contacts/a.md", contact);
4481        // Start from the canonical, fully-correct sidecar set …
4482        fx.rebuild_indexes();
4483        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
4484        let good = fs::read_to_string(&jsonl_path).unwrap();
4485        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
4486        assert!(
4487            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4488            "freshly-rebuilt sidecar must not be stale"
4489        );
4490        // … then desync ONLY the email so it's the single differing field.
4491        assert!(
4492            good.contains("real@correct.com"),
4493            "sidecar projects email: {good}"
4494        );
4495        fx.write(
4496            "records/contacts/index.jsonl",
4497            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
4498        );
4499
4500        let issues = fx.store_all();
4501        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4502        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4503        // The mismatch is reported precisely on `email`, and summary/type — which
4504        // still match — are NOT named.
4505        let key = stale.key.as_deref().unwrap();
4506        assert!(
4507            key.contains("email"),
4508            "expected `email` in stale key, got {key:?}"
4509        );
4510        assert!(!key.contains("summary"), "summary still matches: {key:?}");
4511        assert!(!key.contains("type"), "type still matches: {key:?}");
4512    }
4513
4514    /// Broaden the guard across the typed/list/timestamp projections at once:
4515    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
4516    /// caught, with all three named in one issue.
4517    #[test]
4518    fn index_jsonl_stale_typed_and_list_fields() {
4519        let fx = Fixture::new();
4520        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
4521        fx.write("records/expenses/e.md", expense);
4522        fx.rebuild_indexes();
4523        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
4524        let good = fs::read_to_string(&jsonl_path).unwrap();
4525        assert!(
4526            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4527            "freshly-rebuilt sidecar must not be stale"
4528        );
4529        // Desync a list field (tags), a timestamp (updated), and a number (amount).
4530        let stale_line = good
4531            .replace("\"q2\"", "\"WRONG-TAG\"")
4532            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
4533            .replace("1299", "9999");
4534        fx.write("records/expenses/index.jsonl", &stale_line);
4535
4536        let issues = fx.store_all();
4537        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4538        let key = stale.key.as_deref().unwrap();
4539        for expected in ["amount", "tags", "updated"] {
4540            assert!(
4541                key.contains(expected),
4542                "expected `{expected}` in stale key, got {key:?}"
4543            );
4544        }
4545    }
4546
4547    #[test]
4548    fn index_orphan_in_noncanonical_folder() {
4549        let fx = Fixture::new();
4550        fx.write("records/contacts/a.md", &valid_contact("a"));
4551        // Build the canonical indexes so they aren't reported as orphans.
4552        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4553        fx.write(
4554            "records/index.md",
4555            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4556        );
4557        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4558        fx.write(
4559            "records/contacts/index.jsonl",
4560            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4561        );
4562        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
4563        fx.write(
4564            "records/contacts/subfolder/index.md",
4565            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
4566        );
4567        let issues = fx.store_all();
4568        let orphan = find(&issues, codes::INDEX_ORPHAN);
4569        assert_eq!(orphan.severity, Severity::Warning);
4570        assert_eq!(
4571            orphan.file,
4572            PathBuf::from("records/contacts/subfolder/index.md")
4573        );
4574    }
4575
4576    #[test]
4577    fn index_wrong_scope() {
4578        let fx = Fixture::new();
4579        fx.write("records/contacts/a.md", &valid_contact("a"));
4580        // Root index declares the wrong scope.
4581        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4582        fx.write(
4583            "records/index.md",
4584            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4585        );
4586        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4587        fx.write(
4588            "records/contacts/index.jsonl",
4589            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4590        );
4591        let issues = fx.store_all();
4592        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
4593        assert_eq!(issue.severity, Severity::Warning);
4594        assert_eq!(issue.file, PathBuf::from("index.md"));
4595    }
4596
4597    #[test]
4598    fn capped_type_folder_index_does_not_flag_missing_entries() {
4599        // Over the 500-entry cap, omitted entries are expected, not an error.
4600        let fx = Fixture::new();
4601        for i in 0..501 {
4602            fx.write(
4603                &format!("records/contacts/c{i:04}.md"),
4604                &valid_contact(&format!("contact {i}")),
4605            );
4606        }
4607        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
4608        fx.write(
4609            "records/index.md",
4610            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4611        );
4612        // Type-folder index lists only ONE entry + a More footer.
4613        fx.write(
4614            "records/contacts/index.md",
4615            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
4616        );
4617        // jsonl must still be complete — write all 501 lines.
4618        let mut jsonl = String::new();
4619        for i in 0..501 {
4620            jsonl.push_str(&format!(
4621                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
4622            ));
4623        }
4624        fx.write("records/contacts/index.jsonl", &jsonl);
4625        let issues = fx.store_all();
4626        assert!(
4627            !has(&issues, codes::INDEX_MISSING_ENTRY),
4628            "over the cap, missing browse entries are expected: {issues:#?}"
4629        );
4630        // But the jsonl is complete → no desync.
4631        assert!(
4632            !has(&issues, codes::INDEX_JSONL_DESYNC),
4633            "{:#?}",
4634            issues
4635                .iter()
4636                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
4637                .collect::<Vec<_>>()
4638        );
4639    }
4640
4641    // ── log ────────────────────────────────────────────────────────────────
4642
4643    #[test]
4644    fn log_bad_timestamp_unknown_kind_out_of_order() {
4645        let fx = Fixture::new();
4646        fx.write(
4647            "log.md",
4648            concat!(
4649                "---\ntype: log\n---\n\n# Log\n\n",
4650                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4651                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
4652                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
4653                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
4654            ),
4655        );
4656        let issues = fx.store_all();
4657        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4658        assert_eq!(
4659            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
4660            Severity::Warning
4661        );
4662        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
4663        assert_eq!(unknown.severity, Severity::Warning);
4664        assert!(unknown.message.contains("frobnicate"));
4665        assert!(unknown
4666            .suggestion
4667            .as_deref()
4668            .is_some_and(|s| s.contains("create")));
4669        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
4670        assert!(bad.is_error());
4671    }
4672
4673    #[test]
4674    fn log_validate_entry_without_object_is_well_formed() {
4675        let fx = Fixture::new();
4676        fx.write(
4677            "log.md",
4678            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
4679        );
4680        let issues = fx.store_all();
4681        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
4682        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
4683    }
4684
4685    #[test]
4686    fn log_in_order_is_clean() {
4687        let fx = Fixture::new();
4688        fx.write(
4689            "log.md",
4690            concat!(
4691                "---\ntype: log\n---\n\n",
4692                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4693                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
4694            ),
4695        );
4696        let issues = fx.store_all();
4697        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4698    }
4699
4700    #[test]
4701    fn log_not_checked_in_working_set() {
4702        // log.md ordering is an --all-only check.
4703        let fx = Fixture::new();
4704        fx.write(
4705            "log.md",
4706            concat!(
4707                "---\ntype: log\n---\n\n",
4708                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4709                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
4710            ),
4711        );
4712        let issues = validate_working_set(&fx.store(), None).unwrap();
4713        assert!(
4714            !has(&issues, codes::LOG_OUT_OF_ORDER),
4715            "log ordering is --all only: {issues:#?}"
4716        );
4717    }
4718
4719    // ── working-set scoping ───────────────────────────────────────────────────
4720
4721    #[test]
4722    fn working_set_validates_only_changed_files() {
4723        let fx = Fixture::new();
4724        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
4725        // in the log → working set must skip it.
4726        fx.write(
4727            "records/contacts/dirty.md",
4728            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4729        );
4730        fx.write(
4731            "records/contacts/unlogged.md",
4732            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4733        );
4734        fx.write(
4735            "log.md",
4736            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
4737        );
4738        let issues = validate_working_set(&fx.store(), None).unwrap();
4739        assert!(
4740            issues.iter().any(
4741                |i| i.code == codes::FM_BAD_TIMESTAMP && i.file == *"records/contacts/dirty.md"
4742            ),
4743            "{issues:#?}"
4744        );
4745        assert!(
4746            !issues
4747                .iter()
4748                .any(|i| i.file == *"records/contacts/unlogged.md"),
4749            "unlogged file must not be in the working set: {issues:#?}"
4750        );
4751    }
4752
4753    #[test]
4754    fn working_set_includes_incoming_linkers_to_changed_path() {
4755        let fx = Fixture::new();
4756        // `changed` was renamed/removed (logged). `linker` points at it with a
4757        // now-broken link and was NOT itself logged — but must be pulled in.
4758        fx.write(
4759            "wiki/people/linker.md",
4760            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
4761        );
4762        // `changed.md` does NOT exist on disk (removed).
4763        fx.write(
4764            "log.md",
4765            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
4766        );
4767        let issues = validate_working_set(&fx.store(), None).unwrap();
4768        assert!(
4769            issues
4770                .iter()
4771                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.file == *"wiki/people/linker.md"),
4772            "incoming linker to a removed path must be validated: {issues:#?}"
4773        );
4774    }
4775
4776    #[test]
4777    fn working_set_respects_explicit_since_cutoff() {
4778        let fx = Fixture::new();
4779        fx.write(
4780            "records/contacts/old.md",
4781            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4782        );
4783        fx.write(
4784            "records/contacts/new.md",
4785            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4786        );
4787        fx.write(
4788            "log.md",
4789            concat!(
4790                "---\ntype: log\n---\n\n",
4791                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
4792                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
4793            ),
4794        );
4795        // Cutoff after `old` but before `new`.
4796        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
4797        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
4798        assert!(
4799            issues.iter().any(|i| i.file == *"records/contacts/new.md"),
4800            "{issues:#?}"
4801        );
4802        assert!(
4803            !issues.iter().any(|i| i.file == *"records/contacts/old.md"),
4804            "old change is before the cutoff: {issues:#?}"
4805        );
4806    }
4807
4808    #[test]
4809    fn working_set_default_since_is_last_validate_entry() {
4810        let fx = Fixture::new();
4811        // `before` changed before the last validate; `after` changed after.
4812        fx.write(
4813            "records/contacts/before.md",
4814            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4815        );
4816        fx.write(
4817            "records/contacts/after.md",
4818            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4819        );
4820        fx.write(
4821            "log.md",
4822            concat!(
4823                "---\ntype: log\n---\n\n",
4824                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
4825                "## [2026-05-21 10:00] validate\nPASS\n\n",
4826                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
4827            ),
4828        );
4829        let issues = validate_working_set(&fx.store(), None).unwrap();
4830        assert!(
4831            issues
4832                .iter()
4833                .any(|i| i.file == *"records/contacts/after.md"),
4834            "{issues:#?}"
4835        );
4836        assert!(
4837            !issues
4838                .iter()
4839                .any(|i| i.file == *"records/contacts/before.md"),
4840            "change before the last validate entry is outside the default window: {issues:#?}"
4841        );
4842    }
4843
4844    // ── ordering / determinism ────────────────────────────────────────────────
4845
4846    #[test]
4847    fn issues_are_sorted_by_file_then_line() {
4848        let fx = Fixture::new();
4849        fx.write("wiki/people/z.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4850        fx.write("wiki/people/a.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4851        let issues = fx.store_all();
4852        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
4853        let mut sorted = files.clone();
4854        sorted.sort();
4855        assert_eq!(
4856            files, sorted,
4857            "issues must be emitted in a stable file order"
4858        );
4859    }
4860
4861    // ── boundaries: codes validate must NOT emit ──────────────────────────────
4862
4863    #[test]
4864    fn frozen_page_is_not_a_validate_error() {
4865        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
4866        // A clean file listed in `### Frozen pages` must validate clean.
4867        let mut fx = Fixture::new();
4868        fx.config
4869            .frozen_pages
4870            .push(PathBuf::from("records/decisions/d.md"));
4871        fx.write(
4872            "records/decisions/d.md",
4873            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
4874        );
4875        let issues = fx.store_all();
4876        assert!(
4877            !has(&issues, codes::POLICY_FROZEN_PAGE),
4878            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
4879        );
4880    }
4881
4882    #[test]
4883    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
4884        // The full-path doctrine makes ambiguity impossible; the defensive code
4885        // must never fire on a normal store.
4886        let fx = Fixture::new();
4887        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
4888        let mut body = valid_contact("links to sarah");
4889        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
4890        fx.write("wiki/people/p.md", &body);
4891        let issues = fx.store_all();
4892        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
4893    }
4894
4895    // ── unknown-type / unknown-field passthrough ──────────────────────────────
4896
4897    #[test]
4898    fn unknown_type_passes_through() {
4899        // A custom type is ambient context: it has a `type`, so no
4900        // FM_MISSING_TYPE, and with no matching schema there are no schema
4901        // errors. Only the universal contract (summary, timestamps) applies.
4902        let fx = Fixture::new();
4903        fx.write(
4904            "records/proposals/x.md",
4905            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
4906        );
4907        let issues = fx.store_all();
4908        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4909        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
4910        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
4911        // The unknown fields don't trip anything.
4912        assert!(
4913            !issues
4914                .iter()
4915                .any(|i| i.key.as_deref() == Some("custom_field")
4916                    || i.key.as_deref() == Some("budget")),
4917            "unknown fields are ambient context: {issues:#?}"
4918        );
4919    }
4920
4921    // ── find_links_to prefix-collision safety (working set) ───────────────────
4922
4923    #[test]
4924    fn incoming_linker_scan_does_not_prefix_match() {
4925        // A changed `records/contacts/sarah` must NOT pull in a file that only
4926        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
4927        let fx = Fixture::new();
4928        fx.write(
4929            "wiki/people/only-sarah-chen.md",
4930            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
4931        );
4932        // The log says `records/contacts/sarah` (the shorter path) changed.
4933        fx.write(
4934            "log.md",
4935            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
4936        );
4937        let issues = validate_working_set(&fx.store(), None).unwrap();
4938        assert!(
4939            !issues
4940                .iter()
4941                .any(|i| i.file == *"wiki/people/only-sarah-chen.md"),
4942            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
4943        );
4944    }
4945
4946    #[test]
4947    fn incoming_linker_scan_pulls_in_catalog_index_md() {
4948        // CONTRACT: the working-set incoming-linker scan rides the embedded-
4949        // ripgrep `Store::find_links_to`, which scans EVERY `.md` (including
4950        // `index.md` catalogs) — NOT the walk-and-read over `walk_content_files`,
4951        // which excludes `index.md`. A type-folder `index.md` that lists a now-
4952        // deleted target must be pulled into the working set so its dangling
4953        // catalog entry is flagged `WIKI_LINK_BROKEN`. The old walk-and-read
4954        // implementation skipped `index.md` and let this broken link survive the
4955        // loop silently; this test fails if anyone reverts to that path.
4956        let fx = Fixture::new();
4957        // A catalog that still lists the deleted contact (a real, common stale
4958        // state after a `delete`). No other file references the target, so the
4959        // catalog is the ONLY incoming linker — if it isn't scanned, nothing is.
4960        fx.write(
4961            "records/contacts/index.md",
4962            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
4963        );
4964        // The log says `records/contacts/sarah-chen` was deleted.
4965        fx.write(
4966            "log.md",
4967            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
4968        );
4969        let issues = validate_working_set(&fx.store(), None).unwrap();
4970        assert!(
4971            issues.iter().any(
4972                |i| i.file == *"records/contacts/index.md" && i.code == codes::WIKI_LINK_BROKEN
4973            ),
4974            "the catalog `index.md` linking to the deleted target must be pulled \
4975             into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
4976             uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
4977             walk-and-read): {issues:#?}"
4978        );
4979    }
4980
4981    #[test]
4982    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
4983        // CONTRACT (the O(changed × store) fix): the working-set scan finds
4984        // incoming linkers for EVERY changed object, and does so via the single
4985        // batch pass `Store::find_links_to_any` — not one full store read per
4986        // changed object. This test pins the behavior that makes the single-pass
4987        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
4988        // into the working set and flagged. A regression that scanned for only the
4989        // first/last changed object, or that dropped the batch union, would leave
4990        // one of the two broken links unreported and fail here.
4991        let fx = Fixture::new();
4992        // Linker A → deleted target #1 (in the body).
4993        fx.write(
4994            "wiki/people/refers-sarah.md",
4995            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
4996        );
4997        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
4998        // sidecar `links` projection would miss, which is why this must be a
4999        // content scan, not a sidecar read).
5000        fx.write(
5001            "records/meetings/2026/05/kickoff.md",
5002            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
5003        );
5004        // The log says BOTH targets were deleted in this window.
5005        fx.write(
5006            "log.md",
5007            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
5008        );
5009
5010        let issues = validate_working_set(&fx.store(), None).unwrap();
5011        assert!(
5012            issues
5013                .iter()
5014                .any(|i| i.file == *"wiki/people/refers-sarah.md"
5015                    && i.code == codes::WIKI_LINK_BROKEN),
5016            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
5017        );
5018        assert!(
5019            issues
5020                .iter()
5021                .any(|i| i.file == *"records/meetings/2026/05/kickoff.md"
5022                    && i.code == codes::WIKI_LINK_BROKEN),
5023            "linker to the SECOND deleted target (typed-field edge) must also be \
5024             pulled in and flagged — proves the scan covers the whole changed set, \
5025             not just one object: {issues:#?}"
5026        );
5027    }
5028
5029    #[test]
5030    fn frontmatter_block_sequence_links_each_get_their_own_line() {
5031        // Each block-sequence wiki-link reports on its own source line.
5032        let fx = Fixture::new();
5033        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
5034        fx.write(
5035            "records/meetings/m.md",
5036            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
5037        );
5038        let issues = fx.store_all();
5039        let broken_lines: BTreeSet<Option<u32>> = issues
5040            .iter()
5041            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
5042            .map(|i| i.line)
5043            .collect();
5044        assert_eq!(
5045            broken_lines.len(),
5046            2,
5047            "two distinct broken-link lines: {issues:#?}"
5048        );
5049    }
5050
5051    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
5052    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
5053    /// and the module doc-comment promises this code implements "exactly those
5054    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
5055    /// new validation code is added to the engine but never documented.
5056    #[test]
5057    fn every_code_constant_is_documented_in_spec() {
5058        // Parse the canonical constant *values* straight out of this module's
5059        // source, so a future `pub const X: &str = "X";` is covered with no test
5060        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
5061        let this_src = include_str!("validate.rs");
5062        let mut codes_in_module: Vec<String> = Vec::new();
5063        let mut in_codes_mod = false;
5064        for line in this_src.lines() {
5065            let t = line.trim();
5066            if t.starts_with("pub mod codes") {
5067                in_codes_mod = true;
5068                continue;
5069            }
5070            // The `mod codes` block ends at its closing brace at column 0.
5071            if in_codes_mod && line == "}" {
5072                break;
5073            }
5074            if in_codes_mod {
5075                if let Some(rest) = t.strip_prefix("pub const ") {
5076                    // rest = `NAME: &str = "VALUE";`
5077                    let value = rest
5078                        .split_once('=')
5079                        .map(|(_, v)| v.trim())
5080                        .and_then(|v| v.strip_prefix('"'))
5081                        .and_then(|v| v.strip_suffix("\";"))
5082                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
5083                    codes_in_module.push(value.to_string());
5084                }
5085            }
5086        }
5087        assert!(
5088            codes_in_module.len() >= 36,
5089            "parsed only {} code constants from `mod codes`; the parser likely \
5090             broke against a source-format change",
5091            codes_in_module.len()
5092        );
5093
5094        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
5095        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
5096        let spec = fs::read_to_string(&spec_path)
5097            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
5098
5099        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
5100        let missing: Vec<&String> = codes_in_module
5101            .iter()
5102            .filter(|code| !spec.contains(&format!("| `{code}` |")))
5103            .collect();
5104        assert!(
5105            missing.is_empty(),
5106            "validation codes emitted by the engine but absent from SPEC.md \
5107             § Validation (the declared complete vocabulary): {missing:?}"
5108        );
5109    }
5110}