// dbmd-cli 0.2.2 - Docs.rs

//! Block 7 — end-to-end **agent-eval** harness (deterministic, CI-stable).
//!
//! The plan (plans/db-md-rust-toolkit.md, Block 7, lines 536-552) asks for an
//! eval that "wires a Claude Code session (or scripted equivalent) to a temp
//! store via `dbmd`" and "records every `dbmd` invocation … so lifecycle
//! ordering can be asserted." It explicitly permits "a Claude Code session OR
//! scripted agent." We use a **scripted curator** so the eval is a stable,
//! repeatable CI gate with zero LLM dependency — the determinism the whole
//! toolkit was built for (see the `DBMD_NOW` reproducibility hook in
//! `crates/dbmd-core/src/time.rs`) is what makes a byte-for-byte golden
//! possible at all.
//!
//! This file drives the **real release binary** (`target/release/dbmd`, per the
//! Block 7 brief) as a subprocess for every step, with the temp store as the
//! working directory (the way a real agent session runs — `cd` into the store).
//! It covers, in one place:
//!
//!   1. **`corpus-e` end-to-end** — a fixed, lifecycle-ordered sequence of real
//!      `dbmd` invocations (warm-up `log tail` → fold shipped sources →
//!      per-entity `fm query` dedup → `write` → block-form links → `log` →
//!      working-set `validate`) against a temp copy of
//!      `tests/corpora/corpus-e-agent`, recording every invocation (args + exit
//!      code) to an in-test command log.
//!      The produced store is asserted (a) byte-for-byte against the committed
//!      `EXPECTED/` golden AND (b) against golden-independent **intent
//!      properties** derived from `NOTES.md` + the SPEC (so a golden that was
//!      itself regenerated from buggy output would still be caught).
//!   2. **Session-lifecycle assertions** over the recorded command log (first
//!      call is `log tail`; an `fm query` precedes each contact write; full-path
//!      wiki-links only; a `log` follows each write; `validate` in the back
//!      half; zero `index rebuild` in the operating loop; a final `log`).
//!   3. **Supporting evals** — `search` over corpus-a (20 representative
//!      queries incl. `--type`/`--in`/`--updated-after`, diffed vs the committed
//!      golden), `validate --all` over corpus-b (diffed vs
//!      `EXPECTED/validate.json`), `extract` over corpus-c (diffed vs known-good
//!      `.txt`), and the policy-refusal eval (a write against a frozen page is
//!      refused with `POLICY_FROZEN_PAGE` and leaves the file byte-identical).
//!   4. **Perf 1M tier** — an opt-in `#[ignore]` test (the 10k tier lives in
//!      `perf_budget.rs`; the 1M tier is documented in `tests/PERF.md` and never
//!      generated/run in CI).
//!
//! Run: `cargo test -p dbmd-cli --test agent_eval`
//! 1M tier: `cargo test -p dbmd-cli --test agent_eval -- --ignored perf_1m`

mod common;

use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use std::process::Command as StdCommand;
use std::sync::OnceLock;

use common::{copy_store_to_temp, corpora_dir, corpus_a, corpus_b};

// ─────────────────────────────────────────────────────────────────────────────
// The release binary (`target/release/dbmd`) — the Block 7 brief drives THIS,
// not the debug bin `assert_cmd` would pick. We (re)build it under the workspace
// target dir on every test-process start (rebuild-if-stale, NOT build-if-absent)
// so the suite always drives code built from the current tree — never a stale
// leftover from an earlier commit, even under `cargo test --workspace` (which
// builds debug) or a repeated local run where a release binary already exists.
// ─────────────────────────────────────────────────────────────────────────────

/// Absolute path to a **freshly-built** `target/release/dbmd`.
///
/// `CARGO_MANIFEST_DIR` is `<repo>/crates/dbmd-cli`; the workspace target dir is
/// `<repo>/target`. The brief is explicit that the eval drives the *optimized
/// release* artifact (not the debug bin `assert_cmd`/`cargo_bin` would pick, and
/// not via a new build-driver dependency like `escargot`), so we invoke `cargo
/// build --release -p dbmd-cli` ourselves and hand back the artifact path.
///
/// **The build is unconditional, by design.** It is NOT guarded on
/// `bin.is_file()`. A stale release binary left over from an earlier commit is a
/// soundness hazard: this whole file (the flagship byte-for-byte golden plus
/// every supporting eval) would otherwise run against pre-edit code and report
/// green while a regression ships — the failure mode is silent, not loud. Cargo
/// is the staleness oracle: when the binary is genuinely up to date the build is
/// a sub-second no-op; when any source under the dependency graph changed, cargo
/// rebuilds before we return the path. Skipping the build "because a binary
/// already exists" is exactly the bug this guards against — do not re-add an
/// `if !bin.is_file()` short-circuit.
///
/// The build runs **once per test process**, memoized in a `OnceLock`: the first
/// of the file's tests to reach here triggers the (rebuild-if-stale) build, all
/// others observe the completed result. That keeps the guarantee — every test
/// drives a binary built from the current tree — while a parallel `cargo test`
/// run does not fire seven redundant, lock-contending `cargo build` no-ops.
fn release_dbmd() -> PathBuf {
    // Process-wide memo: the initializer runs on first use only; later callers
    // get a copy of the already-computed path.
    static RELEASE_BIN: OnceLock<PathBuf> = OnceLock::new();
    let built: &PathBuf = RELEASE_BIN.get_or_init(build_release_dbmd);
    built.to_path_buf()
}

/// Run `cargo build --release -p dbmd-cli` unconditionally and return the
/// artifact path, asserting the build succeeded and the on-disk binary is
/// current with the sources cargo just saw. Called once via [`release_dbmd`].
fn build_release_dbmd() -> PathBuf {
    let repo_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("..")
        .join("..");
    let exe = if cfg!(windows) { "dbmd.exe" } else { "dbmd" };
    let bin = repo_root.join("target").join("release").join(exe);

    // Capture the prior artifact's mtime (if any) so we can *prove* the build
    // ran and the on-disk binary is current with the sources cargo just saw —
    // not merely that some file is present. This converts the silent
    // stale-binary failure mode into a loud assertion.
    let mtime_before = std::fs::metadata(&bin).and_then(|m| m.modified()).ok();

    // Build the CLI in release. `--release` is mandatory (the eval drives the
    // optimized artifact). Inherit stdio so a build failure is visible in the
    // test log rather than swallowed. Cargo no-ops when truly up to date and
    // rebuilds when any input changed, giving rebuild-if-stale semantics
    // without a hand-rolled (and provably wrong) freshness check.
    let status = StdCommand::new(env!("CARGO"))
        .args(["build", "--release", "-p", "dbmd-cli"])
        .current_dir(&repo_root)
        .status()
        .expect("spawn `cargo build --release -p dbmd-cli`");
    assert!(
        status.success(),
        "`cargo build --release -p dbmd-cli` failed — the agent eval drives \
         the release binary at {}",
        bin.display()
    );

    // The build reported success, so the artifact must now exist. If cargo had
    // to do work, its mtime must have advanced past what we saw before; if it
    // no-op'd, the binary was already current with the sources cargo checked.
    // Either way we are guaranteed to be driving code built from the *current*
    // tree, never a stale leftover from an earlier commit.
    let meta = std::fs::metadata(&bin).unwrap_or_else(|e| {
        panic!(
            "release binary absent after a successful `cargo build --release`: \
             {} ({e})",
            bin.display()
        )
    });
    if let (Some(before), Ok(after)) = (mtime_before, meta.modified()) {
        assert!(
            after >= before,
            "release binary at {} went backwards in time across a build \
             (before {before:?}, after {after:?}) — refusing to drive a stale \
             artifact",
            bin.display()
        );
    }

    bin
}

// ─────────────────────────────────────────────────────────────────────────────
// Command-log-recording invocation harness.
//
// Every `dbmd` call the scripted curator makes goes through `Session::run`,
// which records (args, exit code) so the lifecycle ordering can be asserted —
// the plan's "the harness records every `dbmd` invocation" requirement. The
// store is always the working directory (no `--dir`): that is how a real agent
// session runs, AND it is mandatory for the `log` append form, whose
// external-subcommand parsing cannot accept a `--dir` flag (it always operates
// on cwd).
// ─────────────────────────────────────────────────────────────────────────────

/// A single entry of the command log: what was passed to `dbmd` (the binary
/// path itself is not stored) and how the process exited.
#[derive(Debug, Clone)]
struct Invocation {
    /// Arguments exactly as handed to the process, in order.
    args: Vec<String>,
    /// Exit code recorded for the process.
    exit_code: i32,
}

impl Invocation {
    /// The top-level verb: the first argument that does not begin with `-`
    /// (so leading flags such as `--json` are skipped). Examples: `"write"`,
    /// `"log"`, `"fm"`, `"validate"`. `None` when every argument is a flag or
    /// there are no arguments.
    fn verb(&self) -> Option<&str> {
        self.args
            .iter()
            .map(String::as_str)
            .find(|arg| !arg.starts_with('-'))
    }

    /// The second non-flag argument, i.e. whatever positional follows the top
    /// verb (`fm query` → `"query"`, `log tail` → `"tail"`, `index rebuild` →
    /// `"rebuild"`). `None` when fewer than two non-flag arguments exist.
    fn subverb(&self) -> Option<&str> {
        self.args
            .iter()
            .map(String::as_str)
            .filter(|arg| !arg.starts_with('-'))
            .nth(1)
    }

    /// Whether `needle` occurs as a substring of at least one argument.
    fn arg_contains(&self, needle: &str) -> bool {
        for arg in &self.args {
            if arg.contains(needle) {
                return true;
            }
        }
        false
    }
}

/// A scripted curator session against one temp store: holds the store root, the
/// release binary path, and the append-only command log.
struct Session {
    /// Path of the `dbmd` executable every step spawns.
    bin: PathBuf,
    /// Root of the temp store; used as the working directory of every spawn.
    store: PathBuf,
    /// The recorded invocations, in execution order.
    log: Vec<Invocation>,
}

impl Session {
    /// Start a session against a fresh temp copy of the `corpus-e-agent` INPUTS
    /// (`DB.md` + `sources/**` only — the agent produces everything else).
    ///
    /// Returns the `TempDir` guard together with the session; the store lives
    /// at `<tempdir>/store`, so the caller must keep the guard alive for as long
    /// as the session is used.
    fn open_corpus_e() -> (tempfile::TempDir, Session) {
        let src = corpora_dir().join("corpus-e-agent");
        let tmp = tempfile::TempDir::new().expect("tempdir for the corpus-e session");
        let store = tmp.path().join("store");
        std::fs::create_dir_all(&store).expect("create store dir");
        // Copy ONLY the inputs, not NOTES.md / EXPECTED/ (those are the contract,
        // not the store the agent operates on).
        copy_into(&src.join("DB.md"), &store.join("DB.md"));
        copy_tree(&src.join("sources"), &store.join("sources"));
        let session = Session {
            bin: release_dbmd(),
            store,
            log: Vec::new(),
        };
        (tmp, session)
    }

    /// Spawn `dbmd <args>` with the store as cwd and `DBMD_NOW` set to `now`,
    /// append the invocation to the command log, and return
    /// `(stdout, stderr, exit_code)`.
    ///
    /// Both streams are decoded lossily. A process that reports no exit code is
    /// recorded as `-1`. Panics only if the process cannot be spawned.
    fn run_captured(&mut self, now: &str, args: &[&str]) -> (String, String, i32) {
        let output = StdCommand::new(&self.bin)
            .args(args)
            .current_dir(&self.store)
            .env("DBMD_NOW", now)
            .output()
            .expect("spawn release dbmd");
        let exit_code = output.status.code().unwrap_or(-1);
        self.log.push(Invocation {
            args: args.iter().map(|s| s.to_string()).collect(),
            exit_code,
        });
        (
            String::from_utf8_lossy(&output.stdout).into_owned(),
            String::from_utf8_lossy(&output.stderr).into_owned(),
            exit_code,
        )
    }

    /// Run `dbmd <args>` with the store as cwd at a pinned `DBMD_NOW`, recording
    /// the invocation. Returns `(stdout, exit_code)`. Does NOT assert success —
    /// some steps (a refusal eval, a `validate` on a store with an expected
    /// `info`) legitimately exit non-zero; callers assert what they need.
    fn run(&mut self, now: &str, args: &[&str]) -> (String, i32) {
        let (stdout, _stderr, exit_code) = self.run_captured(now, args);
        (stdout, exit_code)
    }

    /// `run`, asserting the call succeeded (exit 0). For the write/log/query
    /// steps where a non-zero exit is a real failure.
    ///
    /// The invocation is recorded before the assertion fires. On failure the
    /// panic message carries stdout AND stderr, so whatever diagnostic the
    /// process wrote to stderr is not lost. Returns stdout on success.
    fn run_ok(&mut self, now: &str, args: &[&str]) -> String {
        let (stdout, stderr, code) = self.run_captured(now, args);
        assert_eq!(
            code,
            0,
            "expected success from `dbmd {}` (cwd {}), got exit {code}; stdout:\n{stdout}\nstderr:\n{stderr}",
            args.join(" "),
            self.store.display()
        );
        stdout
    }

    /// Write a markdown body to a new file and return its path, for
    /// `--body-file`. The file is created in the store's parent directory
    /// (the `TempDir` root when the session came from `open_corpus_e`), under
    /// the first unused `body-N.md` name counting up from `N = 0`.
    fn body_file(&self, contents: &str) -> PathBuf {
        let dir = self.store.parent().expect("store has a parent");
        // Probe body-0.md, body-1.md, … and take the first name not yet on disk.
        let path = (0u32..)
            .map(|n| dir.join(format!("body-{n}.md")))
            .find(|candidate| !candidate.exists())
            .expect("an unused body-N.md name exists");
        std::fs::write(&path, contents).expect("write body file");
        path
    }
}

/// Copy the file at `src` to `dst`, first creating any missing ancestor
/// directories of `dst`. Panics (naming both paths) if the copy fails.
fn copy_into(src: &Path, dst: &Path) {
    let parent = dst.parent().expect("dst has a parent");
    std::fs::create_dir_all(parent).expect("create parents");
    if let Err(e) = std::fs::copy(src, dst) {
        panic!("copy {} → {}: {e}", src.display(), dst.display());
    }
}

/// Mirror the directory `src` into `dst`: `dst` is created if needed, entries
/// reported as directories are descended into, and every other entry is copied
/// as a file. Any I/O failure panics.
fn copy_tree(src: &Path, dst: &Path) {
    std::fs::create_dir_all(dst).expect("create dest dir");
    let listing = match std::fs::read_dir(src) {
        Ok(entries) => entries,
        Err(e) => panic!("read {}: {e}", src.display()),
    };
    for item in listing {
        let item = item.expect("dir entry");
        let from = item.path();
        let to = dst.join(item.file_name());
        let kind = item.file_type().expect("file type");
        if kind.is_dir() {
            copy_tree(&from, &to);
        } else {
            std::fs::copy(&from, &to).expect("copy file");
        }
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// The scripted curator session over corpus-e.
//
// A FIXED, lifecycle-ordered sequence. The `DBMD_NOW` passed to each step never
// goes backwards, and each entity gets its own instant (its query / write / log
// calls share it), so timestamps are deterministic AND distinct per entity
// (exercising the index recency ordering). Every payload — fields, summaries,
// bodies, links — is authored from the source evidence + DB.md
// schemas/policies/instructions (the curator's judgment), NOT copied from any
// tool output.
// ─────────────────────────────────────────────────────────────────────────────

/// Drive the full curator session against `s`. After this returns, `s.store`
/// holds the produced store and `s.log` holds the ordered command log.
///
/// Every step except the final `validate` goes through `run_ok` (directly or
/// via `write_record` / `log_entry`), so any non-zero exit there panics.
fn run_curator_session(s: &mut Session) {
    // ── 1. Open + warm up ────────────────────────────────────────────────────
    // The FIRST dbmd call is `log tail` (lifecycle step 2). The log is absent
    // initially, so this returns an empty list and exit 0.
    s.run_ok("2026-05-29T17:00:00Z", &["--json", "log", "tail", "20"]);
    // The shipped `sources/` is the store's initial state — a "bulk external
    // drop" in SPEC terms. Fold it into the catalog with a SINGLE full rebuild
    // during warm-up, BEFORE the operating loop. This is the one SPEC-sanctioned
    // rebuild; the operating loop below never calls rebuild (write-through only).
    s.run_ok("2026-05-29T17:00:30Z", &["--json", "index", "rebuild"]);
    log_entry(
        s,
        "2026-05-29T17:00:40Z",
        "ingest",
        "-",
        "folded shipped sources into the catalog",
    );

    // ── 2. Operate: companies (create the company before its contacts) ───────
    // Tideform — the client. Dedup by domain first.
    s.run_ok(
        "2026-05-29T17:01:00Z",
        &[
            "--json",
            "fm",
            "query",
            "domain=tideform.com",
            "--type",
            "company",
        ],
    );
    write_record(
        s,
        "2026-05-29T17:01:00Z",
        "records/companies/tideform",
        "company",
        "Tideform — tide-and-surf forecasting app; phase-one mobile rebrand client",
        &[
            "name=Tideform",
            "domain=tideform.com",
            "industry=Consumer mobile app",
            "relationship=client",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:01:00Z",
        "create",
        "records/companies/tideform.md",
        "client company for the rebrand",
    );

    // Helio Type — the vendor.
    s.run_ok(
        "2026-05-29T17:02:00Z",
        &[
            "--json",
            "fm",
            "query",
            "domain=heliotype.com",
            "--type",
            "company",
        ],
    );
    write_record(
        s,
        "2026-05-29T17:02:00Z",
        "records/companies/helio-type",
        "company",
        "Helio Type Foundry — annual studio typeface licence vendor",
        &[
            "name=Helio Type Foundry",
            "domain=heliotype.com",
            "industry=Type foundry",
            "relationship=vendor",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:02:00Z",
        "create",
        "records/companies/helio-type.md",
        "type licence vendor",
    );

    // Northgate Coffee — the prospect.
    s.run_ok(
        "2026-05-29T17:03:00Z",
        &[
            "--json",
            "fm",
            "query",
            "domain=northgatecoffee.co",
            "--type",
            "company",
        ],
    );
    write_record(
        s,
        "2026-05-29T17:03:00Z",
        "records/companies/northgate-coffee",
        "company",
        "Northgate Coffee — small-batch roastery; packaging-redesign prospect",
        &[
            "name=Northgate Coffee",
            "domain=northgatecoffee.co",
            "industry=Coffee roastery",
            "relationship=prospect",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:03:00Z",
        "create",
        "records/companies/northgate-coffee.md",
        "inbound packaging prospect",
    );

    // Lumen Labs — "us". The schema makes `contact.company` required and Theo
    // is an independent contractor with a gmail address; anchoring him to the
    // studio (NOTES "resolution (a)") gives the required link a real target.
    // `relationship` is `partner` (NOT client/vendor/prospect — "us" is not a
    // counterparty), which the enum permits.
    write_record(
        s,
        "2026-05-29T17:04:00Z",
        "records/companies/lumen-labs",
        "company",
        "Lumen Labs — the studio (us); five-person product-design practice",
        &[
            "name=Lumen Labs",
            "domain=lumenlabs.studio",
            "industry=Product design studio",
            "relationship=partner",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:04:00Z",
        "create",
        "records/companies/lumen-labs.md",
        "own-company anchor",
    );

    // ── 3. Operate: contacts (fm query dedup precedes EACH contact write) ────
    s.run_ok(
        "2026-05-29T17:05:00Z",
        &[
            "--json",
            "fm",
            "query",
            "email=daniel.osei@tideform.com",
            "--type",
            "contact",
        ],
    );
    write_record(
        s,
        "2026-05-29T17:05:00Z",
        "records/contacts/daniel-osei",
        "contact",
        "Head of Product at Tideform; economic buyer on the Lumen rebrand engagement",
        &[
            "name=Daniel Osei",
            "email=daniel.osei@tideform.com",
            "role=Head of Product",
            "company=[[records/companies/tideform]]",
            "first_touch=2026-04-09",
            "last_touch=2026-04-14",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:05:00Z",
        "create",
        "records/contacts/daniel-osei.md",
        "Tideform buyer",
    );

    s.run_ok(
        "2026-05-29T17:06:00Z",
        &[
            "--json",
            "fm",
            "query",
            "email=mara@tideform.com",
            "--type",
            "contact",
        ],
    );
    write_record(
        s,
        "2026-05-29T17:06:00Z",
        "records/contacts/mara-lindqvist",
        "contact",
        "Design Lead at Tideform; Lumen's day-to-day contact on the rebrand",
        &[
            "name=Mara Lindqvist",
            "email=mara@tideform.com",
            "role=Design Lead",
            "company=[[records/companies/tideform]]",
            "first_touch=2026-04-09",
            "last_touch=2026-04-14",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:06:00Z",
        "create",
        "records/contacts/mara-lindqvist.md",
        "Tideform design lead",
    );

    s.run_ok(
        "2026-05-29T17:07:00Z",
        &[
            "--json",
            "fm",
            "query",
            "email=sofia@northgatecoffee.co",
            "--type",
            "contact",
        ],
    );
    write_record(
        s,
        "2026-05-29T17:07:00Z",
        "records/contacts/sofia-reyes",
        "contact",
        "Founder of Northgate Coffee; packaging-redesign enquiry",
        &[
            "name=Sofia Reyes",
            "email=sofia@northgatecoffee.co",
            "role=Founder",
            "company=[[records/companies/northgate-coffee]]",
            "first_touch=2026-05-04",
            "last_touch=2026-05-04",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:07:00Z",
        "create",
        "records/contacts/sofia-reyes.md",
        "Northgate founder",
    );

    s.run_ok(
        "2026-05-29T17:08:00Z",
        &[
            "--json",
            "fm",
            "query",
            "email=theo.vance@gmail.com",
            "--type",
            "contact",
        ],
    );
    write_record(
        s,
        "2026-05-29T17:08:00Z",
        "records/contacts/theo-vance",
        "contact",
        "Freelance motion designer contracted by Lumen for the Tideform rebrand",
        &[
            "name=Theo Vance",
            "email=theo.vance@gmail.com",
            "role=Freelance Motion Designer",
            "company=[[records/companies/lumen-labs]]",
            // Theo is an attendee of the 2026-04-14 kickoff (see the meeting
            // record below) and his last touch is that same day, so his first
            // touch cannot be later than it.
            "first_touch=2026-04-14",
            "last_touch=2026-04-14",
        ],
        None,
    );
    log_entry(
        s,
        "2026-05-29T17:08:00Z",
        "create",
        "records/contacts/theo-vance.md",
        "contracted motion designer",
    );

    // ── 4. Operate: meeting / invoice / expense ──────────────────────────────
    // The write paths below omit the date shard; the `log` objects (and the
    // links in later bodies) use the `YYYY/MM/` sharded form of the same file.
    let meeting_body = "# Tideform rebrand kickoff\n\nKickoff call confirming the phase-one scope and the $45k fixed fee. Derived from the transcript [[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]] and confirmed by the SOW [[sources/docs/2026-04-14-tideform-sow]].\n";
    let mb = s.body_file(meeting_body);
    write_record(
        s,
        "2026-05-29T17:09:00Z",
        "records/meetings/2026-04-14-tideform-kickoff",
        "meeting",
        "Tideform rebrand kickoff; scope, 8-week term, $45k phase-one fee confirmed",
        &[
            "date=2026-04-14",
            // Block-form attendees list of full-path wiki-links (schema:
            // `attendees (required, link to records/contacts/)`).
            "attendees=[[[records/contacts/daniel-osei]], [[records/contacts/mara-lindqvist]], [[records/contacts/theo-vance]]]",
            "location=Video call",
            "duration_min=48",
        ],
        Some(&mb),
    );
    log_entry(
        s,
        "2026-05-29T17:09:00Z",
        "create",
        "records/meetings/2026/04/2026-04-14-tideform-kickoff.md",
        "kickoff from transcript",
    );

    let invoice_body = "# Invoice HT-2026-0417\n\nAnnual Helio Type studio licence. Source: [[sources/emails/2026/04/2026-04-22-helio-type-invoice]]. Payment confirmed by [[sources/docs/2026-05-06-helio-type-receipt]].\n";
    let ib = s.body_file(invoice_body);
    write_record(
        s,
        "2026-05-29T17:10:00Z",
        "records/invoices/2026-04-22-helio-type-ht-2026-0417",
        "invoice",
        "Helio Type HT-2026-0417 annual licence; $1,188 USD; paid 2026-05-06",
        &[
            "date=2026-04-22",
            "amount=1188.00",
            "vendor=[[records/companies/helio-type]]",
            "status=paid",
            "paid_at=2026-05-06",
        ],
        Some(&ib),
    );
    log_entry(
        s,
        "2026-05-29T17:10:00Z",
        "create",
        "records/invoices/2026/04/2026-04-22-helio-type-ht-2026-0417.md",
        "vendor invoice paid",
    );

    // The DB.md rule: every vendor invoice gets a MATCHING expense, linked to
    // the invoice it paid.
    let expense_body = "# Helio Type annual licence\n\nPaid via company card. Settles invoice [[records/invoices/2026/04/2026-04-22-helio-type-ht-2026-0417]].\n";
    let eb = s.body_file(expense_body);
    write_record(
        s,
        "2026-05-29T17:11:00Z",
        "records/expenses/2026-05-06-helio-type-1188",
        "expense",
        "Helio Type annual type licence; $1,188 paid 2026-05-06 via company card",
        &[
            "date=2026-05-06",
            "amount=1188.00",
            "currency=USD",
            "category=Software / type licences",
            "vendor=[[records/companies/helio-type]]",
        ],
        Some(&eb),
    );
    log_entry(
        s,
        "2026-05-29T17:11:00Z",
        "create",
        "records/expenses/2026/05/2026-05-06-helio-type-1188.md",
        "matching expense for the invoice",
    );

    // ── 5. Operate: wiki synthesis (British English per DB.md) ───────────────
    let project_body = "# Tideform rebrand\n\nPhase-one mobile rebrand for [[records/companies/tideform]]: a new visual language, a reusable component library, a marketing-site refresh, and motion design. The studio organised the work around an eight-week term with a **$45,000** fixed fee; the first design review is the week of 5 May.\n\nThe economic buyer is [[records/contacts/daniel-osei]]; [[records/contacts/mara-lindqvist]] is the day-to-day design lead and [[records/contacts/theo-vance]] handles motion. Scope and budget were confirmed at the kickoff [[records/meetings/2026/04/2026-04-14-tideform-kickoff]].\n\nDerived from [[sources/emails/2026/04/2026-04-09-tideform-project-intro]], [[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]], and [[sources/docs/2026-04-14-tideform-sow]].\n";
    let wp = s.body_file(project_body);
    write_record(
        s,
        "2026-05-29T17:12:00Z",
        "wiki/projects/tideform-rebrand",
        "wiki-page",
        "Tideform phase-one mobile rebrand; $45k fixed fee, eight-week term",
        &[
            "topic=Tideform rebrand",
            "derived_from=[[[sources/emails/2026/04/2026-04-09-tideform-project-intro]], [[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]], [[sources/docs/2026-04-14-tideform-sow]]]",
        ],
        Some(&wp),
    );
    log_entry(
        s,
        "2026-05-29T17:12:00Z",
        "create",
        "wiki/projects/tideform-rebrand.md",
        "flagship project synthesis",
    );

    let daniel_body = "# Daniel Osei\n\nHead of Product at [[records/companies/tideform]] and the economic buyer for the Lumen rebrand. He set the phase-one scope and budget, then delegated the day-to-day to [[records/contacts/mara-lindqvist]]. See the kickoff [[records/meetings/2026/04/2026-04-14-tideform-kickoff]] and the project [[wiki/projects/tideform-rebrand]].\n\nDerived from [[sources/emails/2026/04/2026-04-09-tideform-project-intro]], [[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]], and [[sources/docs/2026-04-14-tideform-sow]].\n";
    let dp = s.body_file(daniel_body);
    write_record(
        s,
        "2026-05-29T17:13:00Z",
        "wiki/people/daniel-osei",
        "wiki-page",
        "Tideform Head of Product; economic buyer on the Lumen rebrand",
        &[
            "topic=Daniel Osei",
            "derived_from=[[[sources/emails/2026/04/2026-04-09-tideform-project-intro]], [[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]], [[sources/docs/2026-04-14-tideform-sow]]]",
        ],
        Some(&dp),
    );
    log_entry(
        s,
        "2026-05-29T17:13:00Z",
        "create",
        "wiki/people/daniel-osei.md",
        "buyer bio",
    );

    let mara_body = "# Mara Lindqvist\n\nDesign Lead at [[records/companies/tideform]] and Lumen Labs day-to-day contact on the rebrand. She prioritised the component library and organised the brand-asset handover. See the kickoff [[records/meetings/2026/04/2026-04-14-tideform-kickoff]] and the project [[wiki/projects/tideform-rebrand]].\n\nDerived from [[sources/emails/2026/04/2026-04-09-tideform-project-intro]] and [[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]].\n";
    let mp = s.body_file(mara_body);
    write_record(
        s,
        "2026-05-29T17:14:00Z",
        "wiki/people/mara-lindqvist",
        "wiki-page",
        "Tideform design lead; Lumen's day-to-day contact on the rebrand",
        &[
            "topic=Mara Lindqvist",
            "derived_from=[[[sources/emails/2026/04/2026-04-09-tideform-project-intro]], [[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]]]",
        ],
        Some(&mp),
    );
    log_entry(
        s,
        "2026-05-29T17:14:00Z",
        "create",
        "wiki/people/mara-lindqvist.md",
        "design-lead bio",
    );

    // ── 6. Validate (working set, back half) + close ─────────────────────────
    // `run`, not `run_ok`: the exit code is deliberately not asserted here.
    let (_v, _code) = s.run("2026-05-29T17:20:00Z", &["--json", "validate"]);
    log_entry(
        s,
        "2026-05-29T17:20:00Z",
        "validate",
        "-",
        "working-set check",
    );
}

/// Run `dbmd --json write <path> --type <type_> --summary <summary>` through
/// the session (so it is recorded and must exit 0), appending one `--fm <kv>`
/// pair per entry of `fm` and, when `body_file` is given, `--body-file <path>`.
/// `--summary` is passed unconditionally (lifecycle pre-write check #4).
///
/// Stdout is parsed as JSON (panicking if it is not); the return value is its
/// `written` string, or an empty string when that key is missing or is not a
/// string.
fn write_record(
    s: &mut Session,
    now: &str,
    path: &str,
    type_: &str,
    summary: &str,
    fm: &[&str],
    body_file: Option<&Path>,
) -> String {
    // Materialise the body path first so the argv below can borrow from it.
    let body_arg: Option<String> = body_file.map(|bf| bf.to_string_lossy().into_owned());

    let mut argv: Vec<&str> = vec![
        "--json",
        "write",
        path,
        "--type",
        type_,
        "--summary",
        summary,
    ];
    for pair in fm.iter().copied() {
        argv.push("--fm");
        argv.push(pair);
    }
    if let Some(bf) = body_arg.as_deref() {
        argv.push("--body-file");
        argv.push(bf);
    }

    let stdout = s.run_ok(now, &argv);
    let parsed: serde_json::Value = match serde_json::from_str(stdout.trim()) {
        Ok(value) => value,
        Err(e) => panic!("`dbmd write {path}` did not emit JSON ({e}): {stdout:?}"),
    };
    parsed["written"].as_str().unwrap_or_default().to_string()
}

/// Run `dbmd --json log <kind> <object> -m <note>` through the session, which
/// records the call and panics unless it exits 0. Pass `"-"` as `object` for
/// the store-wide sentinel.
fn log_entry(s: &mut Session, now: &str, kind: &str, object: &str, note: &str) {
    let argv = ["--json", "log", kind, object, "-m", note];
    // The command's stdout is not needed by any caller.
    let _stdout = s.run_ok(now, &argv);
}

// ─────────────────────────────────────────────────────────────────────────────
// 1 — corpus-e end to end: produce the store, assert intent properties AND the
//     byte-for-byte golden.
// ─────────────────────────────────────────────────────────────────────────────

/// The companies the eval REQUIRES (NOTES.md "Summary of the required entity
/// set"): the three counterparties + the studio anchor. Paths are
/// store-relative and include the `.md` extension.
const REQUIRED_COMPANIES: &[&str] = &[
    "records/companies/tideform.md", // written with relationship=client
    "records/companies/helio-type.md", // written with relationship=vendor
    "records/companies/northgate-coffee.md", // written with relationship=prospect
    "records/companies/lumen-labs.md", // the studio itself (relationship=partner)
];

/// The contacts the eval REQUIRES. Paths are store-relative and include the
/// `.md` extension; the trailing notes name the company each contact is linked
/// to by the scripted session.
const REQUIRED_CONTACTS: &[&str] = &[
    "records/contacts/daniel-osei.md", // company: tideform
    "records/contacts/mara-lindqvist.md", // company: tideform
    "records/contacts/sofia-reyes.md", // company: northgate-coffee
    "records/contacts/theo-vance.md", // company: lumen-labs
];

/// The non-contact/company records the eval REQUIRES.
const REQUIRED_EVENT_RECORDS: &[&str] = &[
    "records/meetings/2026/04/2026-04-14-tideform-kickoff.md",
    "records/invoices/2026/04/2026-04-22-helio-type-ht-2026-0417.md",
    "records/expenses/2026/05/2026-05-06-helio-type-1188.md",
];

/// The wiki pages the eval REQUIRES.
const REQUIRED_WIKI: &[&str] = &[
    "wiki/projects/tideform-rebrand.md",
    "wiki/people/daniel-osei.md",
    "wiki/people/mara-lindqvist.md",
];

/// Addresses that must NOT become contacts (NOTES.md negative cases): bare
/// role / no-reply / own-inbox addresses, and the newsletter sender.
const NON_CONTACT_ADDRESSES: &[&str] = &[
    "billing@heliotype.com",
    "newsletter@designweekly.email",
    "hello@lumenlabs.studio",
    "accounts@lumenlabs.studio",
];

/// End-to-end eval: run the scripted curator session against a temp copy of
/// corpus-e, then check the produced store three ways, in this order:
///
///   A. golden-independent intent properties (required files and types, the
///      paid invoice, link discipline, negative cases, index hierarchy);
///   B. `validate --all` exits 0 with zero errors/warnings and exactly one
///      `info` issue (`POLICY_IGNORED_TYPE_PRESENT` on the newsletter source);
///   C. byte-for-byte equality with the committed `EXPECTED/` golden.
///
/// The first failing assertion panics, so later sections are not reached.
#[test]
fn corpus_e_agent_session_produces_the_expected_store() {
    let (_tmp, mut s) = Session::open_corpus_e();
    run_curator_session(&mut s);
    let store = s.store.clone();

    // ── A. Golden-INDEPENDENT intent properties (NOTES.md + SPEC) ────────────
    // Each required entity exists as a file with the right `type` in frontmatter.
    for (paths, expect_type) in [
        (REQUIRED_COMPANIES, "company"),
        (REQUIRED_CONTACTS, "contact"),
    ] {
        for rel in paths {
            assert_file_type(&store, rel, expect_type);
        }
    }
    // Positional: REQUIRED_EVENT_RECORDS is ordered meeting, invoice, expense.
    assert_file_type(&store, REQUIRED_EVENT_RECORDS[0], "meeting");
    assert_file_type(&store, REQUIRED_EVENT_RECORDS[1], "invoice");
    assert_file_type(&store, REQUIRED_EVENT_RECORDS[2], "expense");
    for rel in REQUIRED_WIKI {
        assert_file_type(&store, rel, "wiki-page");
    }

    // The invoice is `paid` with `paid_at` the receipt date (NOTES requires it).
    let invoice = read(&store, REQUIRED_EVENT_RECORDS[1]);
    assert!(
        invoice.contains("status: paid"),
        "the Helio Type invoice must be status: paid (the receipt confirms payment); got:\n{invoice}"
    );
    // Accept either the quoted or the unquoted YAML form of the date.
    assert!(
        invoice.contains("paid_at: '2026-05-06'") || invoice.contains("paid_at: 2026-05-06"),
        "the invoice must carry paid_at: 2026-05-06; got:\n{invoice}"
    );

    // The expense links to the invoice it paid (DB.md invoice→expense rule).
    let expense = read(&store, REQUIRED_EVENT_RECORDS[2]);
    assert!(
        expense.contains("[[records/invoices/2026/04/2026-04-22-helio-type-ht-2026-0417]]"),
        "the expense must wiki-link the invoice it settles; got:\n{expense}"
    );

    // Every contact links its company via a full-path wiki-link.
    // (Either single- or double-quoted in the serialized frontmatter.)
    for rel in REQUIRED_CONTACTS {
        let c = read(&store, rel);
        assert!(
            c.contains("company: '[[records/companies/")
                || c.contains("company: \"[[records/companies/"),
            "{rel} must link its company via a full-path wiki-link; got:\n{c}"
        );
    }

    // The flagship project wiki page links its evidence (records + sources) and
    // states the $45k fee, in British English ("organised").
    // Positional: REQUIRED_WIKI[0] is the Tideform project page.
    let project = read(&store, REQUIRED_WIKI[0]);
    for needle in [
        "[[records/companies/tideform]]",
        "[[records/contacts/daniel-osei]]",
        "[[records/meetings/2026/04/2026-04-14-tideform-kickoff]]",
        "[[sources/transcripts/2026/04/2026-04-14-tideform-kickoff]]",
        "$45,000",
        "organised",
    ] {
        assert!(
            project.contains(needle),
            "the Tideform project wiki page must contain {needle:?}; got:\n{project}"
        );
    }

    // NEGATIVE: no contact for any bare-role / own-inbox / newsletter address.
    let contacts_dir = store.join("records/contacts");
    let contact_blob = read_all_md(&contacts_dir);
    for addr in NON_CONTACT_ADDRESSES {
        assert!(
            !contact_blob.contains(addr),
            "no contact may be created for the bare-role/own-inbox/newsletter address {addr:?} \
             (DB.md agent instructions); but it appears in records/contacts/"
        );
    }

    // NEGATIVE: nothing in records/ or wiki/ is derived from the newsletter, and
    // the Tideform $45k fee is never modelled as an expense.
    let records_wiki = format!(
        "{}{}",
        read_all_md(&store.join("records")),
        read_all_md(&store.join("wiki"))
    );
    assert!(
        !records_wiki.contains("designweekly") && !records_wiki.contains("Design Weekly"),
        "no record or wiki page may be derived from the newsletter (Ignored type + transient)"
    );
    // No expense names Tideform (the $45k SOW fee is a receivable, not a cost).
    let expenses_blob = read_all_md(&store.join("records/expenses"));
    assert!(
        !expenses_blob.to_lowercase().contains("tideform"),
        "the Tideform $45k fee must NOT be modelled as an expense (it is a receivable)"
    );

    // The full index hierarchy + a well-formed log exist.
    for rel in [
        "index.md",
        "log.md",
        "sources/index.md",
        "records/index.md",
        "wiki/index.md",
    ] {
        assert!(
            store.join(rel).is_file(),
            "{rel} must exist after the session"
        );
    }
    // Every type folder carries both an `index.md` and an `index.jsonl`.
    for type_folder in [
        "sources/docs",
        "sources/emails",
        "sources/transcripts",
        "records/companies",
        "records/contacts",
        "records/meetings",
        "records/invoices",
        "records/expenses",
        "wiki/people",
        "wiki/projects",
    ] {
        assert!(
            store.join(type_folder).join("index.md").is_file(),
            "{type_folder}/index.md (type-folder index) must exist"
        );
        assert!(
            store.join(type_folder).join("index.jsonl").is_file(),
            "{type_folder}/index.jsonl (the complete twin) must exist"
        );
    }

    // ── B. validate --all is clean (zero errors / zero warnings) ─────────────
    // The lone expected signal is the `info`-level POLICY_IGNORED_TYPE_PRESENT
    // for the newsletter source — asserted explicitly, never silently ignored.
    // `run` (not `run_ok`) is used so the exit code can be asserted here.
    let (vout, vcode) = s.run("2026-05-29T17:25:00Z", &["--json", "validate", "--all"]);
    assert_eq!(
        vcode, 0,
        "validate --all must exit 0 (zero errors); stdout:\n{vout}"
    );
    let report: serde_json::Value = serde_json::from_str(vout.trim())
        .unwrap_or_else(|e| panic!("validate --all must emit JSON ({e}): {vout:?}"));
    assert_eq!(
        report["summary"]["errors"], 0,
        "zero errors required; report:\n{report:#}"
    );
    assert_eq!(
        report["summary"]["warnings"], 0,
        "zero warnings required; report:\n{report:#}"
    );
    let issues = report["issues"].as_array().expect("issues array");
    assert_eq!(
        issues.len(),
        1,
        "exactly one (info) issue expected; report:\n{report:#}"
    );
    assert_eq!(issues[0]["severity"], "info");
    assert_eq!(issues[0]["code"], "POLICY_IGNORED_TYPE_PRESENT");
    assert!(
        issues[0]["file"]
            .as_str()
            .unwrap_or_default()
            .contains("designweekly-digest"),
        "the lone info must be the newsletter source; got {}",
        issues[0]
    );

    // ── C. Byte-for-byte golden — the produced store equals EXPECTED/ ────────
    // Catches any regression in the write / index-write-through / log-append /
    // canonical-serialization paths. (Run AFTER validate --all so the validate
    // call's working-set bookkeeping does not perturb the comparison — the
    // `index rebuild` during warm-up means validate --all is read-only here.)
    assert_store_matches_golden(&store);
}

/// Compare the produced store against `corpus-e-agent/EXPECTED/` byte-for-byte.
/// Only the agent-produced files are golden (records/, wiki/, the index
/// hierarchy, log.md); `DB.md` + `sources/**` content files are inputs and are
/// excluded from the golden tree, so we compare exactly the set EXPECTED ships.
///
/// Two directions are checked:
///   1. every file under `EXPECTED/` (except the top-level `README.md`) exists
///      in `store` with identical bytes;
///   2. every golden-governed file in `store` (anything under `records/` or
///      `wiki/`, any `index.md` / `index.jsonl` / `log.md` under `sources/`, and
///      the root `index.md` / `log.md`) is pinned by `EXPECTED/`.
///
/// # Panics
///
/// Panics if the golden tree is absent, a golden file cannot be read, a pinned
/// file is missing from or unreadable in `store`, the bytes differ, or `store`
/// holds a golden-governed file that `EXPECTED/` does not pin.
fn assert_store_matches_golden(store: &Path) {
    let expected = corpora_dir().join("corpus-e-agent").join("EXPECTED");
    assert!(
        expected.is_dir(),
        "the golden tree {} must be committed",
        expected.display()
    );

    // Every file under EXPECTED/ (minus README.md, which documents the golden
    // and is not part of the store) must exist in the store and be byte-equal.
    let mut golden_rels: BTreeSet<PathBuf> = BTreeSet::new();
    for rel in walk_rel(&expected) {
        if rel == Path::new("README.md") {
            continue;
        }
        golden_rels.insert(rel.clone());
        let got = store.join(&rel);
        let want_bytes = std::fs::read(expected.join(&rel))
            .unwrap_or_else(|e| panic!("read golden {}: {e}", rel.display()));
        // Only a genuine "not found" is reported as a missing file; any other
        // I/O failure (permissions, a directory where a file is expected, …)
        // surfaces its own error instead of being misreported as "missing".
        let got_bytes = match std::fs::read(&got) {
            Ok(bytes) => bytes,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => panic!(
                "the produced store is missing {} which EXPECTED/ pins",
                rel.display()
            ),
            Err(e) => panic!(
                "read produced {} (which EXPECTED/ pins): {e}",
                rel.display()
            ),
        };
        assert!(
            want_bytes == got_bytes,
            "BYTE MISMATCH vs golden at {}:\n--- EXPECTED ---\n{}\n--- GOT ---\n{}",
            rel.display(),
            String::from_utf8_lossy(&want_bytes),
            String::from_utf8_lossy(&got_bytes),
        );
    }

    // Converse: the store must not have produced any records/wiki/index/log file
    // the golden does NOT account for (a stray extra write is a regression too).
    // We scope this to the agent-produced surface (skip DB.md + source CONTENT).
    for rel in walk_rel(store) {
        // First path component decides which top-level layer the file is in.
        let first = rel.iter().next().and_then(|c| c.to_str()).unwrap_or("");
        let is_index_or_log = rel
            .file_name()
            .and_then(|n| n.to_str())
            .map(|n| n == "index.md" || n == "index.jsonl" || n == "log.md")
            .unwrap_or(false);
        let in_records_or_wiki = first == "records" || first == "wiki";
        // Source CONTENT files are inputs; only the source INDEX files are golden.
        let is_source_index = first == "sources" && is_index_or_log;
        let golden_governed = in_records_or_wiki
            || is_source_index
            || (rel == Path::new("index.md"))
            || (rel == Path::new("log.md"));
        if golden_governed {
            assert!(
                golden_rels.contains(&rel),
                "the store produced {} which the golden does not pin — \
                 update EXPECTED/ or fix the writer",
                rel.display()
            );
        }
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// 2 — session-lifecycle assertions over the recorded command log.
// ─────────────────────────────────────────────────────────────────────────────

/// Lifecycle eval: run the scripted curator session, then assert ordering and
/// discipline properties over the recorded command log (`s.log`) only — the
/// produced store's content is not inspected here.
///
/// Checked, in order: first call is a `log` read; every contact write has an
/// earlier `fm query email=…`; no short-form wiki-links; every write passes
/// `--summary`; every write/rename is immediately logged; `log create` count
/// equals write count; a `validate` runs in the second half; exactly one
/// `index rebuild`, before the first write; the final call is a `log` append;
/// every call exited 0.
#[test]
fn corpus_e_command_log_satisfies_the_session_lifecycle() {
    let (_tmp, mut s) = Session::open_corpus_e();
    run_curator_session(&mut s);
    let log = &s.log;
    assert!(
        !log.is_empty(),
        "the session must have recorded invocations"
    );

    // Step 2 — the FIRST dbmd call is `log tail` (or `log since`).
    let first = &log[0];
    assert_eq!(first.verb(), Some("log"), "first call must be a `log` read");
    assert!(
        matches!(first.subverb(), Some("tail") | Some("since")),
        "first call must be `log tail` or `log since`, got `log {:?}`",
        first.subverb()
    );

    // Step 3 — for every `write` of a CONTACT, a preceding `fm query email=…`
    // exists earlier in the log (pre-write dedup check #1).
    for (i, inv) in log.iter().enumerate() {
        if is_contact_write(inv) {
            let has_preceding_email_query = log[..i].iter().any(|p| {
                p.verb() == Some("fm") && p.subverb() == Some("query") && p.arg_contains("email=")
            });
            assert!(
                has_preceding_email_query,
                "contact write at index {i} ({:?}) has no preceding `fm query email=…`",
                inv.args
            );
        }
    }

    // Step 3 — ZERO short-form wiki-links in any write / fm set (full paths
    // only). Factored into a shared guard so it can also be exercised against a
    // log that actually contains `fm set` invocations (see the focused
    // `fm_set_and_rename_*` test) — the curator session here happens to carry no
    // `fm set`, so the `fm set` arm would otherwise never be hit.
    assert_no_short_form_wiki_links(log);

    // Step 3 — every CONTENT-file `write` passes `--summary` (pre-write #4).
    for inv in log {
        if inv.verb() == Some("write") {
            assert!(
                inv.args.iter().any(|a| a == "--summary"),
                "content write {:?} must pass --summary",
                inv.args
            );
        }
    }

    // Step 3 — PER-MUTATION logging discipline: a `log <kind>` append follows
    // EVERY `write` *and* every `rename`, *immediately* (NOTES.md § Session
    // lifecycle: "a `dbmd log <kind> <object>` follows every `dbmd write` /
    // `dbmd rename`"). A weaker "some log append exists somewhere after this
    // mutation" is vacuous: the closing `log validate` append satisfies it for
    // every mutation regardless of whether each is individually logged.
    //
    // (a) Immediacy — factored into a shared guard so the `rename` arm, which the
    //     curator session never produces, is genuinely exercised by the focused
    //     `fm_set_and_rename_*` test rather than sitting vacuous here.
    assert_mutations_immediately_logged(log);

    // (b) Count parity — the number of `create`-kind `log` appends equals the
    //     number of writes. Each content write logs exactly one `log create`, so
    //     a dropped or duplicated write-log breaks the equality even if some
    //     OTHER append (the `ingest` warm-up or `validate` close) happens to sit
    //     adjacent. The `ingest`/`validate` appends are deliberately NOT
    //     `create`-kind, so they do not pad this count. (Curator-session-specific:
    //     this session performs only `write` mutations, all logged `log create`.)
    let write_count = log.iter().filter(|inv| inv.verb() == Some("write")).count();
    let create_log_appends = log
        .iter()
        .filter(|inv| inv.verb() == Some("log") && inv.subverb() == Some("create"))
        .count();
    assert_eq!(
        create_log_appends, write_count,
        "expected exactly one `log create` append per write ({write_count} writes), found \
         {create_log_appends} `log create` appends — per-write logging discipline is broken",
    );

    // Step 4 — at least one `validate` (working set) ran in the SECOND half.
    // With an odd-length log, integer division puts the middle entry in the
    // back half.
    let half = log.len() / 2;
    let validate_in_back_half = log[half..].iter().any(|inv| inv.verb() == Some("validate"));
    assert!(
        validate_in_back_half,
        "a `validate` must run in the second half of the session (step 4)"
    );

    // Step 5 — ZERO `index rebuild` calls in the OPERATING LOOP. The one allowed
    // rebuild is during warm-up (the bulk-external-drop fold), which must occur
    // BEFORE the first content write. Any rebuild AT/AFTER the first write fails.
    let first_write = log
        .iter()
        .position(|inv| inv.verb() == Some("write"))
        .expect("the session performs writes");
    let rebuilds: Vec<usize> = log
        .iter()
        .enumerate()
        .filter(|(_, inv)| inv.verb() == Some("index") && inv.subverb() == Some("rebuild"))
        .map(|(i, _)| i)
        .collect();
    assert_eq!(
        rebuilds.len(),
        1,
        "exactly one `index rebuild` (the warm-up sources fold) is expected; found at {rebuilds:?}"
    );
    // Indexing `rebuilds[0]` is safe: the length was just asserted to be 1.
    assert!(
        rebuilds[0] < first_write,
        "the `index rebuild` (idx {}) must be in warm-up, before the first write (idx {first_write}) — \
         no rebuild in the operating loop",
        rebuilds[0]
    );

    // Step 6 — the FINAL recorded call is a well-formed `log` append (close).
    // `unwrap` is safe: the log was asserted non-empty at the top of the test.
    let last = log.last().unwrap();
    assert!(
        is_log_append(last),
        "the final recorded call must be a `log <kind>` append (close), got {:?}",
        last.args
    );

    // Sanity: every recorded invocation exited successfully (the lifecycle is a
    // sequence of CORRECT operations — a crash mid-session is the worst
    // regression). The single non-success-allowed call (`validate` may exit
    // non-zero if it finds issues) DID exit 0 here, so a blanket check holds.
    for inv in log {
        assert_eq!(
            inv.exit_code, 0,
            "every lifecycle call must exit 0; {:?} exited {}",
            inv.args, inv.exit_code
        );
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// 2b — focused lifecycle coverage for `fm set` + `rename`.
//
// The scripted curator session (`run_curator_session`) exercises `write` / `fm
// query` / `log`, but never `dbmd fm set` or `dbmd rename` — so the two
// lifecycle guards that name those commands (`assert_no_short_form_wiki_links`'s
// `fm set` arm and `assert_mutations_immediately_logged`'s `rename` arm, per
// NOTES.md § Session lifecycle: "follows every `dbmd write` / `dbmd rename`")
// would otherwise be evaluated against an empty input set and pass vacuously.
//
// This test drives REAL `fm set` and `rename` invocations through the same
// release binary + recording `Session`, then asserts BOTH directions:
//   (1) the guards PASS on a valid log that contains a full-path `fm set` value
//       and a `rename` immediately followed by a `log rename` append; AND
//   (2) the guards' non-panicking `check_*` cores return `Err` on adversarial
//       logs — a short-form `[[name]]` in an `fm set`, and a `rename` NOT
//       followed by a `log` — proving they actually bite the failure each is
//       meant to catch (without touching the process-global panic hook).
// ─────────────────────────────────────────────────────────────────────────────

/// Focused lifecycle coverage for `fm set` + `rename`, which the curator
/// session never issues.
///
/// Drives a minimal real session (warm-up, one company, one contact, an
/// `fm set`, a `rename`, each mutation logged immediately), then asserts:
///   (1) both shared guards pass on that recorded log; and
///   (2) each guard's non-panicking `check_*` core returns `Err` on a
///       hand-built adversarial log, with a message that identifies the
///       specific offender.
#[test]
fn fm_set_and_rename_lifecycle_guards_fire_on_real_invocations() {
    let (_tmp, mut s) = Session::open_corpus_e();

    // ── Minimal real session: warm up, write a company + a contact (the rename
    //    target + its link), then `fm set` a full-path link and `rename` the
    //    contact, logging each mutation immediately. Every call hits the release
    //    binary, so the recorded log is exactly what a real agent would produce.
    s.run_ok("2026-05-29T17:00:00Z", &["--json", "log", "tail", "20"]);
    s.run_ok("2026-05-29T17:00:30Z", &["--json", "index", "rebuild"]);

    write_record(
        &mut s,
        "2026-05-29T17:01:00Z",
        "records/companies/tideform",
        "company",
        "Tideform — phase-one rebrand client",
        &[
            "name=Tideform",
            "domain=tideform.com",
            "industry=Consumer mobile app",
            "relationship=client",
        ],
        None,
    );
    log_entry(
        &mut s,
        "2026-05-29T17:01:00Z",
        "create",
        "records/companies/tideform.md",
        "client company",
    );

    // `fm query` dedup precedes the contact write (Step 3 #1).
    s.run_ok(
        "2026-05-29T17:02:00Z",
        &[
            "--json",
            "fm",
            "query",
            "email=daniel.osei@tideform.com",
            "--type",
            "contact",
        ],
    );
    write_record(
        &mut s,
        "2026-05-29T17:02:00Z",
        "records/contacts/daniel-osei",
        "contact",
        "Head of Product at Tideform",
        &[
            "name=Daniel Osei",
            "email=daniel.osei@tideform.com",
            "role=Head of Product",
            "company=[[records/companies/tideform]]",
        ],
        None,
    );
    log_entry(
        &mut s,
        "2026-05-29T17:02:00Z",
        "create",
        "records/contacts/daniel-osei.md",
        "Tideform buyer",
    );

    // `fm set` with a FULL-PATH wiki-link value (re-affirm the company link), then
    // log the update — exercises the `fm set` arm of the short-form guard with a
    // VALID link, and the `fm set` write-through path against a real store.
    let set_out = s.run_ok(
        "2026-05-29T17:03:00Z",
        &[
            "--json",
            "fm",
            "set",
            "records/contacts/daniel-osei.md",
            "company=[[records/companies/tideform]]",
        ],
    );
    let set_json: serde_json::Value = serde_json::from_str(set_out.trim())
        .unwrap_or_else(|e| panic!("`fm set` must emit JSON ({e}): {set_out:?}"));
    assert_eq!(
        set_json["index_updated"], true,
        "fm set must keep the index write-through current"
    );
    log_entry(
        &mut s,
        "2026-05-29T17:03:00Z",
        "update",
        "records/contacts/daniel-osei.md",
        "re-affirmed company link",
    );

    // `rename` the contact, then log it (NOTES.md: a `log` follows every rename).
    let rename_out = s.run_ok(
        "2026-05-29T17:04:00Z",
        &[
            "--json",
            "rename",
            "records/contacts/daniel-osei.md",
            "records/contacts/daniel-osei-hop.md",
        ],
    );
    let rename_json: serde_json::Value = serde_json::from_str(rename_out.trim())
        .unwrap_or_else(|e| panic!("`rename` must emit JSON ({e}): {rename_out:?}"));
    assert_eq!(
        rename_json["renamed"]["to"], "records/contacts/daniel-osei-hop.md",
        "rename must report the destination it moved to"
    );
    assert!(
        rename_json["links_rewritten"].is_number(),
        "rename must report a links_rewritten count; got {rename_json}"
    );
    assert!(
        s.store
            .join("records/contacts/daniel-osei-hop.md")
            .is_file()
            && !s.store.join("records/contacts/daniel-osei.md").is_file(),
        "rename must move the file on disk"
    );
    log_entry(
        &mut s,
        "2026-05-29T17:04:00Z",
        "rename",
        "records/contacts/daniel-osei-hop.md",
        "renamed contact to reflect role",
    );

    // The store is still clean after the fm-set + rename write-through (proves the
    // mutations kept indexes + links valid, not just that they ran).
    let (_v, vcode) = s.run("2026-05-29T17:05:00Z", &["--json", "validate", "--all"]);
    assert_eq!(
        vcode, 0,
        "validate --all must be clean after fm set + rename"
    );

    let log = &s.log;

    // ── Sanity: the log actually contains a real `fm set` and a real `rename`
    //    (otherwise this test would itself be vacuous — the exact trap it guards).
    assert!(
        log.iter()
            .any(|i| i.verb() == Some("fm") && i.subverb() == Some("set")),
        "the focused session must record at least one `fm set`"
    );
    assert!(
        log.iter().any(is_rename),
        "the focused session must record at least one `rename`"
    );

    // ── (1) The guards PASS on this valid log (no short-form links; every
    //    write/rename immediately logged).
    assert_no_short_form_wiki_links(log);
    assert_mutations_immediately_logged(log);

    // ── (2) The guards FIRE on adversarial logs — proving the `fm set` arm and
    //    the `rename` arm are not vacuous. We build minimal hand-rolled logs and
    //    assert each guard's non-panicking `check_*` core returns `Err`. (Using
    //    the `check_*` core rather than `catch_unwind` on the panicking wrapper
    //    keeps this test from touching the process-global panic hook, which would
    //    race the other tests running concurrently in this binary.)

    // (2a) A short-form `[[name]]` (no `/`) in an `fm set` value must be rejected
    //      by the short-form guard's `fm set` arm.
    let bad_fm_set = vec![Invocation {
        args: vec![
            "--json".into(),
            "fm".into(),
            "set".into(),
            "records/contacts/daniel-osei.md".into(),
            "company=[[tideform]]".into(), // short-form: no `/`
        ],
        exit_code: 0,
    }];
    let err = check_no_short_form_wiki_links(&bad_fm_set)
        .expect_err("a short-form wiki-link in an `fm set` value must be rejected");
    // Match the guard's `short-form wiki-link {link:?}` prefix, not a bare
    // "tideform": the message also echoes the whole argv (which always contains
    // `company=[[tideform]]`), so a bare substring check could never fail.
    assert!(
        err.contains("short-form wiki-link \"tideform\""),
        "the rejection must name the offending short-form link; got {err:?}"
    );

    // (2b) A `rename` NOT followed by any `log` append must be rejected by the
    //      per-mutation logging guard's `rename` arm. (A trailing `fm query` read
    //      is not a `log` append.)
    let unlogged_rename = vec![
        Invocation {
            args: vec![
                "rename".into(),
                "records/contacts/a.md".into(),
                "records/contacts/b.md".into(),
            ],
            exit_code: 0,
        },
        Invocation {
            args: vec!["fm".into(), "query".into(), "email=x@y.z".into()],
            exit_code: 0,
        },
    ];
    let err = check_mutations_immediately_logged(&unlogged_rename)
        .expect_err("a rename not immediately followed by a `log` append must be rejected");
    // Anchor on the guard's `{verb} at index {i}` prefix: the message's fixed
    // tail contains the literal "write/rename", so `contains("rename")` would
    // hold even if the guard had blamed a different mutation.
    assert!(
        err.starts_with("rename at index 0 "),
        "the rejection must name the unlogged rename; got {err:?}"
    );
}

/// Reports whether `inv` is a `dbmd write` whose first `--type` flag is
/// followed by the value `contact`. A `write` with no `--type`, or with
/// `--type` as its last argument, is not a contact write.
fn is_contact_write(inv: &Invocation) -> bool {
    if inv.verb() != Some("write") {
        return false;
    }
    // Locate the first `--type` flag, then inspect the argument right after it.
    let type_value = inv
        .args
        .iter()
        .position(|arg| arg == "--type")
        .and_then(|flag_idx| inv.args.get(flag_idx + 1));
    type_value.map(String::as_str) == Some("contact")
}

/// Reports whether `inv` appends to the log: a `log <kind>` call whose
/// sub-verb is present and is neither of the read sub-verbs `tail` / `since`.
fn is_log_append(inv: &Invocation) -> bool {
    if inv.verb() != Some("log") {
        return false;
    }
    match inv.subverb() {
        // Reads (or a bare `log` with no sub-verb) are not appends.
        None | Some("tail") | Some("since") => false,
        Some(_) => true,
    }
}

/// Collect the link target of each `[[target]]` / `[[target|display]]` found
/// in one CLI argument, in order of appearance. The target is the text before
/// the first `|`, trimmed; empty targets are dropped. An opener with no
/// closing `]]` after it contributes nothing.
fn extract_wiki_link_targets(arg: &str) -> Vec<String> {
    let mut targets = Vec::new();
    let mut rest = arg;
    while let Some(open) = rest.find("[[") {
        let after_open = &rest[open + 2..];
        // No closer ahead of this opener means none ahead of any later one.
        let Some(close) = after_open.find("]]") else {
            break;
        };
        let inner = &after_open[..close];
        let target = inner.split('|').next().unwrap_or(inner).trim();
        if !target.is_empty() {
            targets.push(target.to_owned());
        }
        // Resume scanning just past the closing `]]`.
        rest = &after_open[close + 2..];
    }
    targets
}

/// Reports whether the invocation's verb is `rename` (`dbmd rename <old> <new>`).
fn is_rename(inv: &Invocation) -> bool {
    matches!(inv.verb(), Some("rename"))
}

/// Shared guard — Step 3 link discipline. Every `[[…]]` target appearing in
/// any argument of a `write` or `fm set` invocation must contain a `/`, i.e.
/// be a full store-relative path rather than a short-form `[[name]]`.
///
/// Invocations with any other verb are not inspected. Within an inspected
/// invocation every argument is scanned, not only the `key=value` ones.
///
/// This is the non-panicking core: it returns `Err` with a message describing
/// the first offending link (in log order, then argument order), or `Ok(())`
/// when there is none. `assert_no_short_form_wiki_links` is the panicking
/// wrapper. Returning a `Result` lets a test verify that the guard fires
/// without catching a panic.
fn check_no_short_form_wiki_links(log: &[Invocation]) -> Result<(), String> {
    for inv in log {
        let takes_link_values = match inv.verb() {
            Some("write") => true,
            Some("fm") => inv.subverb() == Some("set"),
            _ => false,
        };
        if !takes_link_values {
            continue;
        }

        let offender = inv
            .args
            .iter()
            .flat_map(|arg| extract_wiki_link_targets(arg))
            .find(|target| !target.contains('/'));
        if let Some(link) = offender {
            return Err(format!(
                "short-form wiki-link {link:?} in {:?} — full store-relative paths only",
                inv.args
            ));
        }
    }
    Ok(())
}

/// Panicking wrapper around `check_no_short_form_wiki_links`: panics with the
/// check's message when it returns `Err`.
fn assert_no_short_form_wiki_links(log: &[Invocation]) {
    check_no_short_form_wiki_links(log).unwrap_or_else(|msg| panic!("{msg}"));
}

/// Shared guard — Step 3 per-mutation logging discipline: a `log <kind>` append
/// follows EVERY `write` *and* every `rename`, **immediately** (NOTES.md §
/// Session lifecycle: "a `dbmd log <kind> <object>` follows every `dbmd write` /
/// `dbmd rename`"). The immediate-next-call form is the non-vacuous one: a
/// weaker "some append exists later" is satisfied by the closing `log validate`
/// for every mutation regardless of whether each is individually logged.
///
/// The `rename` arm is the coverage the curator session lacks; the focused
/// `fm_set_and_rename_*` test drives a real `rename` through this guard. Same
/// `check_*`/`assert_*` split as above, for the same panic-hook-free reason.
fn check_mutations_immediately_logged(log: &[Invocation]) -> Result<(), String> {
    for (i, inv) in log.iter().enumerate() {
        if inv.verb() == Some("write") || is_rename(inv) {
            let next = log.get(i + 1);
            if !next.is_some_and(is_log_append) {
                return Err(format!(
                    "{} at index {i} ({:?}) is not IMMEDIATELY followed by a `log <kind>` append; \
                     next call was {:?} — each write/rename must be logged right after it",
                    inv.verb().unwrap_or("?"),
                    inv.args,
                    next.map(|n| &n.args),
                ));
            }
        }
    }
    Ok(())
}

/// Panicking wrapper around `check_mutations_immediately_logged`: panics with
/// the check's message when it returns `Err`.
fn assert_mutations_immediately_logged(log: &[Invocation]) {
    check_mutations_immediately_logged(log).unwrap_or_else(|msg| panic!("{msg}"));
}

// ─────────────────────────────────────────────────────────────────────────────
// 3 — supporting evals.
// ─────────────────────────────────────────────────────────────────────────────

/// One case in the corpus-a agent-eval search golden.
#[derive(serde::Deserialize)]
struct SearchCase {
    /// The query string, passed to `dbmd search` as the argument right after
    /// the `search` verb.
    query: String,
    /// Extra CLI arguments appended after the query (e.g. `--type`, `--in`,
    /// `--updated-after`). Deserializes to an empty list when the key is absent.
    #[serde(default)]
    args: Vec<String>,
    /// The file set the search is expected to return; compared as a set, so
    /// order and duplicates do not matter.
    matches: Vec<String>,
}

/// Top-level shape of `EXPECTED/search-agent-eval.json`: the list of search
/// cases under the `queries` key.
#[derive(serde::Deserialize)]
struct SearchGolden {
    /// The pinned search cases; the search eval asserts there are exactly 20.
    queries: Vec<SearchCase>,
}

/// Search eval over corpus-a, driving the release binary: each of the 20
/// representative queries in the committed golden (incl. `--type` / `--in` /
/// `--updated-after` cases) must return EXACTLY its golden file set, in both
/// text mode and `--json` mode.
#[test]
fn search_eval_over_corpus_a_matches_golden() {
    let bin = release_dbmd();

    // Load and parse the committed golden.
    let golden_path = corpus_a().join("EXPECTED").join("search-agent-eval.json");
    let golden: SearchGolden = match std::fs::read_to_string(&golden_path) {
        Ok(raw) => serde_json::from_str(&raw).expect("search-agent-eval.json is valid JSON"),
        Err(_) => panic!("{} is committed", golden_path.display()),
    };
    assert_eq!(
        golden.queries.len(),
        20,
        "the agent-eval search golden pins exactly 20 representative queries"
    );

    // Coverage guarantee: taken together, the cases must exercise each of the
    // three filter flags at least once (the plan's explicit requirement).
    for required in ["--type", "--in", "--updated-after"] {
        let exercised = golden
            .queries
            .iter()
            .flat_map(|case| case.args.iter())
            .any(|arg| arg.as_str() == required);
        assert!(
            exercised,
            "the 20-query set must include at least one {required} case"
        );
    }

    for case in &golden.queries {
        let want: BTreeSet<&str> = case.matches.iter().map(String::as_str).collect();

        // Text mode prints `file:line: text`; keep the distinct file column
        // (everything before the first colon) of each non-blank line.
        let mut text_argv = vec!["search", case.query.as_str()];
        text_argv.extend(case.args.iter().map(String::as_str));
        let text_out = run_capture(&bin, &corpus_a(), text_argv);
        let got_text: BTreeSet<&str> = text_out
            .lines()
            .filter(|line| !line.trim().is_empty())
            .map(|line| line.split_once(':').map_or(line, |(file, _)| file))
            .collect();
        assert_eq!(
            got_text, want,
            "text-mode search {:?} {:?} returned the wrong file set",
            case.query, case.args
        );

        // JSON mode: same query and extra args, with the global `--json` flag
        // in front; the named files are pulled out by `json_match_files`.
        let mut json_argv = vec!["--json", "search", case.query.as_str()];
        json_argv.extend(case.args.iter().map(String::as_str));
        let json_out = run_capture(&bin, &corpus_a(), json_argv);
        let parsed: serde_json::Value = match serde_json::from_str(json_out.trim()) {
            Ok(value) => value,
            Err(e) => panic!(
                "search --json {:?} {:?} must emit JSON ({e}): {json_out:?}",
                case.query, case.args
            ),
        };
        let json_files = json_match_files(&parsed);
        let got_json: BTreeSet<&str> = json_files.iter().map(String::as_str).collect();
        assert_eq!(
            got_json, want,
            "json-mode search {:?} {:?} returned the wrong file set",
            case.query, case.args
        );
    }
}

/// Validate eval over corpus-b (the designed-to-fail store): a live
/// `dbmd --json validate --all` must reproduce EXACTLY the summary and issue
/// set committed in `EXPECTED/validate.json`.
#[test]
fn validate_eval_over_corpus_b_matches_golden() {
    let bin = release_dbmd();
    let store = corpus_b();

    // Live report from the release binary.
    let live_json = run_capture(&bin, &store, vec!["--json", "validate", "--all"]);
    let report: serde_json::Value =
        serde_json::from_str(live_json.trim()).expect("validate --all emits JSON on corpus-b");

    // Committed golden report.
    let golden_raw = std::fs::read_to_string(store.join("EXPECTED").join("validate.json"))
        .expect("corpus-b EXPECTED/validate.json is committed");
    let golden: serde_json::Value =
        serde_json::from_str(&golden_raw).expect("EXPECTED/validate.json is valid JSON");

    // The summary tallies must agree outright; the issues are compared as an
    // order-insensitive multiset of their stable fields (code, severity, file,
    // line, key) because issue ordering is not contractual.
    assert_eq!(
        report["summary"], golden["summary"],
        "corpus-b validate --all summary must match the golden"
    );
    assert_eq!(
        issue_multiset(&report["issues"]),
        issue_multiset(&golden["issues"]),
        "corpus-b validate --all issue set must equal EXPECTED/validate.json"
    );

    // Sanity: this is the broken store, so a clean result would mean
    // validation silently went blind.
    let error_count = report["summary"]["errors"].as_u64().unwrap_or(0);
    assert!(
        error_count > 0,
        "corpus-b is the designed-to-fail store; validate must report errors"
    );
}

/// Extract eval over corpus-c — the agent-eval slice the plan names (`dbmd
/// extract over corpus-c diffed vs known-good`); the exhaustive per-fixture
/// pass lives in `extract_e2e.rs`. Three expectations:
///
///   * every text-bearing fixture extracts to the same words, in the same
///     order, as its known-good `.txt` (token-normalized: decoders agree on
///     words but differ on layout);
///   * the image-only PDF extracts to nothing;
///   * the encrypted PDF is refused with `DOCUMENT_ENCRYPTED`.
#[test]
fn extract_eval_over_corpus_c_matches_known_good() {
    // Fixtures that carry a real text layer and ship a `<name>.txt` known-good.
    const TEXT_FIXTURES: [&str; 6] = [
        "text.pdf",
        "weird-fonts.pdf",
        "sample.docx",
        "sample.xlsx",
        "sample.epub",
        "sample.html",
    ];

    let bin = release_dbmd();
    let docs = corpora_dir()
        .join("corpus-c-formats")
        .join("sources")
        .join("docs");

    for fixture in TEXT_FIXTURES {
        let doc = docs.join(fixture);
        let known = docs.join(format!("{fixture}.txt"));
        assert!(doc.is_file(), "corpus-c fixture {fixture} must exist");
        assert!(
            known.is_file(),
            "corpus-c known-good {fixture}.txt must exist"
        );

        let out = run_capture(&bin, &docs, vec!["extract", doc.to_str().unwrap()]);
        let want = std::fs::read_to_string(&known).expect("read known-good");

        // Compare whitespace-run-agnostic, then make sure the agreement is not
        // the vacuous "both sides are empty" kind.
        let got_tokens = normalize_tokens(&out);
        assert_eq!(
            got_tokens,
            normalize_tokens(&want),
            "extract of {fixture} disagrees (token-normalized) with its known-good .txt"
        );
        assert!(
            !got_tokens.is_empty(),
            "extract of {fixture} produced no text"
        );
    }

    // image-only.pdf has no text layer, so the output must be empty — never
    // hallucinated text. Checked only when the fixture is present.
    let image_only = docs.join("image-only.pdf");
    if image_only.is_file() {
        let out = run_capture(&bin, &docs, vec!["extract", image_only.to_str().unwrap()]);
        assert!(
            out.trim().is_empty(),
            "image-only.pdf must extract to empty (no hallucinated text); got {out:?}"
        );
    }

    // encrypted.pdf must FAIL cleanly: non-zero exit, nothing on stdout, and a
    // structured DOCUMENT_ENCRYPTED error on stderr. Checked only when the
    // fixture is present.
    let encrypted = docs.join("encrypted.pdf");
    if encrypted.is_file() {
        let mut cmd = StdCommand::new(&bin);
        cmd.args(["--json", "extract", encrypted.to_str().unwrap()])
            .current_dir(&docs);
        let output = cmd.output().expect("spawn dbmd extract on encrypted.pdf");

        assert!(
            !output.status.success(),
            "an encrypted PDF must be refused (non-zero exit)"
        );
        assert!(
            output.stdout.is_empty(),
            "a refused extract must emit nothing to stdout"
        );

        let stderr_text = String::from_utf8_lossy(&output.stderr);
        let err: serde_json::Value = serde_json::from_str(stderr_text.trim())
            .expect("encrypted-refusal error is JSON under --json");
        assert_eq!(
            err["error"]["code"], "DOCUMENT_ENCRYPTED",
            "the refusal must carry the DOCUMENT_ENCRYPTED code; got {}",
            err["error"]
        );
    }
}

/// Policy-refusal eval. The agent tries to `write` over corpus-b's frozen
/// decision page and must be refused: structured `POLICY_FROZEN_PAGE` error,
/// exit code 4, nothing on stdout, and the file left byte-identical. It then
/// takes one of the two valid recovery moves (write to an alternate path; the
/// other is to escalate), which must succeed.
#[test]
fn policy_refusal_eval_refuses_and_leaves_file_byte_identical() {
    let bin = release_dbmd();
    // Work on a temp copy so the committed corpus is never touched.
    let (_tmp, store) = copy_store_to_temp(&corpus_b());

    // Builds `dbmd --json write <target> --type decision --summary <summary>`
    // with the temp store as the working directory.
    let write_decision = |target: &str, summary: &str| {
        let mut cmd = StdCommand::new(&bin);
        cmd.args([
            "--json",
            "write",
            target,
            "--type",
            "decision",
            "--summary",
            summary,
        ])
        .current_dir(&store);
        cmd
    };

    let frozen_rel = "records/decisions/2026-q1-strategy.md";
    let frozen_abs = store.join(frozen_rel);
    assert!(
        frozen_abs.is_file(),
        "the frozen fixture must exist in corpus-b"
    );
    let before = std::fs::read(&frozen_abs).expect("read frozen before");

    // Attempt to overwrite the frozen decision page.
    let output = write_decision("records/decisions/2026-q1-strategy", "overwrite attempt")
        .output()
        .expect("spawn dbmd write on frozen page");

    // Refused: non-zero exit (exit 4 = ExitCode::Policy) and no success object.
    assert!(
        !output.status.success(),
        "a write to a frozen page must be refused (non-zero exit)"
    );
    assert_eq!(
        output.status.code(),
        Some(4),
        "a frozen-page refusal exits 4 (ExitCode::Policy)"
    );
    assert!(
        output.stdout.is_empty(),
        "a refused write must print nothing to stdout (no success object)"
    );

    // The structured error on stderr carries the code and names the path.
    let stderr_text = String::from_utf8_lossy(&output.stderr);
    let err: serde_json::Value =
        serde_json::from_str(stderr_text.trim()).expect("the refusal error is JSON under --json");
    assert_eq!(
        err["error"]["code"], "POLICY_FROZEN_PAGE",
        "the refusal must carry the structured POLICY_FROZEN_PAGE code; got {}",
        err["error"]
    );
    let message = err["error"]["message"].as_str().unwrap_or_default();
    assert!(
        message.contains(frozen_rel),
        "the refusal message must name the frozen path {frozen_rel:?}; got {}",
        err["error"]["message"]
    );

    // No write occurred: the frozen file is byte-for-byte what it was.
    let after = std::fs::read(&frozen_abs).expect("read frozen after");
    assert!(
        before == after,
        "the frozen page must be byte-for-byte unchanged after a refused write"
    );

    // Recovery: a write to an ALTERNATE (non-frozen) path goes through. The
    // other valid move, escalate-to-operator, is a no-op against the store.
    // This proves the refusal is path-scoped, not a wedge.
    let recover = write_decision(
        "records/decisions/2026-q1-strategy-revised",
        "revised strategy (alternate, non-frozen path)",
    )
    .env("DBMD_NOW", "2026-05-29T18:00:00Z")
    .output()
    .expect("spawn dbmd write on the alternate path");
    assert!(
        recover.status.success(),
        "writing to a non-frozen alternate path must succeed (valid recovery); stderr:\n{}",
        String::from_utf8_lossy(&recover.stderr)
    );
    let revised = store.join("records/decisions/2026-q1-strategy-revised.md");
    assert!(
        revised.is_file(),
        "the alternate-path recovery write must have created the file"
    );
}

// ─────────────────────────────────────────────────────────────────────────────
// 4 — perf 1M tier (opt-in, #[ignore]). The 10k tier lives in perf_budget.rs.
//     This is the documented, opt-in scale gate (tests/PERF.md "1M tier").
//     It is NEVER generated/run in CI — only via `-- --ignored perf_1m`.
// ─────────────────────────────────────────────────────────────────────────────

/// Opt-in 1M-tier perf gate. Generates the ~1M-file `corpus-d-scale`, then
/// asserts that the loop ops stay flat in store size (within the plan's 1M
/// budgets) and that the sweep ops stay within their linear budgets.
///
/// Costs minutes of wall time and several GB of disk, hence `#[ignore]` so a
/// plain `cargo test` never runs it. Invoke explicitly:
/// `cargo test -p dbmd-cli --test agent_eval -- --ignored perf_1m`.
#[test]
#[ignore = "1M-tier perf: opt-in only (minutes + GB of disk); run with `-- --ignored perf_1m`"]
fn perf_1m_loop_ops_stay_flat_and_sweeps_stay_in_budget() {
    use std::time::{Duration, Instant};

    let bin = release_dbmd();
    let tmp = tempfile::TempDir::new().expect("tempdir for the 1M scale corpus");

    // 1. Compile + run the std-only generator at the `1m` tier.
    let mut gen_src = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    gen_src.extend(["..", "..", "tests", "gen-scale.rs"]);
    assert!(
        gen_src.is_file(),
        "tests/gen-scale.rs (the scale generator) must exist"
    );
    let gen_bin_name = if cfg!(windows) {
        "gen-scale.exe"
    } else {
        "gen-scale"
    };
    let gen_bin = tmp.path().join(gen_bin_name);
    let compile = StdCommand::new("rustc")
        .arg("-O")
        .arg(&gen_src)
        .arg("-o")
        .arg(&gen_bin)
        .status()
        .expect("compile gen-scale.rs");
    assert!(
        compile.success(),
        "gen-scale.rs must compile with `rustc -O`"
    );
    let store = tmp.path().join("corpus-d-scale-1m");
    let run = StdCommand::new(&gen_bin)
        .arg("1m")
        .arg(&store)
        .arg("--force")
        .status()
        .expect("run gen-scale 1m");
    assert!(
        run.success(),
        "gen-scale 1m must generate the corpus cleanly"
    );

    // Builds a `dbmd <args>` command rooted at the generated store with both
    // output streams discarded.
    let silenced = |args: &[&str]| {
        let mut cmd = StdCommand::new(&bin);
        cmd.args(args)
            .current_dir(&store)
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::null());
        cmd
    };

    // 2. Reach the index-rebuild fixed point (same precondition as the 10k
    //    gate) so the read-only sweeps time against a valid store.
    let rebuild = silenced(&["index", "rebuild"])
        .status()
        .expect("index rebuild on the 1M corpus");
    assert!(
        rebuild.success(),
        "index rebuild on the 1M corpus must succeed"
    );

    // 3. Time helper: one untimed warm-up run (its outcome is ignored), then
    //    the median of three timed subprocess runs. Only a failure to spawn
    //    aborts a timed run; the child's exit status is not inspected.
    let time_median = |args: &[&str]| -> Duration {
        let _ = silenced(args).status();

        let mut samples: Vec<Duration> = Vec::with_capacity(3);
        for _ in 0..3 {
            let started = Instant::now();
            let _ = silenced(args).status().expect("spawn dbmd");
            samples.push(started.elapsed());
        }
        samples.sort();
        samples[samples.len() / 2]
    };

    // 4. Loop budgets @1M (plan line 501), with the same CI headroom factor
    //    the 10k gate uses; these must stay FLAT in store size.
    const SLACK: u32 = 6;
    let flat_fast = Duration::from_millis(50) * SLACK;
    let flat_slow = Duration::from_secs(2) * SLACK;

    let log_tail = time_median(&["log", "tail", "20"]);
    assert!(
        log_tail <= flat_fast,
        "log tail 20 @1M {log_tail:?} exceeds the flat budget (50ms × {SLACK})"
    );
    let fm_query = time_median(&["fm", "query", "status=active", "--type", "company"]);
    assert!(
        fm_query <= flat_slow,
        "fm query @1M {fm_query:?} exceeds the flat budget (2s × {SLACK})"
    );
    let search = time_median(&["search", "Kickoff", "--type", "email"]);
    assert!(
        search <= flat_slow,
        "search --type @1M {search:?} exceeds the flat budget (2s × {SLACK})"
    );

    // 5. Sweep budgets @1M (linear, off-loop).
    let linear = Duration::from_secs(60) * SLACK;

    let validate_all = time_median(&["validate", "--all"]);
    assert!(
        validate_all <= linear,
        "validate --all @1M {validate_all:?} exceeds the linear budget (60s × {SLACK})"
    );
    let stats = time_median(&["stats"]);
    assert!(
        stats <= linear,
        "stats @1M {stats:?} exceeds the linear budget (60s × {SLACK})"
    );

    eprintln!(
        "[perf 1M] log_tail={log_tail:?} fm_query={fm_query:?} search={search:?} \
         validate_all={validate_all:?} stats={stats:?}"
    );
}

// ─────────────────────────────────────────────────────────────────────────────
// Small shared helpers.
// ─────────────────────────────────────────────────────────────────────────────

/// Spawn `bin` with `args`, using `dir` as the working directory, and hand back
/// whatever it wrote to stdout (decoded as lossy UTF-8).
///
/// The exit status is deliberately not checked — search/validate legitimately
/// exit non-zero with output. Panics only when the process cannot be spawned.
fn run_capture(bin: &Path, dir: &Path, args: Vec<&str>) -> String {
    let mut cmd = StdCommand::new(bin);
    cmd.args(&args).current_dir(dir);
    match cmd.output() {
        Ok(finished) => String::from_utf8_lossy(&finished.stdout).into_owned(),
        Err(e) => panic!("spawn dbmd {args:?}: {e}"),
    }
}

/// Load the store file at `rel` (resolved against `store`) as a string.
/// Panics with the full path and the I/O error when it cannot be read.
fn read(store: &Path, rel: &str) -> String {
    let path = store.join(rel);
    match std::fs::read_to_string(&path) {
        Ok(text) => text,
        Err(e) => panic!("read {}: {e}", path.display()),
    }
}

/// Gather the text of every `.md` file anywhere under `dir` (at any depth)
/// into one string, each file's contents followed by a newline — meant for
/// negative substring checks across a whole folder.
///
/// Unreadable directories and files are skipped silently, so a missing `dir`
/// yields an empty string.
fn read_all_md(dir: &Path) -> String {
    fn gather(folder: &Path, acc: &mut String) {
        let listing = match std::fs::read_dir(folder) {
            Ok(listing) => listing,
            Err(_) => return,
        };
        for path in listing.flatten().map(|entry| entry.path()) {
            if path.is_dir() {
                gather(&path, acc);
                continue;
            }
            let is_markdown = path.extension().and_then(|ext| ext.to_str()) == Some("md");
            if !is_markdown {
                continue;
            }
            if let Ok(body) = std::fs::read_to_string(&path) {
                acc.push_str(&body);
                acc.push('\n');
            }
        }
    }

    let mut blob = String::new();
    gather(dir, &mut blob);
    blob
}

/// Assert a store file's frontmatter declares `type: <expect>`.
///
/// Only the leading frontmatter block counts: the file must open with a `---`
/// line, and a line reading exactly `type: <expect>` must appear before the
/// closing `---` line. A bare substring search over the whole file would also
/// accept `subtype: <expect>`, an indented (nested) `type:` key, or a matching
/// line in the page body — none of which declare the page's type.
///
/// Lines are split on `\n` only, so a CRLF-terminated `type:` line does not
/// match (the same strictness as requiring `type: <expect>\n`).
///
/// # Panics
///
/// Panics when `rel` is missing under `store`, when it cannot be read, or when
/// its frontmatter does not declare the expected type.
fn assert_file_type(store: &Path, rel: &str, expect: &str) {
    let abs = store.join(rel);
    assert!(abs.is_file(), "required file {rel} is missing");
    let text = std::fs::read_to_string(&abs)
        .unwrap_or_else(|e| panic!("read {}: {e}", abs.display()));

    let wanted = format!("type: {expect}");
    let mut lines = text.split('\n');
    let mut declared = false;
    let mut closed = false;
    // Frontmatter exists only when the very first line is the `---` opener.
    if lines.next() == Some("---") {
        for line in lines {
            if line == "---" {
                closed = true;
                break;
            }
            declared |= line == wanted;
        }
    }

    // An unterminated block is not valid frontmatter, so it cannot satisfy the
    // check even if a matching line was seen.
    assert!(
        declared && closed,
        "{rel} must declare `type: {expect}` in frontmatter; got:\n{text}"
    );
}

/// List every file beneath `root` (at any depth) as a path relative to `root`,
/// in sorted order. Directories themselves are not listed, and directories
/// that cannot be read are skipped silently.
fn walk_rel(root: &Path) -> Vec<PathBuf> {
    fn collect_files(base: &Path, dir: &Path, found: &mut Vec<PathBuf>) {
        let listing = match std::fs::read_dir(dir) {
            Ok(listing) => listing,
            Err(_) => return,
        };
        for path in listing.flatten().map(|entry| entry.path()) {
            if path.is_dir() {
                collect_files(base, &path, found);
                continue;
            }
            let rel = path.strip_prefix(base).expect("under base");
            found.push(rel.to_path_buf());
        }
    }

    let mut files = Vec::new();
    collect_files(root, root, &mut files);
    files.sort();
    files
}

/// The set of store-relative file paths a `search --json` result names.
///
/// Two result shapes are accepted: a bare array of match objects, or a
/// `{matches:[...]}` envelope wrapping the same array. In either shape each
/// match object names its file under `file`, with `path` accepted as a
/// fallback key; objects carrying neither (as a string) are ignored. Any other
/// JSON shape yields an empty set.
fn json_match_files(v: &serde_json::Value) -> BTreeSet<String> {
    // Locate the match array: the value itself, else its `matches` member.
    let items = v
        .as_array()
        .or_else(|| v.get("matches").and_then(|m| m.as_array()));

    items
        .into_iter()
        .flatten()
        .filter_map(|item| {
            // Same key preference for both shapes: `file` first, then `path`.
            item.get("file")
                .and_then(|f| f.as_str())
                .or_else(|| item.get("path").and_then(|p| p.as_str()))
        })
        .map(str::to_owned)
        .collect()
}

/// Normalize a validate `issues` array into a sorted multiset of the stable
/// fields, so two reports compare regardless of issue ordering.
fn issue_multiset(issues: &serde_json::Value) -> Vec<(String, String, String, String, String)> {
    let mut v: Vec<(String, String, String, String, String)> = issues
        .as_array()
        .map(|arr| {
            arr.iter()
                .map(|i| {
                    (
                        i["code"].as_str().unwrap_or_default().to_string(),
                        i["severity"].as_str().unwrap_or_default().to_string(),
                        i["file"].as_str().unwrap_or_default().to_string(),
                        i.get("line").map(|l| l.to_string()).unwrap_or_default(),
                        i.get("key").map(|k| k.to_string()).unwrap_or_default(),
                    )
                })
                .collect()
        })
        .unwrap_or_default();
    v.sort();
    v
}

/// Rebuild `s` as its whitespace-separated words joined by single spaces, with
/// no leading or trailing space — the layout-agnostic "same words, same order"
/// form used to compare extract output.
fn normalize_tokens(s: &str) -> String {
    let mut joined = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first.
        if !joined.is_empty() {
            joined.push(' ');
        }
        joined.push_str(word);
    }
    joined
}