nab 0.12.0 - Docs.rs

//! `nab::task` — the task-engine schema (shared contract).
//!
//! These are the plain serde types the API-first web-task engine speaks: the
//! action the loop emits ([`TaskAction`]), the per-step observation
//! ([`ActionObservation`]), the rung-1 API leads ([`DiscoveredApi`]), the final
//! [`TaskOutcome`], and the terminal [`TaskStatus`].
//!
//! They live in the **library** (not the `nab` binary's `cmd` module) so both
//! consumers share one contract: the `nab` CLI executor (`cmd::task`) and the
//! `nab-mcp` self-contained sampling loop (slice 4). The executor and the loop
//! are built on top of these types; see
//! `docs/design/2026-05-31-nab-task-engine.md` §12 for the binary-boundary
//! rationale behind keeping the schema here.
//!
//! Feature-gated behind `task` (experimental until the loop is proven).

use serde::{Deserialize, Serialize};

use crate::{ApiDiscovery, ApiEndpoint};
use std::fmt::Write as _;

/// Serde default for the `method` field of [`TaskAction::ApiCall`]: an
/// `api_call` that omits `method` is treated as a `GET`.
fn default_get_method() -> String {
    String::from("GET")
}

/// Scan a raw HTML body for candidate API endpoints (rung-1 leads).
///
/// Infallible by design: an empty body, a discovery engine that cannot be
/// constructed, and a page with no leads all yield an empty vec. Shared by the
/// `nab` CLI (`cmd::task`) and the `nab-mcp` `task` tool.
#[must_use]
pub fn discover_apis(raw_html: &str) -> Vec<DiscoveredApi> {
    if raw_html.is_empty() {
        return Vec::new();
    }
    // Discovery is best-effort: a construction failure means "no leads", not an error.
    let Ok(discovery) = ApiDiscovery::new() else {
        return Vec::new();
    };
    let endpoints = discovery.discover_from_html(raw_html);
    endpoints.into_iter().map(DiscoveredApi::from).collect()
}

/// One action the task loop can take at a given rung (§4 of the design).
///
/// The schema is stable for the host LLM (or the slice-4 sampling loop) that
/// produces these. Variants beyond what the current executor runs are part of
/// the forward API — `submit` (rung 2) and `extract` land with the loop slice.
///
/// Serialized as an internally tagged object: the variant name is written in
/// `snake_case` into a `kind` field (e.g. `{"kind":"api_call", ...}`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "kind")]
pub enum TaskAction {
    /// Rung 1: call a discovered JSON API directly.
    ApiCall {
        /// Endpoint URL to request.
        url: String,
        /// HTTP method; defaults to `"GET"` when the field is omitted.
        #[serde(default = "default_get_method")]
        method: String,
        /// Extra request headers as `(name, value)` pairs; empty when omitted.
        #[serde(default)]
        headers: Vec<(String, String)>,
        /// Optional request body, forwarded to the fetcher unchanged.
        #[serde(default)]
        body: Option<String>,
        /// Optional focus query applied to the response before it is bounded;
        /// an empty string is treated like `None`.
        #[serde(default)]
        extract_query: Option<String>,
    },
    /// Rung 1: evaluate page JS via `QuickJS` + the authenticated fetch bridge.
    /// Deferred by `execute_action` in the current slice.
    JsEval { url: String, script: String },
    /// Rung 2: submit a (CSRF-aware) form.
    Submit {
        /// URL of the page that contains the form (fetched as raw HTML first).
        url: String,
        /// Caller-supplied `(name, value)` pairs merged over the form's own fields.
        fields: Vec<(String, String)>,
    },
    /// Shape the current response to a query via the content pipeline.
    Extract { extract_query: String },
    /// Rung 3: escalate to the opt-in external-CDP browser. `url` is the page the
    /// browser should render (falls back to the seed page when omitted).
    NeedsBrowser {
        /// Why the brain is escalating; echoed in the deferral message when the
        /// action is executed in stateless single-action mode.
        reason: String,
        /// Page to render; `None` when omitted from the JSON.
        #[serde(default)]
        url: Option<String>,
    },
    /// Terminal: the goal is complete.
    Done {
        /// Optional closing summary; when present the loop reports it as the
        /// run's final content.
        #[serde(default)]
        summary: Option<String>,
    },
}

/// Terminal status of a task run. `Incomplete` / `NeedsHuman` are produced by
/// the bounded loop in later slices.
///
/// Serialized in `snake_case` (`done`, `incomplete`, `needs_human`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaskStatus {
    /// The action (or run) produced its result.
    Done,
    /// The action failed, was deferred, or the run stopped before a `done` action.
    Incomplete,
    /// NOTE(review): not emitted by any code visible in this part of the file —
    /// presumably reserved for steps that need human intervention; confirm.
    NeedsHuman,
}

/// A candidate API endpoint discovered on the fetched page — a rung-1 lead the
/// host LLM can choose to call directly instead of escalating to a browser.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiscoveredApi {
    /// The endpoint URL as reported by discovery.
    pub url: String,
    /// HTTP method when discovery could determine one; omitted from the
    /// serialized form when `None` (the brain prompt then shows it as `GET`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub method: Option<String>,
    /// The detector that surfaced this endpoint (for debugging).
    pub source: String,
}

impl From<ApiEndpoint> for DiscoveredApi {
    fn from(e: ApiEndpoint) -> Self {
        Self {
            url: e.url,
            method: e.method,
            source: e.source,
        }
    }
}

/// The result of a `nab task` run, shaped for an LLM consumer.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TaskOutcome {
    /// The goal the run was asked to achieve (presumably echoed from the
    /// caller — confirm against the producer in `cmd::task`).
    pub goal: String,
    /// The page URL the run is about (presumably the seed page — confirm).
    pub url: String,
    /// The rung that produced the result (0 = fetch … 3 = browser).
    pub rung: u8,
    /// Terminal status of the run.
    pub status: TaskStatus,
    /// The content produced for the consumer.
    pub content: String,
    /// Rung-1 API leads discovered on the page (may be empty). The host LLM can
    /// call these directly rather than escalating to a browser.
    #[serde(default)]
    pub discovered_apis: Vec<DiscoveredApi>,
}

/// The result of executing ONE [`TaskAction`] — the OBSERVE half of the loop
/// (§4 step 4). Returned by the executor so the host LLM (or the slice-4
/// sampling loop) can inspect the outcome and decide the next step.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ActionObservation {
    /// The rung that executed the action (1 = API/JS, 2 = submit, 3 = browser).
    /// Rung 0 is also used, for actions that do no network work (`done`,
    /// `extract`).
    pub rung: u8,
    /// `Done` when the action produced its result; `Incomplete` when it failed
    /// or was deferred (see `error`).
    pub status: TaskStatus,
    /// YARA-screened, token-budgeted content the action produced (empty on error
    /// or when the action is not executable in the current slice).
    pub content: String,
    /// Set when the action failed or is deferred to a later slice.
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}

/// A single fetch request the executor hands to a [`TaskFetcher`] — a rung-1
/// `api_call` reduced to wire essentials. The fetcher owns the moat (client,
/// cookies, fingerprint, YARA screen, budget); the library owns routing.
///
/// Also built by [`plan_form_submission`] for the rung-2 form submit.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchRequest {
    /// Absolute URL to request.
    pub url: String,
    /// HTTP method as text (e.g. `"GET"`, `"POST"`).
    pub method: String,
    /// Extra request headers as `(name, value)` pairs.
    pub headers: Vec<(String, String)>,
    /// Request body, if any.
    pub body: Option<String>,
}

/// The injected fetch backend every network action runs through.
///
/// Each binary supplies its own implementation: the `nab` CLI wraps
/// `cmd::fetch::fetch_screened` (full moat), while `nab-mcp` wraps its
/// `FetchTool` path. Because the library only ever sees this trait, it never
/// names a binary-only fetch type and stays buildable on both sides of the
/// binary boundary (design §12.2); the moat scope remains a per-binary concern.
///
/// This is a native async-fn-in-trait rather than `async_trait` on purpose: the
/// returned future inherits the concrete impl's `Send`-ness. The CLI's
/// `fetch_screened` future holds a `RefCell` (not `Send`) and is awaited inline,
/// whereas the `nab-mcp` backend's future is `Send`, so `run_task_loop` over it
/// meets the MCP framework's `Send` requirement. `async_trait(?Send)` would box
/// both as non-`Send` and break the MCP path.
#[allow(async_fn_in_trait)]
pub trait TaskFetcher {
    /// Run `req` through the moat and return the screened, shaped content.
    async fn fetch(&self, req: FetchRequest) -> anyhow::Result<String>;

    /// Fetch `url` as screened but **unconverted** HTML.
    ///
    /// The `submit` rung needs this because it parses `<form>` markup that a
    /// markdown conversion would strip. The provided implementation issues a
    /// plain header-less, body-less `GET` through [`fetch`](Self::fetch), which
    /// is only correct for backends whose `fetch` already returns raw HTML (the
    /// CLI's `raw_html=true` path). Backends that convert to markdown (the
    /// `nab-mcp` fetcher) MUST override it to return the raw body.
    async fn fetch_raw(&self, url: &str) -> anyhow::Result<String> {
        let page_get = FetchRequest {
            url: url.to_owned(),
            method: String::from("GET"),
            headers: Vec::new(),
            body: None,
        };
        self.fetch(page_get).await
    }
}

/// Run a single [`TaskAction`] at its rung and report what happened as an
/// [`ActionObservation`] (ROUTE+ACT+OBSERVE, §4 steps 3-4). Both control modes —
/// the host-driven CLI turn and the slice-4 sampling loop — go through this
/// shared executor, with the network backend injected as a [`TaskFetcher`].
///
/// What each variant does here:
/// * `api_call` (rung 1) — fetched, then focused/bounded by `shape_api_response`.
/// * `submit` (rung 2) — delegated to `submit_form`.
/// * `done` — terminal; an empty rung-0 `Done` observation.
/// * `js_eval`, `extract`, `needs_browser` — not executable in stateless
///   single-action mode; each yields an honest `Incomplete` observation carrying
///   the reason instead of panicking, so a driver can route around it
///   (`extract` and `needs_browser` are handled by [`run_task_loop`]).
///
/// # Errors
/// The visible implementation never returns `Err`: fetch failures are folded
/// into an `Incomplete` observation. The `Result` is kept for the public
/// signature.
pub async fn execute_action<F: TaskFetcher>(
    action: &TaskAction,
    fetcher: &F,
) -> anyhow::Result<ActionObservation> {
    let observation = match action {
        TaskAction::ApiCall {
            url,
            method,
            headers,
            body,
            extract_query,
        } => {
            let request = FetchRequest {
                url: url.clone(),
                method: method.clone(),
                headers: headers.clone(),
                body: body.clone(),
            };
            match fetcher.fetch(request).await {
                Ok(raw) => ActionObservation {
                    rung: 1,
                    status: TaskStatus::Done,
                    content: shape_api_response(&raw, extract_query.as_deref()),
                    error: None,
                },
                // A failed fetch is an observation, not a hard error.
                Err(e) => deferred(1, &e.to_string()),
            }
        }
        TaskAction::Submit { url, fields } => submit_form(url, fields, fetcher).await,
        TaskAction::Done { .. } => ActionObservation {
            rung: 0,
            status: TaskStatus::Done,
            content: String::new(),
            error: None,
        },
        TaskAction::JsEval { .. } => deferred(1, "js_eval lands in a later slice"),
        TaskAction::Extract { .. } => deferred(
            0,
            "extract is a loop-level action (shapes trajectory state); \
             not available in stateless single-action mode",
        ),
        TaskAction::NeedsBrowser { reason, .. } => {
            let why = format!(
                "browser rung is loop-level (needs an injected browser backend); \
                 not available in stateless single-action mode: {reason}"
            );
            deferred(3, &why)
        }
    };
    Ok(observation)
}

/// Build the observation for an action that is valid schema but could not be
/// carried out: status `Incomplete`, no content, and `why` recorded as the
/// error — never a panic.
fn deferred(rung: u8, why: &str) -> ActionObservation {
    let error = Some(String::from(why));
    ActionObservation {
        rung,
        status: TaskStatus::Incomplete,
        content: String::new(),
        error,
    }
}

/// Per-response token cap for a rung-1 `api_call` observation. nab's promise is
/// token-minimal web access; an unbounded raw API body (some endpoints return the
/// full version history or comment tree — tens of thousands of tokens) breaks it.
/// In the brain-driven loop every observation is read back into the prompt, so the
/// raw body IS the token cost. We bound it and mark the truncation so the brain
/// knows to narrow (a tighter endpoint or `extract_query`) if it needs more.
///
/// `shape_api_response` also derives its hard fallback cap from this value
/// (budget × 4 bytes, i.e. roughly 4 characters per token).
const API_RESPONSE_TOKEN_BUDGET: usize = 4_000;

/// Focus and bound a response body before it becomes an observation.
///
/// Two stages, applied in the library so the CLI and `nab-mcp` share one
/// token-minimal contract:
/// 1. When `extract_query` is present and non-empty, run the BM25-lite focus
///    pipeline over `content`; otherwise keep `content` as-is.
/// 2. Truncate the result to [`API_RESPONSE_TOKEN_BUDGET`] tokens.
///
/// The budget truncation is markdown-block-aware, and an API body is often one
/// structureless JSON blob it cannot split — in which case it may come back
/// whole. A hard byte cap (budget × 4, ~4 chars per token) therefore backs it
/// up, cut on a UTF-8 char boundary and followed by a truncation notice.
fn shape_api_response(content: &str, extract_query: Option<&str>) -> String {
    let focused = if let Some(query) = extract_query.filter(|q| !q.is_empty()) {
        crate::content::focus::extract_focused(content, query).markdown
    } else {
        content.to_owned()
    };

    let mut shaped =
        crate::content::budget::truncate_to_budget(&focused, Some(API_RESPONSE_TOKEN_BUDGET))
            .markdown;

    let hard_cap = API_RESPONSE_TOKEN_BUDGET * 4;
    if shaped.len() <= hard_cap {
        return shaped;
    }

    // `String::truncate` panics mid-character, so back up to the nearest
    // boundary at or below the cap (offset 0 always qualifies).
    let cut_at = (0..=hard_cap)
        .rev()
        .find(|&offset| shaped.is_char_boundary(offset))
        .unwrap_or(0);
    shaped.truncate(cut_at);
    shaped.push_str(
        "\n…[Truncated: API response exceeded the token budget — \
         narrow the request or use extract_query]",
    );
    shaped
}

/// Build the submission [`FetchRequest`] from a fetched form page (raw HTML), the
/// page URL (for relative-action resolution), and the caller's field values.
///
/// Pure logic over [`nab::Form`](crate::Form), so it is unit-testable on a
/// fixture HTML string with no network. It parses the FIRST form on the page,
/// merges the caller's fields over the form's existing fields (which already
/// include hidden inputs, so an in-form hidden CSRF token is carried
/// automatically), resolves the action URL against `page_url`, and encodes the
/// body. A `GET` form becomes a query-string request; any other method becomes a
/// urlencoded POST with the right `Content-Type`.
///
/// Limitation (documented, not a bug): a CSRF token that lives OUTSIDE the form
/// (e.g. a `<meta>` tag, reached via the CLI's `--csrf-from` selector) is not
/// handled at this rung. The common in-form hidden-input case is. This rung also
/// handles urlencoded forms only — like `cmd_submit`, it pairs an urlencoded body
/// with the form's declared `Content-Type`, so `multipart/form-data` (file
/// upload) is out of scope.
///
/// # Errors
/// Returns an error when the page has no form or the action URL cannot resolve.
pub fn plan_form_submission(
    page_html: &str,
    page_url: &str,
    fields: &[(String, String)],
) -> anyhow::Result<FetchRequest> {
    use std::collections::HashMap;

    let mut forms = crate::Form::parse_all(page_html)?;
    if forms.is_empty() {
        anyhow::bail!("no forms found on page");
    }
    let mut form = forms.remove(0);

    let user: HashMap<String, String> = fields.iter().cloned().collect();
    form.merge_fields(&user);

    let action_url = form.resolve_action(page_url)?;
    let encoded = form.encode_urlencoded();

    if form.method.eq_ignore_ascii_case("GET") {
        let url = if action_url.contains('?') {
            format!("{action_url}&{encoded}")
        } else {
            format!("{action_url}?{encoded}")
        };
        Ok(FetchRequest {
            url,
            method: "GET".to_string(),
            headers: Vec::new(),
            body: None,
        })
    } else {
        Ok(FetchRequest {
            url: action_url,
            method: "POST".to_string(),
            headers: vec![("Content-Type".to_string(), form.content_type().to_string())],
            body: Some(encoded),
        })
    }
}

/// Execute a rung-2 `submit`: fetch the form page as raw HTML through the moat,
/// [`plan_form_submission`] the submission, then send it through the moat and
/// return the screened response. Two moat round-trips (page GET, then submit),
/// both via the injected [`TaskFetcher`]; the form logic between them is pure and
/// lib-side. Maps every failure to an honest `Incomplete` observation (mirroring
/// the `api_call` arm) rather than propagating an error, so a driver can route
/// around a failed submit.
async fn submit_form<F: TaskFetcher>(
    url: &str,
    fields: &[(String, String)],
    fetcher: &F,
) -> ActionObservation {
    let page = match fetcher.fetch_raw(url).await {
        Ok(p) => p,
        Err(e) => return deferred(2, &format!("submit: fetching the form page failed: {e}")),
    };
    let req = match plan_form_submission(&page, url, fields) {
        Ok(r) => r,
        Err(e) => return deferred(2, &format!("submit: {e}")),
    };
    match fetcher.fetch(req).await {
        Ok(content) => ActionObservation {
            rung: 2,
            status: TaskStatus::Done,
            content,
            error: None,
        },
        Err(e) => deferred(2, &format!("submit: posting the form failed: {e}")),
    }
}

/// Apply query-focused extraction (the BM25-lite content pipeline) to the
/// CURRENT response — the loop-level `extract` action (§4). Unlike the rung
/// actions in [`execute_action`], `extract` performs no network work; it reshapes
/// content already in the trajectory, so it lives in the loop (which owns that
/// state) and is classified rung 0. Returns an honest `Incomplete` when there is
/// no content to shape rather than a misleading empty `Done`.
fn extract_from_content(prior: &str, extract_query: &str) -> ActionObservation {
    if prior.is_empty() {
        return ActionObservation {
            rung: 0,
            status: TaskStatus::Incomplete,
            content: String::new(),
            error: Some("extract: no prior content available to shape".to_string()),
        };
    }
    let focused = crate::content::focus::extract_focused(prior, extract_query);
    ActionObservation {
        rung: 0,
        status: TaskStatus::Done,
        content: focused.markdown,
        error: None,
    }
}

/// Bounds on a [`run_task_loop`] run — the loop stops at the first limit hit.
#[derive(Debug, Clone)]
pub struct LoopBounds {
    /// Hard cap on the number of executed steps.
    pub max_steps: usize,
    /// Wall-clock cap across the whole run. Checked at the top of each
    /// iteration, before the brain is sampled, so a step already in flight is
    /// not interrupted.
    pub max_wall_clock: std::time::Duration,
    /// Crude token proxy: cap on total observation content carried forward, so
    /// the prompt cannot grow unbounded. The loop sums `String::len()` of each
    /// observation's content, i.e. UTF-8 bytes rather than characters, and
    /// stops once the running total exceeds this value.
    pub max_total_content_chars: usize,
}

impl Default for LoopBounds {
    /// Default limits: 12 steps, 2 minutes of wall clock, and 32 000 units of
    /// accumulated observation content.
    fn default() -> Self {
        let max_wall_clock = std::time::Duration::from_mins(2);
        Self {
            max_steps: 12,
            max_wall_clock,
            max_total_content_chars: 32_000,
        }
    }
}

/// The loop's brain: given a prompt (goal + trajectory + discovered APIs), return
/// the next action as JSON text. `nab-mcp` wraps `sampling/createMessage`; tests
/// script a fixed sequence. Native async-fn-in-trait for the same `Send`-inheritance
/// reason as [`TaskFetcher`].
#[allow(async_fn_in_trait)]
pub trait Sampler {
    /// Return the next action as a JSON object (optionally fenced in markdown).
    ///
    /// # Errors
    /// Any error ends the run: the loop stops with `LoopStop::SamplerError`
    /// and status `Incomplete`. A reply that cannot be parsed as a
    /// [`TaskAction`] likewise ends it with `LoopStop::ParseError`.
    async fn next_action(&self, prompt: &str) -> anyhow::Result<String>;
}

/// Rung 3: the external-browser backend the loop drives when an API/form path
/// cannot complete the goal (`needs_browser`). nab never bundles Chromium — each
/// binary injects a backend that orchestrates an EXTERNAL browser over CDP (the
/// opt-in `browser` feature), applying nab's cookies + fingerprint, and returns
/// the rendered page already shaped to markdown. The default build injects
/// [`NoBrowser`], so the rung is an honest deferral rather than a hard dependency.
///
/// Native async-fn-in-trait for the same `Send`-inheritance reason as
/// [`TaskFetcher`]: the concrete backend's future inherits its own `Send`-ness.
#[allow(async_fn_in_trait)]
pub trait BrowserBackend {
    /// Render `url` in the external browser and return screened, shaped markdown.
    ///
    /// # Errors
    /// The loop does not propagate an error from here; it records a rung-3
    /// `Incomplete` observation whose message is prefixed with
    /// `delegate_to_browser:`.
    async fn render(&self, url: &str) -> anyhow::Result<String>;
}

/// The default rung-3 backend: no browser compiled in. Every `render` is an honest
/// error so [`run_task_loop`] turns `needs_browser` into a `delegate_to_browser`
/// deferral rather than silently failing. A binary built with the `browser`
/// feature injects a real CDP backend via [`run_task_loop_with_browser`].
///
/// A stateless unit type, so it derives the common traits (`Debug`, `Clone`,
/// `Copy`, `Default`) that callers expect from a public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoBrowser;

impl BrowserBackend for NoBrowser {
    /// Always fails with a message explaining how to enable rung 3; the URL is
    /// ignored.
    async fn render(&self, _url: &str) -> anyhow::Result<String> {
        anyhow::bail!(
            "browser rung unavailable: build with --features browser and inject a \
             CDP backend (or set NAB_BROWSER_CDP_WS) to enable rung 3"
        )
    }
}

/// One executed step of a task run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TrajectoryStep {
    /// The action the brain chose for this step.
    pub action: TaskAction,
    /// What executing that action produced.
    pub observation: ActionObservation,
}

/// Why a [`run_task_loop`] stopped.
///
/// Only `Done` is paired with `TaskStatus::Done` in the loop's outcome; every
/// other reason is reported with `TaskStatus::Incomplete`. Serialized in
/// `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LoopStop {
    /// The brain emitted a `done` action.
    Done,
    /// The `max_steps` bound was hit.
    MaxSteps,
    /// The `max_wall_clock` bound was hit.
    Timeout,
    /// The `max_total_content_chars` bound was hit.
    Budget,
    /// The sampler (brain) returned an error.
    SamplerError,
    /// The sampler's reply could not be parsed as a `TaskAction`.
    ParseError,
}

/// The result of a bounded task loop.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoopOutcome {
    /// The goal the loop was run with.
    pub goal: String,
    /// Why the loop stopped.
    pub stop: LoopStop,
    /// `Done` only when the brain emitted `done`; otherwise `Incomplete`.
    pub status: TaskStatus,
    /// Every executed step, in order. The terminal `done` action is not
    /// recorded as a step.
    pub steps: Vec<TrajectoryStep>,
    /// The `done` summary when the brain supplied one; otherwise the content of
    /// the last step's observation (empty when no step ran).
    pub final_content: String,
}

/// Parse a sampler reply into a [`TaskAction`].
///
/// Tolerates the ways an LLM typically wraps its JSON:
/// * a ```` ```json ```` (or bare ```` ``` ````) markdown fence around the object;
/// * any other text around the object — a differently-tagged fence, or a
///   sentence before or after it. A `TaskAction` is always a JSON object, so the
///   reply is narrowed to the span from its first `{` to its last `}` before
///   parsing.
///
/// Every reply accepted by the fence-only handling is still accepted: for such
/// replies the narrowed span is exactly the JSON object.
///
/// # Errors
/// Returns an error when the (narrowed) text is not valid JSON for a
/// `TaskAction`.
fn parse_action(reply: &str) -> anyhow::Result<TaskAction> {
    let trimmed = reply.trim();

    // Strip a leading fence marker and, if present, the matching trailing one.
    let unfenced = trimmed
        .strip_prefix("```json")
        .or_else(|| trimmed.strip_prefix("```"))
        .map_or(trimmed, |rest| {
            let rest = rest.trim();
            rest.strip_suffix("```").map_or(rest, str::trim)
        });

    // Narrow to the outermost braces. `{` and `}` are ASCII, so the byte
    // offsets are valid slice boundaries. Without a brace pair, hand the text to
    // serde unchanged so the caller gets its diagnostic.
    let body = match (unfenced.find('{'), unfenced.rfind('}')) {
        (Some(open), Some(close)) if open < close => &unfenced[open..=close],
        _ => unfenced,
    };

    serde_json::from_str(body).map_err(|e| anyhow::anyhow!("could not parse action JSON: {e}"))
}

/// Build the brain prompt from the goal, seed content, discovered APIs, and the
/// trajectory so far.
///
/// Layout, in order:
/// 1. `Goal: …`
/// 2. The seed page, cut to at most 4000 bytes so the prompt stays bounded. The
///    cut always lands on a UTF-8 character boundary (backing up by at most
///    three bytes), so a seed with multi-byte characters cannot cause a slicing
///    panic. A truncated seed is followed by a `…(truncated)` marker.
/// 3. The discovered API leads, one `- METHOD url` line each (`GET` when the
///    method is unknown); omitted when there are none.
/// 4. One summary line per executed step (step number, rung, status); omitted
///    before the first step.
/// 5. The instruction to reply with the next action as a single JSON object.
fn build_prompt(
    goal: &str,
    seed: &str,
    discovered: &[DiscoveredApi],
    steps: &[TrajectoryStep],
) -> String {
    /// Maximum number of seed bytes copied into the prompt.
    const SEED_CAP: usize = 4000;

    let mut p = String::new();
    p.push_str("Goal: ");
    p.push_str(goal);
    p.push_str("\n\n");
    p.push_str("Seed page (markdown, truncated):\n");
    if seed.len() > SEED_CAP {
        // Slicing a `str` off a char boundary panics, so back up to the nearest
        // boundary at or below the cap (offset 0 is always one).
        let mut end = SEED_CAP;
        while !seed.is_char_boundary(end) {
            end -= 1;
        }
        p.push_str(&seed[..end]);
        p.push_str("\n…(truncated)\n\n");
    } else {
        p.push_str(seed);
        p.push_str("\n\n");
    }
    if !discovered.is_empty() {
        p.push_str("Discovered API endpoints (rung-1 leads):\n");
        for d in discovered {
            // Writing into a `String` cannot fail.
            writeln!(p, "- {} {}", d.method.as_deref().unwrap_or("GET"), d.url).unwrap();
        }
        p.push('\n');
    }
    if !steps.is_empty() {
        p.push_str("Trajectory so far:\n");
        for (i, s) in steps.iter().enumerate() {
            writeln!(
                p,
                "Step {}: rung {} {:?}",
                i + 1,
                s.observation.rung,
                s.observation.status
            )
            .unwrap();
        }
        p.push('\n');
    }
    p.push_str(
        "Reply with the NEXT action as a single JSON object (TaskAction schema), e.g.\n\
         {\"kind\":\"api_call\",\"url\":\"https://...\",\"method\":\"GET\"} \
         or {\"kind\":\"done\",\"summary\":\"...\"}.\n",
    );
    p
}

/// Drive the bounded brain loop (§4 steps 2-6 / §9.1) with no browser backend.
///
/// The cycle is: seed context → ask the `sampler` for the next action → execute
/// it through the injected `fetcher` → observe → repeat, ending on a `done`
/// action, a bound from `bounds`, or a sampler/parse error. The host LLM is the
/// brain (via `sampler`) and nab supplies execution; since both are injected,
/// the loop is testable without an LLM or a network.
///
/// Equivalent to [`run_task_loop_with_browser`] with [`NoBrowser`] as the
/// rung-3 backend.
pub async fn run_task_loop<S: Sampler, F: TaskFetcher>(
    goal: &str,
    seed: &str,
    discovered: &[DiscoveredApi],
    sampler: &S,
    fetcher: &F,
    bounds: &LoopBounds,
) -> LoopOutcome {
    let no_browser = NoBrowser;
    run_task_loop_with_browser(
        goal,
        seed,
        discovered,
        sampler,
        fetcher,
        &no_browser,
        bounds,
    )
    .await
}

/// [`run_task_loop`] with an injected rung-3 [`BrowserBackend`]. When the brain
/// emits `needs_browser` carrying a `url`, the loop drives the external browser
/// (rung 3) and feeds the rendered markdown back as the observation. With no
/// `url`, or when the backend fails (as [`NoBrowser`] always does), it records
/// an honest rung-3 `Incomplete` deferral instead. Every other action routes
/// exactly as in [`run_task_loop`]. Pure logic over the injected backends —
/// fully testable with a mock browser, no real Chrome.
///
/// Stop conditions and the outcome they produce:
/// * `done` action → `LoopStop::Done` / `TaskStatus::Done`; the final content is
///   the action's summary when present, else the last observation's content.
/// * wall clock exceeded (checked before each sampling) → `LoopStop::Timeout`.
/// * sampler error → `LoopStop::SamplerError`; unparseable reply →
///   `LoopStop::ParseError`.
/// * accumulated observation content over `max_total_content_chars` →
///   `LoopStop::Budget` (the step that crossed the limit is still recorded).
/// * `max_steps` steps executed → `LoopStop::MaxSteps`.
///
/// All non-`done` stops report `TaskStatus::Incomplete` with the last
/// observation's content as the final content.
pub async fn run_task_loop_with_browser<S: Sampler, F: TaskFetcher, B: BrowserBackend>(
    goal: &str,
    seed: &str,
    discovered: &[DiscoveredApi],
    sampler: &S,
    fetcher: &F,
    browser: &B,
    bounds: &LoopBounds,
) -> LoopOutcome {
    let start = std::time::Instant::now();
    let mut steps: Vec<TrajectoryStep> = Vec::new();
    // Running total of observation content, measured with `String::len()` (bytes).
    let mut content_chars: usize = 0;

    // Shared exit path for every non-`done` stop: the final content is whatever
    // the last executed step observed (empty when no step ran).
    let finish = |stop: LoopStop, status: TaskStatus, steps: Vec<TrajectoryStep>| {
        let final_content = steps
            .last()
            .map(|s| s.observation.content.clone())
            .unwrap_or_default();
        LoopOutcome {
            goal: goal.to_string(),
            stop,
            status,
            steps,
            final_content,
        }
    };

    while steps.len() < bounds.max_steps {
        // The wall-clock bound is only checked between steps; a step already
        // running is not interrupted.
        if start.elapsed() > bounds.max_wall_clock {
            return finish(LoopStop::Timeout, TaskStatus::Incomplete, steps);
        }
        let prompt = build_prompt(goal, seed, discovered, &steps);
        // The sampler's error value is dropped; only the stop reason is reported.
        let Ok(reply) = sampler.next_action(&prompt).await else {
            return finish(LoopStop::SamplerError, TaskStatus::Incomplete, steps);
        };
        let Ok(action) = parse_action(&reply) else {
            return finish(LoopStop::ParseError, TaskStatus::Incomplete, steps);
        };
        // `done` is terminal and is not recorded as a step. Its summary, when
        // given, wins over the last observation as the final content.
        if let TaskAction::Done { summary } = &action {
            let final_content = summary.clone().unwrap_or_else(|| {
                steps
                    .last()
                    .map(|s| s.observation.content.clone())
                    .unwrap_or_default()
            });
            return LoopOutcome {
                goal: goal.to_string(),
                stop: LoopStop::Done,
                status: TaskStatus::Done,
                steps,
                final_content,
            };
        }
        // `extract` is a loop-level action: it shapes the CURRENT response, so it
        // needs trajectory state `execute_action` (stateless) does not have. The
        // current response is the most recent step's content, else the seed page
        // before any step has run. Pure content shaping, no network → rung 0.
        let observation = if let TaskAction::Extract { extract_query } = &action {
            let prior = steps
                .last()
                .map_or(seed, |s| s.observation.content.as_str());
            extract_from_content(prior, extract_query)
        } else if let TaskAction::NeedsBrowser { url, .. } = &action {
            // Rung 3: drive the external browser for the requested page. Needs a
            // url (the loop holds seed CONTENT, not the seed URL) plus a backend.
            browser_step(url.as_deref(), browser).await
        } else {
            // Everything else goes through the stateless executor; an `Err` from
            // it is folded into a rung-0 `Incomplete` observation.
            execute_action(&action, fetcher)
                .await
                .unwrap_or_else(|e| ActionObservation {
                    rung: 0,
                    status: TaskStatus::Incomplete,
                    content: String::new(),
                    error: Some(e.to_string()),
                })
        };
        content_chars += observation.content.len();
        steps.push(TrajectoryStep {
            action,
            observation,
        });
        // Checked after the push so the step that crossed the budget is kept in
        // the trajectory and supplies the final content.
        if content_chars > bounds.max_total_content_chars {
            return finish(LoopStop::Budget, TaskStatus::Incomplete, steps);
        }
    }
    finish(LoopStop::MaxSteps, TaskStatus::Incomplete, steps)
}

/// Run one rung-3 `needs_browser` step by rendering `url` through the injected
/// [`BrowserBackend`].
///
/// A missing (or empty) url and any backend error both become an honest rung-3
/// `Incomplete` observation — never a panic — so the brain can hand off to a
/// browser cleanly. On success the rendered markdown is bounded the same way an
/// API response is.
async fn browser_step<B: BrowserBackend>(url: Option<&str>, browser: &B) -> ActionObservation {
    let target = match url {
        Some(candidate) if !candidate.is_empty() => candidate,
        _ => return deferred(3, "needs_browser requires a url to render"),
    };
    let rendered = match browser.render(target).await {
        Ok(markdown) => markdown,
        Err(e) => return deferred(3, &format!("delegate_to_browser: {e}")),
    };
    ActionObservation {
        rung: 3,
        status: TaskStatus::Done,
        content: shape_api_response(&rendered, None),
        error: None,
    }
}

// ── bench.1 measurement primitive (the kill-gate's token axis) ───────────────

/// Token cost of one API-backed task measured two ways — nab's API-first path
/// against a DOM-dumping browser agent's path. This is the unit behind the
/// `bench.1` kill-gate (design §6).
///
/// Scope — what this does and does NOT show:
/// * It covers only the **token axis** of the gate, against a deliberately
///   **conservative browser baseline**: the raw page HTML a DOM-dumping agent
///   feeds its LLM. Real browser agents (Webwright/Playwright) add accessibility
///   trees and screenshots, and re-dump the DOM every step, so `browser_tokens`
///   is a LOWER bound that favours the browser.
/// * It is NOT a full `bench.1` pass. The gate additionally requires beating a
///   live browser agent on median **latency** across a 20-task corpus; that
///   head-to-head needs a running Webwright, an LLM brain and per-site auth, and
///   is neither deterministic nor CI-runnable. It remains the documented next
///   step (see `docs/design/2026-05-31-nab-task-engine.md` §6.1). This type only
///   supplies the deterministic, CI-runnable token-axis evidence for the moat
///   thesis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenGap {
    /// What nab feeds the host LLM, in tokens: the shaped seed (readability
    /// markdown) plus the discovered API's JSON response.
    pub nab_tokens: usize,
    /// What a DOM-dumping browser agent feeds its LLM, in tokens: the raw page
    /// HTML, dumped once (a conservative lower bound).
    pub browser_tokens: usize,
}

impl TokenGap {
    /// How many times more tokens the browser baseline costs
    /// (`browser_tokens / nab_tokens`); above `1.0` means nab is cheaper. A
    /// degenerate task where nab costs zero tokens yields `f64::INFINITY`.
    #[must_use]
    pub fn ratio(&self) -> f64 {
        match self.nab_tokens {
            0 => f64::INFINITY,
            nab => self.browser_tokens as f64 / nab as f64,
        }
    }

    /// `true` when nab's path costs strictly fewer tokens than the browser
    /// baseline (a tie is not a win).
    #[must_use]
    pub fn nab_wins(&self) -> bool {
        self.browser_tokens > self.nab_tokens
    }
}

/// Compute the [`TokenGap`] for one API-backed task from its recorded inputs:
/// the raw seed-page HTML and the body returned by the discovered API.
///
/// The nab side runs the REAL shaping pipeline: the seed goes through
/// readability markdown (`nab::content::html::html_to_markdown_with_readability`,
/// the `nab fetch` default for article HTML) and the API JSON is counted as-is,
/// so the figure reflects nab's actual moat rather than an idealisation. The
/// browser side is simply the token estimate of the raw HTML (conservative, see
/// [`TokenGap`]).
#[must_use]
pub fn token_gap(seed_html: &str, api_response: &str) -> TokenGap {
    use crate::content::budget::estimate_tokens;

    let shaped_seed = crate::content::html::html_to_markdown_with_readability(seed_html);
    let nab_tokens = estimate_tokens(&shaped_seed) + estimate_tokens(api_response);
    TokenGap {
        nab_tokens,
        browser_tokens: estimate_tokens(seed_html),
    }
}

/// Median of the token-reduction ratios over a corpus of [`TokenGap`]s — the
/// summary figure reported on the `bench.1` token axis (design §6 prefers the
/// median to the mean so outlier pages do not dominate).
///
/// An even-sized corpus yields the midpoint of the two central ratios. An empty
/// corpus yields `None`.
#[must_use]
pub fn median_ratio(gaps: &[TokenGap]) -> Option<f64> {
    let mut sorted: Vec<f64> = gaps.iter().map(TokenGap::ratio).collect();
    sorted.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap_or(std::cmp::Ordering::Equal));

    let upper = sorted.len() / 2;
    match sorted.len() {
        0 => None,
        len if len.is_multiple_of(2) => Some(f64::midpoint(sorted[upper - 1], sorted[upper])),
        _ => Some(sorted[upper]),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn task_action_roundtrips_through_json() {
        // One sample per variant exercised here; each must come back from JSON
        // equal to what went in.
        let samples = [
            TaskAction::ApiCall {
                url: "https://x/api".into(),
                method: "POST".into(),
                headers: vec![("A".into(), "B".into())],
                body: Some("{}".into()),
                extract_query: Some("q".into()),
            },
            TaskAction::JsEval {
                url: "https://x".into(),
                script: "1".into(),
            },
            TaskAction::Submit {
                url: "https://x".into(),
                fields: vec![("f".into(), "v".into())],
            },
            TaskAction::Extract {
                extract_query: "title".into(),
            },
            TaskAction::NeedsBrowser {
                reason: "captcha".into(),
                url: None,
            },
            TaskAction::Done {
                summary: Some("ok".into()),
            },
        ];
        for original in &samples {
            let json = serde_json::to_string(original).unwrap();
            let decoded: TaskAction = serde_json::from_str(&json).unwrap();
            assert_eq!(original, &decoded);
        }
    }

    #[test]
    fn api_call_defaults_method_to_get() {
        // The JSON deliberately omits `method`.
        let parsed: TaskAction =
            serde_json::from_str(r#"{"kind":"api_call","url":"https://x"}"#).unwrap();
        if let TaskAction::ApiCall { method, .. } = &parsed {
            assert_eq!(method, "GET");
        } else {
            panic!("expected api_call, got {parsed:?}");
        }
    }

    #[test]
    fn outcome_serializes_rung_and_status() {
        let outcome = TaskOutcome {
            goal: "g".into(),
            url: "u".into(),
            rung: 0,
            status: TaskStatus::Done,
            content: "c".into(),
            discovered_apis: vec![],
        };
        let json = serde_json::to_string(&outcome).unwrap();
        // Both the numeric rung and the snake_case status must be on the wire.
        for needle in ["\"rung\":0", "\"status\":\"done\""] {
            assert!(json.contains(needle));
        }
    }

    #[test]
    fn discovered_api_maps_from_endpoint_and_roundtrips() {
        let endpoint = ApiEndpoint {
            url: "/api/x".into(),
            method: Some("POST".into()),
            source: "script-fetch".into(),
        };
        // Conversion keeps the endpoint url.
        let api = DiscoveredApi::from(endpoint);
        assert_eq!(api.url, "/api/x");
        // And the converted value survives a JSON round trip.
        let json = serde_json::to_string(&api).unwrap();
        let decoded: DiscoveredApi = serde_json::from_str(&json).unwrap();
        assert_eq!(api, decoded);
    }

    #[test]
    fn action_observation_serializes_and_omits_absent_error() {
        let observation = ActionObservation {
            rung: 1,
            status: TaskStatus::Done,
            content: "ok".into(),
            error: None,
        };
        let json = serde_json::to_string(&observation).unwrap();
        for needle in ["\"rung\":1", "\"status\":\"done\""] {
            assert!(json.contains(needle));
        }
        // A `None` error is left out of the JSON entirely rather than written
        // as null.
        assert!(!json.contains("error"));
        let decoded: ActionObservation = serde_json::from_str(&json).unwrap();
        assert_eq!(observation, decoded);
    }

    #[test]
    fn discover_apis_finds_endpoints_and_skips_empty() {
        // Empty input yields no leads.
        assert!(discover_apis("").is_empty());

        let page = r#"<html><body>
            <script>fetch("/api/v1/users")</script>
            <a href="/graphql">gql</a>
        </body></html>"#;
        let found = discover_apis(page);
        let has_users_endpoint = found.iter().any(|api| api.url.contains("/api/v1/users"));
        assert!(
            has_users_endpoint,
            "expected the /api/v1/users endpoint, got {found:?}"
        );
    }

    /// A network-free, scripted fetcher used by the executor routing tests.
    struct MockFetcher {
        /// The canned result every `fetch` call replays.
        reply: anyhow::Result<String>,
        /// The most recent request handed to `fetch`, if any.
        last: std::sync::Mutex<Option<FetchRequest>>,
        /// Form-page HTML served by `fetch_raw` (the submit-rung page GET).
        /// While `None`, `fetch_raw` delegates to `fetch` instead.
        form_page: Option<String>,
    }

    impl MockFetcher {
        /// Common constructor: the given reply, nothing recorded, no form page.
        fn scripted(reply: anyhow::Result<String>) -> Self {
            Self {
                reply,
                last: std::sync::Mutex::new(None),
                form_page: None,
            }
        }

        /// A fetcher whose `fetch` always succeeds with `body`.
        fn ok(body: &str) -> Self {
            Self::scripted(Ok(body.to_owned()))
        }

        /// A fetcher whose `fetch` always fails with `msg`.
        fn err(msg: &str) -> Self {
            Self::scripted(Err(anyhow::anyhow!("{msg}")))
        }

        /// Have `fetch_raw` (the submit page GET) answer with `html`, keeping the
        /// submit send (`fetch`) reply separate — and the only request that ends
        /// up in `last`.
        fn with_form_page(self, html: &str) -> Self {
            Self {
                form_page: Some(html.to_owned()),
                ..self
            }
        }
    }

    impl TaskFetcher for MockFetcher {
        async fn fetch(&self, req: FetchRequest) -> anyhow::Result<String> {
            // Remember the request, then replay the scripted reply. The stored
            // error is re-created from its message on every call.
            *self.last.lock().unwrap() = Some(req);
            self.reply
                .as_ref()
                .cloned()
                .map_err(|e| anyhow::anyhow!("{e}"))
        }

        async fn fetch_raw(&self, url: &str) -> anyhow::Result<String> {
            // Serving the form page leaves `last` untouched; only the submit send
            // (`fetch`) records, so `last` reflects the submission request the
            // executor built.
            match &self.form_page {
                Some(page) => Ok(page.clone()),
                None => {
                    self.fetch(FetchRequest {
                        url: url.to_string(),
                        method: "GET".to_string(),
                        headers: Vec::new(),
                        body: None,
                    })
                    .await
                }
            }
        }
    }

    /// URL of the most recent request a [`MockFetcher`] recorded, or `None` when
    /// nothing was recorded — lets tests assert that loop-level actions
    /// (`extract`) never touch the network.
    fn f_last_url(f: &MockFetcher) -> Option<String> {
        let recorded = f.last.lock().unwrap();
        recorded.as_ref().map(|req| req.url.clone())
    }

    #[tokio::test]
    async fn execute_action_routes_api_call_through_the_fetcher() {
        let fetcher = MockFetcher::ok("{\"ok\":true}");
        let call = TaskAction::ApiCall {
            url: "https://api/x".into(),
            method: "POST".into(),
            headers: vec![("Accept".into(), "application/json".into())],
            body: Some("{}".into()),
            extract_query: None,
        };

        let observation = execute_action(&call, &fetcher).await.unwrap();
        assert_eq!(observation.rung, 1);
        assert_eq!(observation.status, TaskStatus::Done);
        assert_eq!(observation.content, "{\"ok\":true}");
        assert!(observation.error.is_none());

        // Url, method, body and headers must arrive at the fetcher unchanged.
        let sent = fetcher.last.lock().unwrap().clone().unwrap();
        assert_eq!(sent.url, "https://api/x");
        assert_eq!(sent.method, "POST");
        assert_eq!(sent.body.as_deref(), Some("{}"));
        let expected_headers = vec![("Accept".to_string(), "application/json".to_string())];
        assert_eq!(sent.headers, expected_headers);
    }

    #[tokio::test]
    async fn execute_action_caps_oversized_api_response() {
        // A verbose endpoint (e.g. crates.io returning every version) must not
        // reach the brain raw — nab's token-minimal contract bounds it.
        let oversized_len = API_RESPONSE_TOKEN_BUDGET * 4 * 10; // ~10x the budget
        let oversized = "x".repeat(oversized_len);
        let fetcher = MockFetcher::ok(&oversized);
        let call = TaskAction::ApiCall {
            url: "https://api/big".into(),
            method: "GET".into(),
            headers: vec![],
            body: None,
            extract_query: None,
        };

        let observation = execute_action(&call, &fetcher).await.unwrap();
        assert_eq!(observation.status, TaskStatus::Done);

        // The shaped content stays near the budget (plus room for the truncation
        // marker) instead of carrying the whole oversized body.
        let tokens = crate::content::budget::estimate_tokens(&observation.content);
        let ceiling = API_RESPONSE_TOKEN_BUDGET + 64;
        assert!(tokens <= ceiling, "api response not capped: {tokens} tokens");
        assert!(
            observation.content.contains("Truncated"),
            "capped response must carry the truncation marker so the brain knows"
        );
    }

    #[test]
    fn shape_api_response_passes_small_bodies_through() {
        // No extract_query and well under budget: the body comes back verbatim.
        let body = "{\"ok\":true}";
        assert_eq!(shape_api_response(body, None), body);
    }

    #[tokio::test]
    async fn execute_action_maps_fetcher_error_to_incomplete() {
        let fetcher = MockFetcher::err("boom");
        let call = TaskAction::ApiCall {
            url: "https://api/x".into(),
            method: "GET".into(),
            headers: vec![],
            body: None,
            extract_query: None,
        };

        // A failing fetch still yields an observation: Incomplete, no content,
        // and the fetcher's message carried in `error`.
        let observation = execute_action(&call, &fetcher).await.unwrap();
        assert_eq!(observation.rung, 1);
        assert_eq!(observation.status, TaskStatus::Incomplete);
        assert!(observation.content.is_empty());
        let failure = observation.error.unwrap();
        assert!(failure.contains("boom"));
    }

    #[tokio::test]
    async fn execute_action_defers_unsupported_and_terminates_done() {
        let fetcher = MockFetcher::ok("unused");

        // Each action here is expected to come back Incomplete, with an error,
        // no content, and the rung listed next to it.
        let deferred = [
            (
                TaskAction::JsEval {
                    url: "https://x".into(),
                    script: "1".into(),
                },
                1u8,
            ),
            (
                TaskAction::Extract {
                    extract_query: "t".into(),
                },
                0,
            ),
            (
                TaskAction::NeedsBrowser {
                    reason: "captcha".into(),
                    url: None,
                },
                3,
            ),
        ];
        for (action, want_rung) in &deferred {
            let observation = execute_action(action, &fetcher).await.unwrap();
            assert_eq!(observation.rung, *want_rung, "rung for {action:?}");
            assert_eq!(observation.status, TaskStatus::Incomplete);
            assert!(observation.error.is_some());
            assert!(observation.content.is_empty());
        }

        // Done is terminal.
        let done = TaskAction::Done { summary: None };
        let observation = execute_action(&done, &fetcher).await.unwrap();
        assert_eq!(observation.status, TaskStatus::Done);
        assert!(observation.error.is_none());
    }

    /// Fixture page for the submit-rung tests: a single POST form
    /// (`action="/login"`) carrying a hidden `csrf` input (value `tok123`) plus
    /// two user-fillable inputs, `email` and `password`.
    const POST_FORM_HTML: &str = r#"<html><body>
        <form method="post" action="/login">
          <input type="hidden" name="csrf" value="tok123">
          <input type="text" name="email">
          <input type="password" name="password">
        </form></body></html>"#;

    #[test]
    fn plan_form_submission_builds_post_with_merged_and_carried_fields() {
        let user_fields = [
            ("email".to_string(), "a@b.com".to_string()),
            ("password".to_string(), "pw".to_string()),
        ];
        let planned =
            plan_form_submission(POST_FORM_HTML, "https://site.test/login-page", &user_fields)
                .unwrap();

        // Method comes from the form; the action is resolved against the page url.
        assert_eq!(planned.method, "POST");
        assert!(planned.url.contains("site.test"), "url: {}", planned.url);
        assert!(
            planned.url.ends_with("/login"),
            "action not resolved: {}",
            planned.url
        );

        // The body is announced as url-encoded form data.
        let content_type = planned
            .headers
            .iter()
            .find_map(|(name, value)| (name == "Content-Type").then_some(value.as_str()));
        assert_eq!(content_type, Some("application/x-www-form-urlencoded"));

        // Hidden CSRF carried automatically; user fields merged in.
        let body = planned.body.as_deref().unwrap();
        assert!(body.contains("csrf=tok123"), "csrf not carried: {body}");
        assert!(
            body.contains("password=pw"),
            "user field not merged: {body}"
        );
        assert!(body.contains("email="), "email field missing: {body}");
    }

    #[test]
    fn plan_form_submission_builds_get_query() {
        let page = r#"<form method="get" action="/search"><input name="q"></form>"#;
        let user_fields = [("q".to_string(), "rust".to_string())];
        let planned = plan_form_submission(page, "https://site.test/", &user_fields).unwrap();

        // A GET form puts its fields in the query string and sends no body.
        assert_eq!(planned.method, "GET");
        assert!(planned.body.is_none(), "GET must not carry a body");
        assert!(
            planned.url.contains("q=rust"),
            "query not appended: {}",
            planned.url
        );
        assert!(
            planned.url.contains("/search"),
            "action missing: {}",
            planned.url
        );
    }

    #[test]
    fn plan_form_submission_errors_on_no_form() {
        // A page without any <form> cannot be planned.
        let page = "<html>nothing here</html>";
        let err = plan_form_submission(page, "https://x/", &[]).unwrap_err();
        let message = err.to_string();
        assert!(message.contains("no forms"), "got: {err}");
    }

    #[tokio::test]
    async fn execute_action_submit_posts_form_through_fetcher() {
        // `fetch_raw` serves the form page; `fetch` answers the submission and
        // records the request the executor built for it.
        let fetcher = MockFetcher::ok("welcome back").with_form_page(POST_FORM_HTML);
        let submit = TaskAction::Submit {
            url: "https://site.test/login-page".into(),
            fields: vec![
                ("email".into(), "a@b.com".into()),
                ("password".into(), "pw".into()),
            ],
        };

        let observation = execute_action(&submit, &fetcher).await.unwrap();
        assert_eq!(observation.rung, 2, "submit is rung 2");
        assert_eq!(observation.status, TaskStatus::Done);
        assert_eq!(observation.content, "welcome back");
        assert!(observation.error.is_none());

        // What was recorded is the SUBMISSION, not the page GET.
        let sent = fetcher.last.lock().unwrap().clone().unwrap();
        assert_eq!(sent.method, "POST");
        assert!(sent.url.ends_with("/login"));
        let body = sent.body.as_deref().unwrap();
        for needle in ["csrf=tok123", "password=pw"] {
            assert!(body.contains(needle));
        }
    }

    #[tokio::test]
    async fn execute_action_submit_incomplete_on_no_form() {
        // The served page has no form, so the submit is reported as Incomplete.
        let fetcher = MockFetcher::ok("unused").with_form_page("<html>no form here</html>");
        let submit = TaskAction::Submit {
            url: "https://x/".into(),
            fields: vec![],
        };

        let observation = execute_action(&submit, &fetcher).await.unwrap();
        assert_eq!(observation.rung, 2);
        assert_eq!(observation.status, TaskStatus::Incomplete);
        assert!(observation.content.is_empty());
        let failure = observation.error.unwrap();
        assert!(failure.contains("no forms"));
    }

    /// A scripted sampler: hands out a fixed list of replies in order and fails
    /// once the list is used up.
    struct ScriptedSampler {
        /// The replies, in the order they are handed out.
        replies: Vec<String>,
        /// Index of the next reply to hand out.
        idx: std::sync::Mutex<usize>,
    }

    impl ScriptedSampler {
        /// Build a sampler that replays `replies` from the first entry.
        fn new(replies: &[&str]) -> Self {
            Self {
                replies: replies.iter().copied().map(str::to_owned).collect(),
                idx: std::sync::Mutex::new(0),
            }
        }
    }

    impl Sampler for ScriptedSampler {
        async fn next_action(&self, _prompt: &str) -> anyhow::Result<String> {
            let mut cursor = self.idx.lock().unwrap();
            // Past the end of the script the sampler errors instead of looping.
            let Some(reply) = self.replies.get(*cursor) else {
                anyhow::bail!("script exhausted");
            };
            *cursor += 1;
            Ok(reply.clone())
        }
    }

    #[test]
    fn parse_action_strips_json_fences() {
        // A fenced reply parses.
        let fenced = parse_action("```json\n{\"kind\":\"done\",\"summary\":\"ok\"}\n```").unwrap();
        assert!(matches!(fenced, TaskAction::Done { .. }));

        // So does a bare JSON object.
        let bare = parse_action("{\"kind\":\"api_call\",\"url\":\"https://x\"}").unwrap();
        assert!(matches!(bare, TaskAction::ApiCall { .. }));

        // Anything that is not JSON is rejected.
        assert!(parse_action("not json").is_err());
    }

    #[tokio::test]
    async fn loop_runs_api_call_then_done() {
        // Script: one api_call, then a fenced `done`.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"api_call\",\"url\":\"https://api/x\",\"method\":\"GET\"}",
            "```json\n{\"kind\":\"done\",\"summary\":\"found it\"}\n```",
        ]);
        let fetcher = MockFetcher::ok("{\"result\":42}");
        let bounds = LoopBounds::default();

        let outcome = run_task_loop(
            "find the answer",
            "seed page",
            &[],
            &sampler,
            &fetcher,
            &bounds,
        )
        .await;

        assert_eq!(outcome.stop, LoopStop::Done);
        assert_eq!(outcome.status, TaskStatus::Done);
        assert_eq!(outcome.steps.len(), 1, "one api_call executed before done");
        assert_eq!(outcome.steps[0].observation.content, "{\"result\":42}");
        assert_eq!(outcome.final_content, "found it");
    }

    #[tokio::test]
    async fn route_1_stays_at_lowest_rung_when_api_completes() {
        // route.1 (design §6): once an API path completes the goal the router
        // must stay at the lowest rung — rung 3 (browser) must never fire. The
        // brain solves the task with one rung-1 api_call followed by done.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"api_call\",\"url\":\"https://api/x\"}",
            "{\"kind\":\"done\",\"summary\":\"ok\"}",
        ]);
        let fetcher = MockFetcher::ok("data");
        let bounds = LoopBounds::default();

        let outcome = run_task_loop("g", "seed", &[], &sampler, &fetcher, &bounds).await;
        assert_eq!(outcome.stop, LoopStop::Done);

        // The rung telemetry on each step is the proof of the route.1 gate:
        // nothing above rung 1 ran, and in particular not the browser rung (3).
        let stayed_low = outcome.steps.iter().all(|step| step.observation.rung <= 1);
        assert!(
            stayed_low,
            "router escalated above rung 1 when an API path completed: {:?}",
            outcome.steps
        );
        let browser_fired = outcome.steps.iter().any(|step| step.observation.rung == 3);
        assert!(
            !browser_fired,
            "rung 3 (browser) fired despite an available API path"
        );
    }

    /// A 6-section markdown doc (Intro, Auth, Styling, Deploy, Logging,
    /// Metrics) — enough sections for the focus pipeline to actually filter
    /// (`extract_focused` passes through ≤3 sections unchanged). Only the Auth
    /// section mentions bearer tokens and authentication.
    const MULTI_SECTION_MD: &str = "# Intro\n\nWelcome.\n\n## Auth\n\nBearer tokens and \
        authentication flow.\n\n## Styling\n\nCSS rules.\n\n## Deploy\n\nDocker deploy.\n\n\
        ## Logging\n\nJSON logs.\n\n## Metrics\n\nProm metrics.";

    #[tokio::test]
    async fn loop_extract_shapes_prior_step_content() {
        // api_call → extract → done. `extract` is a loop-level action: it
        // reshapes the CURRENT response (the api_call's content) through the
        // focus pipeline at rung 0, with no fetcher round-trip.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"api_call\",\"url\":\"https://api/doc\"}",
            "{\"kind\":\"extract\",\"extract_query\":\"authentication bearer tokens\"}",
            "{\"kind\":\"done\"}",
        ]);
        let fetcher = MockFetcher::ok(MULTI_SECTION_MD);
        let bounds = LoopBounds::default();

        let outcome = run_task_loop("g", "seed", &[], &sampler, &fetcher, &bounds).await;
        assert_eq!(outcome.stop, LoopStop::Done);
        assert_eq!(
            outcome.steps.len(),
            2,
            "api_call + extract executed before done"
        );

        let extract_step = &outcome.steps[1];
        assert!(
            matches!(extract_step.action, TaskAction::Extract { .. }),
            "second step should be the extract action"
        );
        let shaped = &extract_step.observation;
        assert_eq!(shaped.rung, 0, "extract is rung 0");
        assert_eq!(shaped.status, TaskStatus::Done);
        assert!(shaped.error.is_none());

        // The relevant section survives, and the omitted marker shows the doc
        // really was filtered.
        assert!(
            shaped.content.contains("## Auth"),
            "extract dropped the relevant section: {}",
            shaped.content
        );
        assert!(
            shaped.content.contains("omitted —"),
            "extract did not filter — no omitted-section marker"
        );
        assert!(
            shaped.content.len() < MULTI_SECTION_MD.len(),
            "focused content should be shorter than the source"
        );

        // The last request the fetcher saw is still the api_call: extract made
        // no fetch of its own.
        assert_eq!(
            f_last_url(&fetcher),
            Some("https://api/doc".to_string()),
            "extract must not hit the fetcher"
        );
    }

    #[tokio::test]
    async fn loop_extract_falls_back_to_seed_when_no_prior_step() {
        // With extract as the FIRST action there is no step content yet, so the
        // seed page (the current response before any step) is what gets shaped.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"extract\",\"extract_query\":\"authentication bearer tokens\"}",
            "{\"kind\":\"done\"}",
        ]);
        let fetcher = MockFetcher::ok("unused");
        let bounds = LoopBounds::default();

        let outcome =
            run_task_loop("g", MULTI_SECTION_MD, &[], &sampler, &fetcher, &bounds).await;
        assert_eq!(outcome.stop, LoopStop::Done);
        assert_eq!(outcome.steps.len(), 1);

        let shaped = &outcome.steps[0].observation;
        assert_eq!(shaped.rung, 0);
        assert_eq!(shaped.status, TaskStatus::Done);
        for needle in ["## Auth", "omitted —"] {
            assert!(shaped.content.contains(needle));
        }

        // Nothing was fetched — the seed was shaped directly.
        assert!(f_last_url(&fetcher).is_none());
    }

    #[tokio::test]
    async fn loop_extract_incomplete_when_no_content_to_shape() {
        // Extract first with an EMPTY seed: the honest answer is Incomplete, not
        // an empty Done.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"extract\",\"extract_query\":\"anything\"}",
            "{\"kind\":\"done\"}",
        ]);
        let fetcher = MockFetcher::ok("unused");
        let bounds = LoopBounds::default();

        let outcome = run_task_loop("g", "", &[], &sampler, &fetcher, &bounds).await;
        assert_eq!(outcome.stop, LoopStop::Done);

        let observation = &outcome.steps[0].observation;
        assert_eq!(observation.status, TaskStatus::Incomplete);
        assert!(observation.content.is_empty());
        let failure = observation.error.as_deref().unwrap();
        assert!(failure.contains("no prior content"));
    }

    #[tokio::test]
    async fn loop_stops_at_max_steps() {
        // The brain keeps asking for api_calls and never says done; the step
        // bound (2) must end the loop before the third scripted reply runs.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"api_call\",\"url\":\"https://a\"}",
            "{\"kind\":\"api_call\",\"url\":\"https://b\"}",
            "{\"kind\":\"api_call\",\"url\":\"https://c\"}",
        ]);
        let fetcher = MockFetcher::ok("x");
        let tight_bounds = LoopBounds {
            max_steps: 2,
            ..LoopBounds::default()
        };

        let outcome = run_task_loop("g", "s", &[], &sampler, &fetcher, &tight_bounds).await;
        assert_eq!(outcome.stop, LoopStop::MaxSteps);
        assert_eq!(outcome.status, TaskStatus::Incomplete);
        assert_eq!(outcome.steps.len(), 2);
    }

    #[tokio::test]
    async fn loop_reports_parse_error_on_garbage_reply() {
        // An unparseable reply stops the loop before any step is recorded.
        let sampler = ScriptedSampler::new(&["this is not json"]);
        let fetcher = MockFetcher::ok("x");
        let bounds = LoopBounds::default();

        let outcome = run_task_loop("g", "s", &[], &sampler, &fetcher, &bounds).await;
        assert_eq!(outcome.stop, LoopStop::ParseError);
        assert!(outcome.steps.is_empty());
    }

    #[tokio::test]
    async fn loop_reports_sampler_error_when_brain_fails() {
        // An empty script is exhausted on the very first call, so the sampler
        // errors immediately.
        let sampler = ScriptedSampler::new(&[]);
        let fetcher = MockFetcher::ok("x");
        let bounds = LoopBounds::default();

        let outcome = run_task_loop("g", "s", &[], &sampler, &fetcher, &bounds).await;
        assert_eq!(outcome.stop, LoopStop::SamplerError);
    }

    // ── rung 3: browser backend ──────────────────────────────────────────────

    /// A scripted browser backend for the rung-3 routing tests — no real Chrome
    /// involved.
    struct MockBrowser {
        /// The canned result every `render` call replays.
        reply: anyhow::Result<String>,
        /// The url of the most recent `render` call, if any.
        last: std::sync::Mutex<Option<String>>,
    }

    impl MockBrowser {
        /// Common constructor: the given reply, no render recorded yet.
        fn scripted(reply: anyhow::Result<String>) -> Self {
            Self {
                reply,
                last: std::sync::Mutex::new(None),
            }
        }

        /// A backend whose `render` always succeeds with `body`.
        fn ok(body: &str) -> Self {
            Self::scripted(Ok(body.to_owned()))
        }

        /// A backend whose `render` always fails with `msg`.
        fn err(msg: &str) -> Self {
            Self::scripted(Err(anyhow::anyhow!("{msg}")))
        }
    }

    impl BrowserBackend for MockBrowser {
        async fn render(&self, url: &str) -> anyhow::Result<String> {
            // Record the url, then replay the scripted reply. The stored error
            // is re-created from its message on every call.
            *self.last.lock().unwrap() = Some(url.to_owned());
            self.reply
                .as_ref()
                .cloned()
                .map_err(|e| anyhow::anyhow!("{e}"))
        }
    }

    #[tokio::test]
    async fn no_browser_backend_defers_render() {
        // The default build's backend never renders: it always answers with an
        // honest error.
        let rendered = NoBrowser.render("https://x").await;
        assert!(rendered.is_err());
    }

    #[tokio::test]
    async fn loop_drives_browser_on_needs_browser_with_url() {
        // Brain: needs_browser(url) → done. The injected backend performs the
        // rung-3 render.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"needs_browser\",\"reason\":\"spa\",\"url\":\"https://app/dash\"}",
            "{\"kind\":\"done\",\"summary\":\"read the dashboard\"}",
        ]);
        let fetcher = MockFetcher::ok("unused");
        let browser = MockBrowser::ok("# Dashboard\n\nBalance: 42");
        let bounds = LoopBounds::default();

        let outcome =
            run_task_loop_with_browser("g", "seed", &[], &sampler, &fetcher, &browser, &bounds)
                .await;
        assert_eq!(outcome.stop, LoopStop::Done);

        let rendered = &outcome.steps[0].observation;
        assert_eq!(rendered.rung, 3, "browser step is rung 3");
        assert_eq!(rendered.status, TaskStatus::Done);
        assert!(rendered.content.contains("Balance: 42"));

        // The backend was asked for exactly the url the brain requested.
        let asked_for = browser.last.lock().unwrap().clone();
        assert_eq!(asked_for, Some("https://app/dash".to_string()));
    }

    #[tokio::test]
    async fn loop_defers_needs_browser_without_backend() {
        // Default loop (NoBrowser): needs_browser ends as an honest rung-3
        // Incomplete.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"needs_browser\",\"reason\":\"spa\",\"url\":\"https://app/x\"}",
            "{\"kind\":\"done\"}",
        ]);
        let fetcher = MockFetcher::ok("unused");
        let bounds = LoopBounds::default();

        let outcome = run_task_loop("g", "seed", &[], &sampler, &fetcher, &bounds).await;
        assert_eq!(outcome.stop, LoopStop::Done);

        let deferred = &outcome.steps[0].observation;
        assert_eq!(deferred.rung, 3);
        assert_eq!(deferred.status, TaskStatus::Incomplete);
        let failure = deferred.error.as_deref().unwrap();
        assert!(failure.contains("delegate_to_browser"));
    }

    #[tokio::test]
    async fn loop_defers_needs_browser_without_url() {
        // A needs_browser without a url cannot be rendered (the loop holds seed
        // content, not the seed URL), so it is deferred honestly even though a
        // backend is available.
        let sampler = ScriptedSampler::new(&[
            "{\"kind\":\"needs_browser\",\"reason\":\"spa\"}",
            "{\"kind\":\"done\"}",
        ]);
        let fetcher = MockFetcher::ok("unused");
        let browser = MockBrowser::err("should not be called");
        let bounds = LoopBounds::default();

        let outcome =
            run_task_loop_with_browser("g", "seed", &[], &sampler, &fetcher, &browser, &bounds)
                .await;

        let deferred = &outcome.steps[0].observation;
        assert_eq!(deferred.rung, 3);
        assert_eq!(deferred.status, TaskStatus::Incomplete);
        let failure = deferred.error.as_deref().unwrap();
        assert!(failure.contains("requires a url"));

        let backend_untouched = browser.last.lock().unwrap().is_none();
        assert!(backend_untouched, "backend must not be called");
    }

    // ── bench.1 token-gap primitive ──────────────────────────────────────────

    /// Fixture: a chrome-heavy page — a tiny article surrounded by a large
    /// `<script>` blob, a `<style>` block, a nav and a footer. Markdown
    /// conversion always drops script/style, so the shaped output is guaranteed
    /// smaller than the raw HTML whatever readability's exact heuristics are —
    /// which makes this a robust input for the measurement.
    fn chrome_heavy_html() -> String {
        let style_noise = "a{color:red}".repeat(100);
        let script_noise = "var x=0;".repeat(400); // ~3.2 KB of non-content
        [
            "<html><head><style>",
            style_noise.as_str(),
            "</style><script>",
            script_noise.as_str(),
            "</script></head><body>",
            "<nav><a href=/>home</a><a href=/about>about</a></nav>",
            "<article><h1>The Answer</h1>",
            "<p>The result you asked for is forty-two.</p></article>",
            "<footer>(c) 2026 example</footer></body></html>",
        ]
        .concat()
    }

    #[test]
    fn token_gap_nab_beats_raw_dom_on_chrome_heavy_page() {
        let page = chrome_heavy_html();
        let measured = token_gap(&page, r#"{"answer":42}"#);

        // All three views of the same fact must agree: nab is the cheaper side.
        assert!(
            measured.nab_wins(),
            "nab ({}) should cost fewer tokens than the raw DOM ({})",
            measured.nab_tokens,
            measured.browser_tokens
        );
        let ratio = measured.ratio();
        assert!(ratio > 1.0, "ratio {} should exceed 1", ratio);
        assert!(measured.browser_tokens > measured.nab_tokens);
    }

    #[test]
    fn token_gap_includes_the_api_response_additively() {
        let page = chrome_heavy_html();
        let api_body = "0123456789";
        let without_api = token_gap(&page, "");
        let with_api = token_gap(&page, api_body);

        // nab's side grows by exactly the token estimate of the API body.
        let api_cost = crate::content::budget::estimate_tokens(api_body);
        assert_eq!(
            with_api.nab_tokens,
            without_api.nab_tokens + api_cost,
            "api response tokens must add to nab_tokens"
        );
        // The browser baseline (raw HTML) does not depend on the API response.
        assert_eq!(with_api.browser_tokens, without_api.browser_tokens);
    }

    #[test]
    fn ratio_is_infinite_for_zero_nab_tokens() {
        // Zero nab tokens must not divide by zero; the ratio is infinite.
        let degenerate = TokenGap {
            nab_tokens: 0,
            browser_tokens: 5,
        };
        let ratio = degenerate.ratio();
        assert!(ratio.is_infinite());
    }

    #[test]
    fn median_ratio_handles_empty_odd_and_even() {
        // Every gap uses nab_tokens = 10, so its ratio is browser_tokens / 10.
        let gap = |browser_tokens| TokenGap {
            nab_tokens: 10,
            browser_tokens,
        };

        // Empty corpus → no median.
        assert!(median_ratio(&[]).is_none());

        // Odd count: ratios 2.0, 4.0, 8.0 → median 4.0.
        let odd = [gap(20), gap(40), gap(80)];
        let odd_median = median_ratio(&odd).unwrap();
        assert!((odd_median - 4.0).abs() < f64::EPSILON);

        // Even count: the two middles (2.0, 4.0) average to 3.0.
        let even = [gap(20), gap(40)];
        let even_median = median_ratio(&even).unwrap();
        assert!((even_median - 3.0).abs() < f64::EPSILON);
    }
}