harn-stdlib 0.8.62

/** std/testing — helpers for writing Harn tests. */
// Module-level log of step-event records. The PreStep/PostStep hooks
// registered by `step_assertions_begin` append to it, `step_events_clear`
// resets it to an empty list, and `step_events` returns it.
var __testing_step_events = []

/**
 * Shallow subset comparison used by the host-call and step-event matchers.
 *
 * A nil `expected` matches anything. When both sides are dicts, every key
 * in `expected` must map to an equal value in `actual` (extra keys in
 * `actual` are ignored; nested values are compared with `!=`, not
 * recursively). In every other case the two values are compared with `==`.
 */
fn params_subset_match(expected, actual) {
  if expected == nil {
    return true
  }
  let both_dicts = type_of(expected) == "dict" && type_of(actual) == "dict"
  if !both_dicts {
    return expected == actual
  }
  for pair in entries(expected) {
    if actual[pair.key] != pair.value {
      return false
    }
  }
  return true
}

/**
 * Clear host mocks by delegating to `host_mock_clear()`.
 *
 * Takes no arguments and returns whatever `host_mock_clear()` returns.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: clear_host_mocks()
 */
pub fn clear_host_mocks() {
  return host_mock_clear()
}

/**
 * Register a host mock for `cap`.`op` that carries `result`.
 *
 * When `params` is nil the three-argument form of `host_mock` is used;
 * otherwise `params` is forwarded as a fourth argument. Returns whatever
 * `host_mock` returns.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: mock_host_result(cap, op, result, params)
 */
pub fn mock_host_result(cap: string, op: string, result, params) {
  if params != nil {
    return host_mock(cap, op, result, params)
  }
  return host_mock(cap, op, result)
}

/**
 * Register a host mock for `cap`.`op` whose config is `{error: message}`.
 *
 * When `params` is non-nil it is merged into the config as a `params`
 * key before the config is handed to `host_mock`. Returns whatever
 * `host_mock` returns.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: mock_host_error(cap, op, message, params)
 */
pub fn mock_host_error(cap: string, op: string, message: string, params) {
  let base = {error: message}
  let config = if params == nil {
    base
  } else {
    base + {params: params}
  }
  return host_mock(cap, op, config)
}

/**
 * Register a host mock for `cap`.`op` by forwarding `config` unchanged to
 * `host_mock`. Use this when the caller already has a full config dict
 * (for example the `{error, params}` shape that `mock_host_error` builds).
 *
 * Returns whatever `host_mock` returns.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: mock_host_response(cap, op, config)
 */
pub fn mock_host_response(cap: string, op: string, config) {
  return host_mock(cap, op, config)
}

/**
 * Return the host-mock call log exactly as reported by `host_mock_calls()`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: host_calls()
 */
pub fn host_calls() {
  return host_mock_calls()
}

/**
 * Return the recorded host calls whose `capability` equals `cap` and whose
 * `operation` equals `op`, in the order `host_mock_calls()` reports them.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: host_calls_for(cap, op)
 */
pub fn host_calls_for(cap: string, op: string) {
  let recorded = host_mock_calls()
  return recorded.filter({ entry -> return entry?.capability == cap && entry?.operation == op })
}

/**
 * Return the number of entries in the list reported by `host_mock_calls()`.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: host_call_count()
 */
pub fn host_call_count() -> int {
  return len(host_mock_calls())
}

/**
 * Return how many recorded host calls match `cap` and `op`, i.e. the
 * length of `host_calls_for(cap, op)`.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: host_call_count_for(cap, op)
 */
pub fn host_call_count_for(cap: string, op: string) -> int {
  return len(host_calls_for(cap, op))
}

/**
 * Report whether any recorded `cap`.`op` host call has params matching
 * `expected_params`.
 *
 * Matching is delegated to `params_subset_match`, so a nil
 * `expected_params` accepts any recorded call and a dict is treated as a
 * shallow subset of the call's `params`.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: host_was_called(cap, op, expected_params)
 */
pub fn host_was_called(cap: string, op: string, expected_params) -> bool {
  for recorded in host_calls_for(cap, op) {
    let matched = params_subset_match(expected_params, recorded?.params)
    if matched {
      return true
    }
  }
  return false
}

/**
 * Assert that a `cap`.`op` host call matching `params` was recorded.
 *
 * Returns nil on success. On failure `require` is tripped with `message`
 * when given, otherwise with a default that names the call and, when
 * `params` is non-nil, the expected params.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_host_called(cap, op, params, message)
 */
pub fn assert_host_called(cap: string, op: string, params = nil, message = nil) {
  if !host_was_called(cap, op, params) {
    // Only build the diagnostic once we know the assertion failed.
    let subject = "Expected host call " + cap + "." + op
    let default_message = if params == nil {
      subject + " to be recorded"
    } else {
      subject + " with params " + to_string(params) + " to be recorded"
    }
    require false, message ?? default_message
  }
  return nil
}

/**
 * Assert that exactly `expected_count` host calls were recorded for
 * `cap`.`op`.
 *
 * Returns the observed count on success. On mismatch `require` fails with
 * `message` when given, otherwise with a default reporting both counts.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_host_call_count(expected_count, cap, op, message)
 */
pub fn assert_host_call_count(expected_count: int, cap: string, op: string, message = nil) {
  let observed = host_call_count_for(cap, op)
  let fallback = "Expected " + to_string(expected_count) + " host calls, got " + to_string(observed)
  require observed == expected_count, message ?? fallback
  return observed
}

/**
 * Assert that the host-mock call log is empty.
 *
 * Fails via `require` when `host_call_count()` is non-zero, using
 * `message` verbatim when provided and otherwise a default that reports
 * the observed count. Returns the observed count (0) on success.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_no_host_calls(message)
 */
pub fn assert_no_host_calls(message = nil) {
  let actual_count = host_call_count()
  // The fallback is parenthesized (as in `assert_error_contains`) so a
  // caller-supplied `message` can never have the count appended to it,
  // whichever way `??` and `+` bind relative to each other.
  require actual_count == 0, message ?? ("Expected no host calls, got " + to_string(actual_count))
  return actual_count
}

/**
 * Register one host-mock fixture entry of the shape
 *   { capability, operation, result?, error?, params? }
 *
 * Throws when `entry` is not a dict or lacks `capability` / `operation`.
 * A non-nil `error` wins over `result`: the entry is registered through
 * `mock_host_error`, otherwise through `mock_host_result`. Returns the
 * value of whichever registration helper was used.
 */
fn apply_host_mock_entry(entry) {
  let entry_kind = type_of(entry)
  if entry_kind != "dict" {
    throw "with_host_mocks: each entry must be a dict, got " + entry_kind
  }
  let cap = entry?.capability
  let op = entry?.operation
  if cap == nil || op == nil {
    throw "with_host_mocks: each entry must include 'capability' and 'operation'"
  }
  let failure = entry?.error
  if failure == nil {
    return mock_host_result(cap, op, entry?.result, entry?.params)
  }
  return mock_host_error(cap, op, failure, entry?.params)
}

/**
 * Run `body` with a fresh host-mock scope. Each entry in `mocks` is
 * applied via `mock_host_result` / `mock_host_error`, the body runs,
 * and the prior host-mock state plus call log is restored on exit —
 * even if the body throws. Nested scopes stack: an inner
 * `with_host_mocks` does not leak into the outer scope.
 *
 * `mocks` may be nil, which is treated as an empty list. Every entry must
 * be a dict carrying `capability` and `operation`; a malformed entry
 * throws, and the pushed scope is popped before the error propagates.
 *
 * Returns whatever `body` returns. Re-raises any thrown error after
 * cleanup.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_host_mocks(mocks, body)
 */
pub fn with_host_mocks(mocks, body) {
  host_mock_push_scope()
  // Registration runs inside the same try/catch as the body so a
  // malformed entry takes the cleanup path instead of leaking the
  // pushed scope.
  try {
    for entry in mocks ?? [] {
      apply_host_mock_entry(entry)
    }
    // NOTE(review): `__testing_call_body` is not defined in the visible
    // source; presumably it invokes `body` — confirm against its definition.
    let result = __testing_call_body(body)
    // Success path: pop the scope before handing the result back.
    host_mock_pop_scope()
    return result
  } catch (e) {
    // Failure path: pop the scope, then re-raise the caught error.
    host_mock_pop_scope()
    throw e
  }
}

/**
 * True when `type_of(value)` is one of the callable kind names
 * ("function", "closure" or "fn").
 */
fn __testing_is_callable(value) -> bool {
  let kind = type_of(value)
  for callable_kind in ["function", "closure", "fn"] {
    if kind == callable_kind {
      return true
    }
  }
  return false
}

/**
 * Flatten a persona hook context into the record stored in the step-event
 * log: `event`, `target`, `persona`, the step's `name` (as `step_name`),
 * `function` and `args` (defaulting to an empty list). An `output` key is
 * added only when the context carries a non-nil `output`.
 */
fn __testing_step_event_record(ctx) {
  let base = {
    event: ctx?.event,
    target: ctx?.target,
    persona: ctx?.persona,
    step_name: ctx?.step?.name,
    function: ctx?.step?.function,
    args: ctx?.step?.args ?? [],
  }
  if ctx?.output == nil {
    return base
  }
  return base + {output: ctx.output}
}

/**
 * Return the recorded step events whose `event` field equals `kind`
 * (for example "PreStep" or "PostStep"), preserving recording order.
 */
fn __testing_step_events_for(kind) {
  return __testing_step_events.filter({ recorded -> return recorded.event == kind })
}

/**
 * Decide whether `candidate` satisfies `predicate`.
 *
 * A nil predicate always matches; a callable predicate is invoked with the
 * candidate and its result returned; a dict predicate is a shallow subset
 * match via `params_subset_match`; anything else is compared with `==`.
 */
fn __testing_event_matches(candidate, predicate) -> bool {
  if predicate == nil {
    return true
  }
  if __testing_is_callable(predicate) {
    return predicate(candidate)
  }
  let verdict = if type_of(predicate) == "dict" {
    params_subset_match(predicate, candidate)
  } else {
    candidate == predicate
  }
  return verdict
}

/**
 * Decide whether `handoff` is addressed to `target`.
 *
 * A nil `target` matches every handoff. Otherwise `target` must equal one
 * of: the `id`, `label` or `kind` of `handoff.target_persona_or_human`, or
 * the handoff's own `target` / `target_persona` field.
 */
fn __testing_handoff_target_matches(handoff, target) -> bool {
  if target == nil {
    return true
  }
  let resolved = handoff?.target_persona_or_human ?? {}
  let candidates = [
    resolved?.id,
    resolved?.label,
    resolved?.kind,
    handoff?.target,
    handoff?.target_persona,
  ]
  for candidate in candidates {
    if candidate == target {
      return true
    }
  }
  return false
}

/**
 * Recursive golden comparison with sentinel support.
 *
 *   * "<ms>"   accepts an int, a float, or a string for which the
 *              digits-only pattern yields exactly one capture.
 *   * "<uuid>" accepts a string for which the 8-4-4-4-12 hex pattern yields
 *              exactly one capture.
 *   * "<any>"  accepts any non-nil value.
 *
 * Otherwise the two values must share a `type_of`. Dicts are matched as a
 * subset (only keys present in `expected` are checked), lists must have
 * equal length and match element-wise, and everything else uses `==`.
 */
fn __testing_golden_match(expected, actual) -> bool {
  let actual_kind = type_of(actual)
  if expected == "<ms>" {
    if actual_kind == "int" || actual_kind == "float" {
      return true
    }
    return actual_kind == "string" && len(regex_captures("^[0-9]+$", actual)) == 1
  }
  if expected == "<uuid>" {
    if actual_kind != "string" {
      return false
    }
    let hits = regex_captures(
      "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$",
      actual,
    )
    return len(hits) == 1
  }
  if expected == "<any>" {
    return actual != nil
  }
  let expected_kind = type_of(expected)
  if expected_kind != actual_kind {
    return false
  }
  if expected_kind == "dict" {
    for pair in entries(expected) {
      if !__testing_golden_match(pair.value, actual[pair.key]) {
        return false
      }
    }
    return true
  }
  if expected_kind == "list" {
    if len(expected) != len(actual) {
      return false
    }
    var position = 0
    for wanted in expected {
      if !__testing_golden_match(wanted, actual[position]) {
        return false
      }
      position += 1
    }
    return true
  }
  return expected == actual
}

/**
 * Return the LLM mock call log for the current scope, exactly as reported
 * by `llm_mock_calls()`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_calls()
 */
pub fn llm_calls() {
  return llm_mock_calls()
}

/**
 * Return the number of LLM calls recorded in the current scope, i.e. the
 * length of the list reported by `llm_mock_calls()`.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: llm_call_count()
 */
pub fn llm_call_count() -> int {
  return len(llm_mock_calls())
}

/**
 * Run `body` with a fresh LLM-mock scope. Each entry in `mocks` is
 * pushed via `llm_mock(...)` and consumed in order by the body. The
 * prior LLM-mock queue plus call log is restored on exit — even if
 * the body throws. Nested scopes stack the same way as
 * `with_host_mocks`.
 *
 * `mocks` may be nil, which is treated as an empty list. Every entry must
 * be a dict; a non-dict entry throws, and the pushed scope is popped
 * before the error propagates.
 *
 * Returns whatever `body` returns. Re-raises any thrown error after
 * cleanup.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_llm_mocks(mocks, body)
 */
pub fn with_llm_mocks(mocks, body) {
  llm_mock_push_scope()
  // Validation and registration sit inside the try so a bad entry still
  // reaches the catch below and pops the scope pushed above.
  try {
    for entry in mocks ?? [] {
      if type_of(entry) != "dict" {
        throw "with_llm_mocks: each entry must be a dict, got " + type_of(entry)
      }
      llm_mock(entry)
    }
    // NOTE(review): `__testing_call_body` is not defined in the visible
    // source; presumably it invokes `body` — confirm against its definition.
    let result = __testing_call_body(body)
    // Success path: pop the scope before handing the result back.
    llm_mock_pop_scope()
    return result
  } catch (e) {
    // Failure path: pop the scope, then re-raise the caught error.
    llm_mock_pop_scope()
    throw e
  }
}

/**
 * Unified scoped fixture for tests that mix host and LLM mocks.
 * `config` is a dict with optional `host_mocks` and `llm_mocks` lists,
 * each shaped like the entries accepted by `with_host_mocks` and
 * `with_llm_mocks`. Both scopes are pushed before the body runs and
 * popped (in reverse order) after — including on thrown errors.
 *
 * A missing `host_mocks` or `llm_mocks` key (or a nil `config`) is treated
 * as an empty list. Returns the value produced by running `body`; any
 * error raised during registration or by the body is re-raised after both
 * scopes have been popped.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_mocks(config, body)
 */
pub fn with_mocks(config, body) {
  let host_entries = config?.host_mocks ?? []
  let llm_entries = config?.llm_mocks ?? []
  // Push order is host then LLM; both exits below pop LLM then host.
  host_mock_push_scope()
  llm_mock_push_scope()
  // Wrap registration *and* body together so a malformed entry
  // (e.g. a missing capability) still triggers the same restore path
  // that a thrown body would. Without this, an exception during
  // registration would leak both pushed scopes.
  try {
    for entry in host_entries {
      apply_host_mock_entry(entry)
    }
    for entry in llm_entries {
      if type_of(entry) != "dict" {
        throw "with_mocks: each llm_mocks entry must be a dict, got " + type_of(entry)
      }
      llm_mock(entry)
    }
    // NOTE(review): `__testing_call_body` is not defined in the visible
    // source; presumably it invokes `body` — confirm against its definition.
    let result = __testing_call_body(body)
    llm_mock_pop_scope()
    host_mock_pop_scope()
    return result
  } catch (e) {
    llm_mock_pop_scope()
    host_mock_pop_scope()
    throw e
  }
}

/**
 * Clear recorded persona step events by resetting the module-level
 * `__testing_step_events` list to empty. Always returns nil.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_events_clear()
 */
pub fn step_events_clear() {
  __testing_step_events = []
  return nil
}

/**
 * Return recorded PreStep/PostStep payloads captured by step_assertions_begin.
 *
 * The value is the module-level `__testing_step_events` list; it holds
 * whatever has been recorded since the last `step_events_clear()` or
 * `step_assertions_begin()` call.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_events()
 */
pub fn step_events() {
  return __testing_step_events
}

/**
 * Begin capturing persona step hook payloads for Harn-level tests.
 *
 * Existing persona hooks are cleared first, because this helper owns the
 * hook registrations for the duration of a conformance fixture, and the
 * step-event log is emptied. A recording hook is then registered for
 * "PreStep" and for "PostStep" under `persona_pattern` (default "*").
 * Register any fixture-specific hooks after calling this helper.
 * Always returns nil.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_assertions_begin(persona_pattern)
 */
pub fn step_assertions_begin(persona_pattern = "*") {
  clear_persona_hooks()
  step_events_clear()
  // Both hook kinds record the same way, so register them in order.
  for hook_event in ["PreStep", "PostStep"] {
    register_persona_hook(
      persona_pattern,
      hook_event,
      { ctx ->
        __testing_step_events = __testing_step_events.push(__testing_step_event_record(ctx))
        return nil
      },
    )
  }
  return nil
}

/**
 * Stop recording persona step hook payloads.
 *
 * Implemented as a call to `clear_persona_hooks()`, so — presumably — any
 * other persona hooks registered by the fixture are removed as well
 * (confirm against `clear_persona_hooks`). The already-recorded events are
 * left untouched and stay readable through `step_events()`. Always
 * returns nil.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_assertions_end()
 */
pub fn step_assertions_end() {
  clear_persona_hooks()
  return nil
}

/**
 * Assert the exact ordered list of @step names that ran.
 *
 * The actual list is built from the `step_name` of every recorded
 * "PreStep" event, in recording order. Each name in `expected_names` must
 * appear in that list, and the two lists must then be equal. Returns the
 * actual list on success; on failure `require` is tripped with `message`
 * verbatim when given, otherwise with a default diagnostic.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_steps_ran(expected_names, message)
 */
pub fn assert_steps_ran(expected_names, message = nil) {
  var actual = []
  for event in __testing_step_events_for("PreStep") {
    actual = actual.push(event.step_name)
  }
  for name in expected_names {
    // The fallback is parenthesized (as in `assert_error_contains`) so a
    // caller-supplied `message` is never concatenated with the step name,
    // whichever way `??` and `+` bind relative to each other.
    require contains(actual, name), message ?? ("step '" + name + "' not callable: not annotated `@step`")
  }
  require actual == expected_names, message ?? "step order mismatch"
  return actual
}

/**
 * Assert that a step received an input matching a closure, dict subset, or value.
 *
 * Scans the recorded "PreStep" events for the first one whose `step_name`
 * equals `step_name` and which satisfies `predicate`. The predicate is
 * applied to the whole recorded event, not just its `args`; a nil
 * predicate accepts any event. Returns the matching event; otherwise
 * `require` fails with `message` verbatim when given, or a default
 * diagnostic naming the step.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_step_received(step_name, predicate, message)
 */
pub fn assert_step_received(step_name, predicate = nil, message = nil) {
  for event in __testing_step_events_for("PreStep") {
    if event.step_name == step_name && __testing_event_matches(event, predicate) {
      return event
    }
  }
  // The fallback is parenthesized (as in `assert_error_contains`) so a
  // caller-supplied `message` is used as-is, whichever way `??` and `+`
  // bind relative to each other.
  require false, message ?? ("step '" + step_name + "' did not receive the expected input")
  return nil
}

/**
 * Assert that a step emitted an output matching a closure, dict subset, or value.
 *
 * Scans the recorded "PostStep" events for the first one whose `step_name`
 * equals `step_name` and which satisfies `predicate`. The predicate is
 * applied to the whole recorded event (which carries `output` when the
 * hook context had one); a nil predicate accepts any event. Returns the
 * matching event; otherwise `require` fails with `message` verbatim when
 * given, or a default diagnostic naming the step.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_step_emitted(step_name, predicate, message)
 */
pub fn assert_step_emitted(step_name, predicate = nil, message = nil) {
  for event in __testing_step_events_for("PostStep") {
    if event.step_name == step_name && __testing_event_matches(event, predicate) {
      return event
    }
  }
  // The fallback is parenthesized (as in `assert_error_contains`) so a
  // caller-supplied `message` is used as-is, whichever way `??` and `+`
  // bind relative to each other.
  require false, message ?? ("step '" + step_name + "' did not emit the expected output")
  return nil
}

/**
 * Assert that a handoff of `handoff_kind` (optionally addressed to
 * `target`) is present.
 *
 * `source` may be a run record exposing a `handoffs` list or the list
 * itself; nil is treated as empty. Returns the first matching handoff.
 * When none matches, `require` fails with a diagnostic naming the kind
 * and, if given, the target.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_handoff_emitted(source, handoff_kind, target)
 */
pub fn assert_handoff_emitted(source, handoff_kind, target = nil) {
  let handoffs = source?.handoffs ?? source
  for candidate in handoffs ?? [] {
    if candidate?.kind == handoff_kind {
      if __testing_handoff_target_matches(candidate, target) {
        return candidate
      }
    }
  }
  var suffix = ""
  if target != nil {
    suffix = " to " + to_string(target)
  }
  require false, "handoff '" + handoff_kind + "'" + suffix + " was not emitted"
  return nil
}

/**
 * Assert that the value found at JSON Pointer `path` (RFC 6901) inside
 * `receipt` equals `value`.
 *
 * The lookup is performed by `json_pointer(receipt, path)`. Returns the
 * looked-up value on success; on mismatch `require` fails with a
 * diagnostic naming `path`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_receipt_field(receipt, path, value)
 */
pub fn assert_receipt_field(receipt, path, value) {
  let found = json_pointer(receipt, path)
  require found == value, "receipt field " + path + " mismatch"
  return found
}

/**
 * Assert a structured golden against an actual transcript-like value.
 *
 * The expected value may contain `<ms>`, `<uuid>`, or `<any>` sentinel strings.
 * Dict expected values are subset matches so tests can preserve the payload
 * fields that matter without pinning every implementation detail.
 *
 * Matching is delegated to `__testing_golden_match`. Returns `actual` on
 * success; on mismatch `require` fails with `message` when given,
 * otherwise with "golden transcript mismatch".
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_golden_transcript(expected, actual, message)
 */
pub fn assert_golden_transcript(expected, actual, message = nil) {
  require __testing_golden_match(expected, actual), message ?? "golden transcript mismatch"
  return actual
}

// -------------------------------------------------------------------------------------------------

// LLM mock turn builders
//
// `with_llm_mocks` / `with_mocks` already accept a list of LLM response
// dicts, but hand-building `{tool_calls: [{id, name, arguments}]}` and
// `{text: "##DONE##"}` literals is the reason most fixtures still reach
// for the raw `llm_mock_clear()` + sequential `llm_mock(...)` form. These
// constructors make a scoped script read as one ordered list of turns:
//
//   with_llm_mocks(
//     [llm_tool_call("write_note", {path: "n.txt"}), llm_done()],
//     { _ -> run_agent() },
//   )

// -------------------------------------------------------------------------------------------------

/**
 * Build a plain-text assistant turn for an LLM mock script.
 *
 * Returns the dict `{text: text}`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_text("Sure, here is the answer.")
 */
pub fn llm_text(text: string) -> dict {
  return {text: text}
}

/**
 * Build the terminal "done" turn for an agent-loop LLM mock script. The
 * default marker matches the `##DONE##` sentinel agent loops recognize;
 * pass a custom marker when the loop under test uses a different one.
 *
 * Returns the dict `{text: marker}`, the same shape `llm_text` produces.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_done()
 */
pub fn llm_done(marker: string = "##DONE##") -> dict {
  return {text: marker}
}

/**
 * Build an assistant turn that fails the LLM call with `message`. Mirrors
 * the `{error: ...}` shape `llm_mock` accepts so error-path tests can be
 * scripted alongside successful turns.
 *
 * Returns the dict `{error: message}`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_error("rate limited")
 */
pub fn llm_error(message: string) -> dict {
  return {error: message}
}

/**
 * Build an assistant turn containing exactly one tool call.
 *
 * `id` falls back to the tool `name` when nil, so simple scripts need not
 * invent call ids; supply an explicit `id` when a test asserts on it or
 * issues the same tool more than once. A nil `args` becomes an empty dict.
 * Returns `{tool_calls: [{id, name, arguments}]}`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_tool_call("write_note", {path: "n.txt"})
 */
pub fn llm_tool_call(name: string, args = {}, id = nil) -> dict {
  let call = {id: id ?? name, name: name, arguments: args ?? {}}
  return {tool_calls: [call]}
}

/**
 * Build an assistant turn that issues several tool calls at once.
 *
 * Each element of `calls` is `{name, args?, id?}`. A missing `id` is
 * synthesized as `name + "_" + index` (index counted from 0 across the
 * whole list) so repeated calls to one tool stay distinct; a missing
 * `args` becomes an empty dict. A nil `calls` yields an empty
 * `tool_calls` list. Returns `{tool_calls: [...]}`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_tool_calls([{name: "a", args: {}}, {name: "b"}])
 */
pub fn llm_tool_calls(calls) -> dict {
  var built = []
  var position = 0
  for spec in calls ?? [] {
    let fallback_id = to_string(spec?.name) + "_" + to_string(position)
    let turn_call = {id: spec?.id ?? fallback_id, name: spec?.name, arguments: spec?.args ?? {}}
    built = built.push(turn_call)
    position += 1
  }
  return {tool_calls: built}
}

/**
 * Alias for `with_llm_mocks` that reads as scripting an ordered list of
 * assistant turns. Pair it with the `llm_*` turn builders.
 *
 * Both arguments are forwarded unchanged, and the result of
 * `with_llm_mocks(turns, body)` is returned.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_llm_script([llm_text("hi"), llm_done()], body)
 */
pub fn with_llm_script(turns, body) {
  return with_llm_mocks(turns, body)
}

// -------------------------------------------------------------------------------------------------

// Error assertions
//
// Collapses the `try { ... }` + `is_err` + `unwrap_err` + `to_string` +
// `contains` chain that fixtures hand-roll to assert a call rejects.

// -------------------------------------------------------------------------------------------------

/**
 * Invoke `body` with no arguments and assert that it throws.
 *
 * Returns the caught error so callers can make further assertions on it.
 * When the body returns normally, `require` fails with `message` if
 * given, otherwise with a default diagnostic.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_throws({ -> parse("") })
 */
pub fn assert_throws(body, message = nil) -> any {
  var thrown = nil
  var did_throw = false
  try {
    body()
  } catch (err) {
    thrown = err
    did_throw = true
  }
  require did_throw, message ?? "assert_throws: expected the body to throw, but it returned normally"
  return thrown
}

/**
 * Invoke `body`, assert that it throws, and assert that the error
 * rendered with `to_string` contains `substring`.
 *
 * Returns the thrown error. Replaces the hand-rolled
 * try/is_err/unwrap_err/to_string/contains chain with a single call.
 * `message`, when given, overrides both the "did not throw" and the
 * "does not contain" diagnostics.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_error_contains({ -> connect("") }, "url is required")
 */
pub fn assert_error_contains(body, substring: string, message = nil) -> any {
  let thrown = assert_throws(body, message)
  let text = to_string(thrown)
  let fallback = "assert_error_contains: expected thrown error to contain " + to_string(substring) + ", got: " + text
  require contains(text, substring), message ?? fallback
  return thrown
}

/**
 * Run `body` and assert it does NOT throw, returning its value. Useful for
 * pinning a regression where a previously-rejected input must now succeed.
 *
 * `body` is invoked with no arguments. If it throws, `require` fails with
 * `message` when given, otherwise with a default diagnostic that embeds
 * the error rendered through `to_string`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_no_throw({ -> parse("ok") })
 */
pub fn assert_no_throw(body, message = nil) -> any {
  // NB: assign-then-return rather than `return body()` inside the try.
  // A throw inside `return <expr>` within a try is not observed by the
  // local catch, so the assign-then-return shape is required for the
  // catch to fire.
  var value = nil
  try {
    value = body()
  } catch (e) {
    // Turn the caught error into an assertion failure.
    require false, message ?? ("assert_no_throw: expected the body not to throw, but it threw: " + to_string(e))
  }
  return value
}

// -------------------------------------------------------------------------------------------------

// Filesystem fixtures
//
// A scoped temp workspace with guaranteed cleanup — the fs counterpart to
// `with_host_mocks`. `with_fs` additionally seeds the workspace from a
// `{ relative_path: contents }` dict so a test states its starting tree
// declaratively instead of hand-writing mkdir/write/cleanup.

// -------------------------------------------------------------------------------------------------

/**
 * Render a seed value as file contents: strings pass through unchanged,
 * every other value is encoded with `json_stringify`.
 */
fn __testing_fs_content(value) -> string {
  let is_text = type_of(value) == "string"
  if !is_text {
    return json_stringify(value)
  }
  return value
}

/**
 * Create a unique temp directory, pass its path to `body`, and delete it
 * recursively on exit — even if `body` throws. Returns whatever `body`
 * returns.
 *
 * The directory is `harness.fs.temp_dir() + "/harn-test-" + uuid_v7()`.
 * An error thrown by `body` is re-raised after the directory has been
 * deleted.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_temp_dir({ dir -> harness.fs.write_text(dir + "/x", "1") })
 */
pub fn with_temp_dir(body) -> any {
  let dir = harness.fs.temp_dir() + "/harn-test-" + uuid_v7()
  harness.fs.mkdir(dir)
  try {
    // Assign first, then clean up, then return: the result must be held
    // in a local so the delete can run before the function exits.
    let result = body(dir)
    harness.fs.delete(dir)
    return result
  } catch (e) {
    // Failure path: remove the directory, then re-raise the caught error.
    harness.fs.delete(dir)
    throw e
  }
}

/**
 * Variant of `with_temp_dir` that seeds the workspace before running
 * `body`.
 *
 * `files` is a `{ relative_path: contents }` dict (nil is treated as
 * empty). String contents are written verbatim; any other value is
 * JSON-encoded. The parent directory of every path is created first, so
 * `{ "a/b/c.txt": "hi" }` just works. `body` receives the temp directory
 * path, and the directory is removed on exit.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_fs({"in.txt": "hello"}, { dir -> harness.fs.read_text(dir + "/in.txt") })
 */
pub fn with_fs(files, body) -> any {
  return with_temp_dir(
    { dir ->
      for seed in entries(files ?? {}) {
        let target_path = dir + "/" + seed.key
        let contents = __testing_fs_content(seed.value)
        harness.fs.mkdir(dirname(target_path))
        harness.fs.write_text(target_path, contents)
      }
      body(dir)
    },
  )
}

/**
 * Single declarative fixture for tests that combine mocks with a seeded
 * workspace. `config` accepts:
 *   * `host` — host-mock entries (same shape as `with_host_mocks`)
 *   * `llm`  — LLM-mock turns (build them with the `llm_*` constructors)
 *   * `fs`   — a `{ relative_path: contents }` seed dict (see `with_fs`)
 *   * `temp_dir` — set `true` for an empty temp workspace without `fs`
 *
 * `body` receives a context dict `{dir}`: the temp directory path, or nil
 * when neither `fs` nor `temp_dir` is requested. `fs` takes precedence
 * over `temp_dir`. The mock scopes are opened inside the workspace scope,
 * so teardown happens in reverse order on exit, including on a thrown
 * error.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_scenario({llm: [llm_done()], fs: {"a.txt": "1"}}, { s -> run(s.dir) })
 */
pub fn with_scenario(config, body) {
  let mock_config = {host_mocks: config?.host ?? [], llm_mocks: config?.llm ?? []}
  // Every branch runs the body under the same mock scopes; only the
  // directory handed to it differs.
  let run_with_dir = { dir -> with_mocks(mock_config, { _ -> body({dir: dir}) }) }
  if config?.fs != nil {
    return with_fs(config.fs, run_with_dir)
  }
  if config?.temp_dir {
    return with_temp_dir(run_with_dir)
  }
  return run_with_dir(nil)
}