/** std/testing — helpers for writing Harn tests. */
// Module-level log of persona step hook payloads. It is reset by
// `step_events_clear`, appended to by the hooks that
// `step_assertions_begin` registers, and read back by `step_events` and
// `__testing_step_events_for`.
var __testing_step_events = []
/**
 * Shallow subset comparison used by the host-call and step matchers.
 *
 * - A nil `expected` matches anything.
 * - When both sides are dicts, every key in `expected` must map to an
 *   equal value in `actual`; extra keys in `actual` are ignored.
 * - Otherwise the two values are compared with `==`.
 */
fn params_subset_match(expected, actual) {
  if expected == nil {
    return true
  }
  let both_dicts = type_of(expected) == "dict" && type_of(actual) == "dict"
  if !both_dicts {
    return expected == actual
  }
  for pair in entries(expected) {
    if actual[pair.key] != pair.value {
      return false
    }
  }
  return true
}
/**
 * clear_host_mocks. Forwards to the `host_mock_clear` builtin and returns
 * whatever that call returns.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: clear_host_mocks()
 */
pub fn clear_host_mocks() {
  let cleared = host_mock_clear()
  return cleared
}
/**
 * mock_host_result. Registers `result` as the mocked response for the
 * host call `cap`.`op` through `host_mock`. A non-nil `params` is passed
 * along as a fourth argument; a nil `params` is omitted from the call.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: mock_host_result(cap, op, result, params)
 */
pub fn mock_host_result(cap: string, op: string, result, params) {
  if params != nil {
    return host_mock(cap, op, result, params)
  }
  return host_mock(cap, op, result)
}
/**
 * mock_host_error. Registers a mock for the host call `cap`.`op` whose
 * config dict carries `error: message`. A non-nil `params` is added to
 * that same config dict under the `params` key.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: mock_host_error(cap, op, message, params)
 */
pub fn mock_host_error(cap: string, op: string, message: string, params) {
  if params != nil {
    return host_mock(cap, op, {error: message, params: params})
  }
  return host_mock(cap, op, {error: message})
}
/**
 * mock_host_response. Passes a caller-built `config` straight through to
 * `host_mock` for the host call `cap`.`op` and returns that call's result.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: mock_host_response(cap, op, config)
 */
pub fn mock_host_response(cap: string, op: string, config) {
  let registered = host_mock(cap, op, config)
  return registered
}
/**
 * host_calls. Returns the value produced by the `host_mock_calls` builtin
 * (the recorded host-call log).
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: host_calls()
 */
pub fn host_calls() {
  let recorded = host_mock_calls()
  return recorded
}
/**
 * host_calls_for. Returns the recorded host calls whose `capability`
 * equals `cap` and whose `operation` equals `op`, in recorded order.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: host_calls_for(cap, op)
 */
pub fn host_calls_for(cap: string, op: string) {
  var matching = []
  for call in host_mock_calls() {
    if call?.capability == cap && call?.operation == op {
      matching = matching.push(call)
    }
  }
  return matching
}
/**
 * host_call_count. Returns the length of the list reported by
 * `host_mock_calls`.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: host_call_count()
 */
pub fn host_call_count() -> int {
  let recorded = host_mock_calls()
  return len(recorded)
}
/**
 * host_call_count_for. Returns how many recorded host calls match the
 * given capability and operation, as selected by `host_calls_for`.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: host_call_count_for(cap, op)
 */
pub fn host_call_count_for(cap: string, op: string) -> int {
  let matching = host_calls_for(cap, op)
  return len(matching)
}
/**
 * host_was_called. Returns true when at least one recorded call to
 * `cap`.`op` has params accepted by `params_subset_match` against
 * `expected_params`. Scanning stops at the first match.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: host_was_called(cap, op, expected_params)
 */
pub fn host_was_called(cap: string, op: string, expected_params) -> bool {
  let candidates = host_calls_for(cap, op)
  var idx = 0
  while idx < len(candidates) {
    let call = candidates[idx]
    if params_subset_match(expected_params, call?.params) {
      return true
    }
    idx += 1
  }
  return false
}
/**
 * assert_host_called. Passes (returning nil) when `host_was_called`
 * reports a matching call. Otherwise fails via `require` with `message`,
 * or with a generated description of the expected call when `message`
 * is nil.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_host_called(cap, op, params, message)
 */
pub fn assert_host_called(cap: string, op: string, params = nil, message = nil) {
  if host_was_called(cap, op, params) {
    return nil
  }
  // Build the default text in pieces; the params clause is only included
  // when the caller constrained the params.
  var default_message = "Expected host call " + cap + "." + op
  if params != nil {
    default_message = default_message + " with params " + to_string(params)
  }
  default_message = default_message + " to be recorded"
  require false, message ?? default_message
  return nil
}
/**
 * assert_host_call_count. Requires that exactly `expected_count` calls to
 * `cap`.`op` were recorded, failing with `message` (or a generated
 * expected-vs-actual text) otherwise. Returns the observed count.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_host_call_count(expected_count, cap, op, message)
 */
pub fn assert_host_call_count(expected_count: int, cap: string, op: string, message = nil) {
  let observed = host_call_count_for(cap, op)
  let expected_text = to_string(expected_count)
  let observed_text = to_string(observed)
  require observed == expected_count, message
    ?? ("Expected " + expected_text + " host calls, got " + observed_text)
  return observed
}
/**
 * assert_no_host_calls. Requires that `host_call_count()` is zero and
 * returns the observed count. On failure the error text is `message`
 * when supplied, otherwise a generated text that includes the count.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_no_host_calls(message)
 */
pub fn assert_no_host_calls(message = nil) {
  let actual_count = host_call_count()
  // The default text is grouped explicitly so the fallback is the whole
  // concatenation and a caller-supplied `message` is used verbatim,
  // independent of how `??` and `+` bind relative to each other.
  require actual_count == 0, message ?? ("Expected no host calls, got " + to_string(actual_count))
  return actual_count
}
/**
 * Register one host-mock fixture entry. An entry is a dict shaped like
 *   { capability, operation, result?, error?, params? }
 * Throws when the entry is not a dict or lacks `capability`/`operation`.
 * A non-nil `error` wins over `result`: the entry is registered through
 * `mock_host_error`, otherwise through `mock_host_result`.
 */
fn apply_host_mock_entry(entry) {
  let entry_kind = type_of(entry)
  if entry_kind != "dict" {
    throw "with_host_mocks: each entry must be a dict, got " + entry_kind
  }
  let capability = entry?.capability
  let operation = entry?.operation
  if capability == nil || operation == nil {
    throw "with_host_mocks: each entry must include 'capability' and 'operation'"
  }
  let scoped_params = entry?.params
  if entry?.error == nil {
    return mock_host_result(capability, operation, entry?.result, scoped_params)
  }
  return mock_host_error(capability, operation, entry.error, scoped_params)
}
/**
* Run `body` with a fresh host-mock scope. Each entry in `mocks` is
* applied via `mock_host_result` / `mock_host_error`, the body runs,
* and the prior host-mock state plus call log is restored on exit —
* even if the body throws. Nested scopes stack: an inner
* `with_host_mocks` does not leak into the outer scope.
*
* A nil `mocks` is treated as an empty list.
*
* Returns whatever `body` returns. Re-raises any thrown error after
* cleanup.
*
* @effects: []
* @allocation: heap
* @errors: []
* @api_stability: stable
* @example: with_host_mocks(mocks, body)
*/
pub fn with_host_mocks(mocks, body) {
// Open the scope first; both exit paths below pop it again.
host_mock_push_scope()
// Registration runs inside the same try/catch as the body so a
// malformed entry takes the cleanup path instead of leaking the
// pushed scope.
try {
for entry in mocks ?? [] {
apply_host_mock_entry(entry)
}
// NOTE(review): `__testing_call_body` is not defined in the visible part
// of this file; presumably it invokes `body` — confirm.
// The result is bound to a local before the pop so it can be returned
// after cleanup.
let result = __testing_call_body(body)
host_mock_pop_scope()
return result
} catch (e) {
// Failure path: pop the scope, then re-raise the same error.
host_mock_pop_scope()
throw e
}
}
/**
 * True when `type_of(value)` is one of the type names this module treats
 * as callable: "function", "closure", or "fn".
 */
fn __testing_is_callable(value) -> bool {
  return contains(["function", "closure", "fn"], type_of(value))
}
/**
 * Flatten a persona hook context into the record stored in the step-event
 * log: `event`, `target`, `persona`, plus the step's `name` (as
 * `step_name`), `function`, and `args` (defaulting to an empty list).
 * The `output` key is only present when the context carries a non-nil
 * `output`.
 */
fn __testing_step_event_record(ctx) {
  let step = ctx?.step
  let base = {
    event: ctx?.event,
    target: ctx?.target,
    persona: ctx?.persona,
    step_name: step?.name,
    function: step?.function,
    args: step?.args ?? [],
  }
  if ctx?.output == nil {
    return base
  }
  return base + {output: ctx.output}
}
/**
 * Return the recorded step events whose `event` field equals `kind`,
 * preserving recording order.
 */
fn __testing_step_events_for(kind) {
  return __testing_step_events.filter({ recorded -> return recorded.event == kind })
}
/**
 * Decide whether `candidate` satisfies `predicate`:
 * - nil predicate: always true;
 * - callable predicate: the result of calling it with `candidate`;
 * - dict predicate: shallow subset match via `params_subset_match`;
 * - anything else: plain equality.
 */
fn __testing_event_matches(candidate, predicate) -> bool {
  if predicate == nil {
    return true
  }
  if __testing_is_callable(predicate) {
    return predicate(candidate)
  }
  if type_of(predicate) != "dict" {
    return candidate == predicate
  }
  return params_subset_match(predicate, candidate)
}
/**
 * Check a handoff against an optional `target`. A nil target matches any
 * handoff. Otherwise the target must equal one of, checked in this order:
 * the `id`, `label`, or `kind` of `handoff.target_persona_or_human`, then
 * `handoff.target`, then `handoff.target_persona`.
 */
fn __testing_handoff_target_matches(handoff, target) -> bool {
  if target == nil {
    return true
  }
  let resolved = handoff?.target_persona_or_human ?? {}
  if resolved?.id == target {
    return true
  }
  if resolved?.label == target {
    return true
  }
  if resolved?.kind == target {
    return true
  }
  if handoff?.target == target {
    return true
  }
  return handoff?.target_persona == target
}
/**
 * Recursive golden comparison with sentinel support.
 *
 * Sentinels in `expected`:
 * - "<ms>": `actual` is an int, a float, or an all-digit string;
 * - "<uuid>": `actual` is a string in 8-4-4-4-12 hex form;
 * - "<any>": `actual` is anything but nil.
 *
 * Otherwise both values must share a `type_of`. Dicts are subset-matched
 * key by key, lists must have equal length and match element-wise, and
 * every other value is compared with `==`.
 */
fn __testing_golden_match(expected, actual) -> bool {
  let actual_kind = type_of(actual)
  if expected == "<ms>" {
    if actual_kind == "int" || actual_kind == "float" {
      return true
    }
    return actual_kind == "string" && len(regex_captures("^[0-9]+$", actual)) == 1
  }
  if expected == "<uuid>" {
    if actual_kind != "string" {
      return false
    }
    let uuid_pattern = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
    return len(regex_captures(uuid_pattern, actual)) == 1
  }
  if expected == "<any>" {
    return actual != nil
  }
  let expected_kind = type_of(expected)
  if expected_kind != actual_kind {
    return false
  }
  if expected_kind == "dict" {
    // Subset semantics: only keys present in `expected` are checked.
    for field in entries(expected) {
      if !__testing_golden_match(field.value, actual[field.key]) {
        return false
      }
    }
    return true
  }
  if expected_kind == "list" {
    if len(expected) != len(actual) {
      return false
    }
    var position = 0
    for item in expected {
      if !__testing_golden_match(item, actual[position]) {
        return false
      }
      position += 1
    }
    return true
  }
  return expected == actual
}
/**
 * llm_calls. Returns the LLM mock call log for the current scope, as
 * reported by the `llm_mock_calls` builtin.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_calls()
 */
pub fn llm_calls() {
  let recorded = llm_mock_calls()
  return recorded
}
/**
 * llm_call_count. Number of LLM calls recorded in the current scope,
 * computed as the length of `llm_mock_calls()`.
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: stable
 * @example: llm_call_count()
 */
pub fn llm_call_count() -> int {
  let recorded = llm_mock_calls()
  return len(recorded)
}
/**
* Run `body` with a fresh LLM-mock scope. Each entry in `mocks` is
* pushed via `llm_mock(...)` and consumed in order by the body. The
* prior LLM-mock queue plus call log is restored on exit — even if
* the body throws. Nested scopes stack the same way as
* `with_host_mocks`.
*
* A nil `mocks` is treated as an empty list; an entry that is not a dict
* throws before the body runs.
*
* Returns whatever `body` returns. Re-raises any thrown error after
* cleanup.
*
* @effects: []
* @allocation: heap
* @errors: []
* @api_stability: stable
* @example: with_llm_mocks(mocks, body)
*/
pub fn with_llm_mocks(mocks, body) {
// Open the scope first; both exit paths below pop it again.
llm_mock_push_scope()
// Registration shares the try with the body, so a rejected entry still
// reaches the pop in the catch.
try {
for entry in mocks ?? [] {
if type_of(entry) != "dict" {
throw "with_llm_mocks: each entry must be a dict, got " + type_of(entry)
}
llm_mock(entry)
}
// NOTE(review): `__testing_call_body` is not defined in the visible part
// of this file; presumably it invokes `body` — confirm.
let result = __testing_call_body(body)
llm_mock_pop_scope()
return result
} catch (e) {
// Failure path: pop the scope, then re-raise the same error.
llm_mock_pop_scope()
throw e
}
}
/**
* Unified scoped fixture for tests that mix host and LLM mocks.
* `config` is a dict with optional `host_mocks` and `llm_mocks` lists,
* each shaped like the entries accepted by `with_host_mocks` and
* `with_llm_mocks`. Both scopes are pushed before the body runs and
* popped (in reverse order) after — including on thrown errors.
*
* A missing or nil list is treated as empty. Returns whatever the body
* call returns; a thrown error is re-raised after both scopes are popped.
*
* @effects: []
* @allocation: heap
* @errors: []
* @api_stability: stable
* @example: with_mocks(config, body)
*/
pub fn with_mocks(config, body) {
let host_entries = config?.host_mocks ?? []
let llm_entries = config?.llm_mocks ?? []
// Push order is host then LLM; the pops below run LLM then host.
host_mock_push_scope()
llm_mock_push_scope()
// Wrap registration *and* body together so a malformed entry
// (e.g. a missing capability) still triggers the same restore path
// that a thrown body would. Without this, an exception during
// registration would leak both pushed scopes.
try {
for entry in host_entries {
apply_host_mock_entry(entry)
}
for entry in llm_entries {
if type_of(entry) != "dict" {
throw "with_mocks: each llm_mocks entry must be a dict, got " + type_of(entry)
}
llm_mock(entry)
}
// NOTE(review): `__testing_call_body` is not defined in the visible part
// of this file; presumably it invokes `body` — confirm.
let result = __testing_call_body(body)
llm_mock_pop_scope()
host_mock_pop_scope()
return result
} catch (e) {
// Failure path: pop both scopes in the same reverse order, then re-raise.
llm_mock_pop_scope()
host_mock_pop_scope()
throw e
}
}
/**
 * Reset the recorded persona step events to an empty list. Always
 * returns nil.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_events_clear()
 */
pub fn step_events_clear() {
  let empty_log = []
  __testing_step_events = empty_log
  return nil
}
/**
 * Return the PreStep/PostStep records captured since the log was last
 * cleared (recording is started by `step_assertions_begin`).
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_events()
 */
pub fn step_events() {
  let recorded = __testing_step_events
  return recorded
}
/**
 * Begin capturing persona step hook payloads for Harn-level tests.
 *
 * Existing persona hooks are cleared and the step-event log is reset,
 * because this helper owns the hook registrations during a conformance
 * fixture. It then registers a PreStep and a PostStep hook for
 * `persona_pattern`; each appends one record to the log. Register any
 * fixture-specific hooks after calling this helper.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_assertions_begin(persona_pattern)
 */
pub fn step_assertions_begin(persona_pattern = "*") {
  clear_persona_hooks()
  step_events_clear()
  // Both hook kinds record the payload the same way, so one closure is
  // registered for each.
  let record_step = { ctx ->
    __testing_step_events = __testing_step_events.push(__testing_step_event_record(ctx))
    return nil
  }
  register_persona_hook(persona_pattern, "PreStep", record_step)
  register_persona_hook(persona_pattern, "PostStep", record_step)
  return nil
}
/**
 * End step recording by clearing all persona hooks. The recorded events
 * themselves are left in place. Always returns nil.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: step_assertions_end()
 */
pub fn step_assertions_end() {
  // Only the hooks are removed here; the event log is not touched.
  clear_persona_hooks()
  return nil
}
/**
 * Assert the exact ordered list of @step names that ran.
 *
 * The actual names are taken from the recorded PreStep events. Each
 * expected name must appear among them, and the full list must equal
 * `expected_names`. A supplied `message` replaces either failure text.
 * Returns the actual list of names.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_steps_ran(expected_names, message)
 */
pub fn assert_steps_ran(expected_names, message = nil) {
  var actual = []
  for event in __testing_step_events_for("PreStep") {
    actual = actual.push(event.step_name)
  }
  for name in expected_names {
    // The default text is grouped explicitly so the fallback is the whole
    // concatenation and a caller-supplied `message` is used verbatim,
    // independent of how `??` and `+` bind relative to each other.
    require contains(actual, name), message
      ?? ("step '" + name + "' not callable: not annotated `@step`")
  }
  require actual == expected_names, message ?? "step order mismatch"
  return actual
}
/**
 * Assert that a step received an input matching a closure, dict subset, or value.
 *
 * Scans the recorded PreStep events and returns the first one whose
 * `step_name` equals `step_name` and which satisfies `predicate` (a nil
 * predicate accepts any event). Fails via `require` when none matches,
 * using `message` verbatim when supplied.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_step_received(step_name, predicate, message)
 */
pub fn assert_step_received(step_name, predicate = nil, message = nil) {
  for event in __testing_step_events_for("PreStep") {
    if event.step_name == step_name && __testing_event_matches(event, predicate) {
      return event
    }
  }
  // The default text is grouped explicitly so the fallback is the whole
  // concatenation, independent of how `??` and `+` bind relative to each
  // other.
  require false, message ?? ("step '" + step_name + "' did not receive the expected input")
  return nil
}
/**
 * Assert that a step emitted an output matching a closure, dict subset, or value.
 *
 * Scans the recorded PostStep events and returns the first one whose
 * `step_name` equals `step_name` and which satisfies `predicate` (a nil
 * predicate accepts any event). Fails via `require` when none matches,
 * using `message` verbatim when supplied.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_step_emitted(step_name, predicate, message)
 */
pub fn assert_step_emitted(step_name, predicate = nil, message = nil) {
  for event in __testing_step_events_for("PostStep") {
    if event.step_name == step_name && __testing_event_matches(event, predicate) {
      return event
    }
  }
  // The default text is grouped explicitly so the fallback is the whole
  // concatenation, independent of how `??` and `+` bind relative to each
  // other.
  require false, message ?? ("step '" + step_name + "' did not emit the expected output")
  return nil
}
/**
 * Assert that a handoff list or run record contains a handoff of kind and optional target.
 *
 * `source` is either a value with a `handoffs` field or the handoff list
 * itself. Returns the first handoff whose `kind` equals `handoff_kind`
 * and whose target matches (a nil `target` matches any). Fails via
 * `require` when no such handoff exists.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_handoff_emitted(source, handoff_kind, target)
 */
pub fn assert_handoff_emitted(source, handoff_kind, target = nil) {
  let handoffs = source?.handoffs ?? source
  for candidate in handoffs ?? [] {
    if candidate?.kind == handoff_kind && __testing_handoff_target_matches(candidate, target) {
      return candidate
    }
  }
  // Only mention the target in the failure text when one was requested.
  var suffix = ""
  if target != nil {
    suffix = " to " + to_string(target)
  }
  require false, "handoff '" + handoff_kind + "'" + suffix + " was not emitted"
  return nil
}
/**
 * Assert an RFC 6901 JSON Pointer field inside a receipt or envelope.
 *
 * Resolves `path` against `receipt` with `json_pointer`, requires the
 * result to equal `value`, and returns the resolved value.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_receipt_field(receipt, path, value)
 */
pub fn assert_receipt_field(receipt, path, value) {
  let resolved = json_pointer(receipt, path)
  let matches = resolved == value
  require matches, "receipt field " + path + " mismatch"
  return resolved
}
/**
 * Assert a structured golden against an actual transcript-like value.
 *
 * `expected` may embed the `<ms>`, `<uuid>`, or `<any>` sentinel strings.
 * Dict values in `expected` are subset matches, so a test can pin only
 * the payload fields that matter. Returns `actual` on success.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_golden_transcript(expected, actual, message)
 */
pub fn assert_golden_transcript(expected, actual, message = nil) {
  let matched = __testing_golden_match(expected, actual)
  require matched, message ?? "golden transcript mismatch"
  return actual
}
// -------------------------------------------------------------------------------------------------
// LLM mock turn builders
//
// `with_llm_mocks` / `with_mocks` already accept a list of LLM response
// dicts, but hand-building `{tool_calls: [{id, name, arguments}]}` and
// `{text: "##DONE##"}` literals is the reason most fixtures still reach
// for the raw `llm_mock_clear()` + sequential `llm_mock(...)` form. These
// constructors make a scoped script read as one ordered list of turns:
//
// with_llm_mocks(
// [llm_tool_call("write_note", {path: "n.txt"}), llm_done()],
// { _ -> run_agent() },
// )
// -------------------------------------------------------------------------------------------------
/**
 * Build a plain-text assistant turn for an LLM mock script: a dict with
 * a single `text` key.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_text("Sure, here is the answer.")
 */
pub fn llm_text(text: string) -> dict {
  let turn = {text: text}
  return turn
}
/**
 * Build the terminal "done" turn for an agent-loop LLM mock script: a
 * text turn whose text is `marker`. The default marker is the `##DONE##`
 * sentinel; pass a custom marker when the loop under test uses a
 * different one.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_done()
 */
pub fn llm_done(marker: string = "##DONE##") -> dict {
  let turn = {text: marker}
  return turn
}
/**
 * Build an assistant turn that fails the LLM call with `message`: a dict
 * with a single `error` key, the same `{error: ...}` shape handed to
 * `llm_mock`, so error paths can be scripted next to successful turns.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_error("rate limited")
 */
pub fn llm_error(message: string) -> dict {
  let turn = {error: message}
  return turn
}
/**
 * Build a single-tool-call assistant turn. A nil `id` falls back to the
 * tool name and nil `args` fall back to an empty dict; pass an explicit
 * `id` when a test asserts on it or issues the same tool twice in one
 * turn.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_tool_call("write_note", {path: "n.txt"})
 */
pub fn llm_tool_call(name: string, args = {}, id = nil) -> dict {
  let call = {id: id ?? name, name: name, arguments: args ?? {}}
  return {tool_calls: [call]}
}
/**
 * Build an assistant turn that issues several tool calls in parallel.
 * Each entry is `{name, args?, id?}`. A missing `id` becomes
 * `name + "_" + index` (zero-based position in `calls`) so repeated calls
 * to one tool stay distinct; missing `args` become an empty dict. A nil
 * `calls` yields an empty `tool_calls` list.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: llm_tool_calls([{name: "a", args: {}}, {name: "b"}])
 */
pub fn llm_tool_calls(calls) -> dict {
  var built = []
  var position = 0
  for spec in calls ?? [] {
    let tool_name = spec?.name
    let fallback_id = to_string(tool_name) + "_" + to_string(position)
    built = built.push({id: spec?.id ?? fallback_id, name: tool_name, arguments: spec?.args ?? {}})
    position += 1
  }
  return {tool_calls: built}
}
/**
 * Alias for `with_llm_mocks`, named so a test reads as scripting an
 * ordered list of assistant turns. Pair it with the `llm_*` turn
 * builders. Returns whatever `with_llm_mocks` returns.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_llm_script([llm_text("hi"), llm_done()], body)
 */
pub fn with_llm_script(turns, body) {
  let outcome = with_llm_mocks(turns, body)
  return outcome
}
// -------------------------------------------------------------------------------------------------
// Error assertions
//
// Collapses the `try { ... }` + `is_err` + `unwrap_err` + `to_string` +
// `contains` chain that fixtures hand-roll to assert a call rejects.
// -------------------------------------------------------------------------------------------------
/**
 * Call `body` with no arguments and assert that it throws. Returns the
 * caught error so callers can assert further on it. When the body
 * returns normally the assertion fails with `message`, or a default text
 * when `message` is nil.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_throws({ -> parse("") })
 */
pub fn assert_throws(body, message = nil) -> any {
  var did_throw = false
  var thrown = nil
  try {
    body()
  } catch (err) {
    // A separate flag is tracked alongside the captured error value.
    did_throw = true
    thrown = err
  }
  require did_throw, message ?? "assert_throws: expected the body to throw, but it returned normally"
  return thrown
}
/**
 * Run `body`, assert it throws (via `assert_throws`), and assert that the
 * error rendered with `to_string` contains `substring`. Returns the
 * thrown error. Stands in for the hand-written
 * try/is_err/unwrap_err/to_string/contains chain.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_error_contains({ -> connect("") }, "url is required")
 */
pub fn assert_error_contains(body, substring: string, message = nil) -> any {
  let thrown = assert_throws(body, message)
  let rendered = to_string(thrown)
  let found = contains(rendered, substring)
  require found, message
    ?? ("assert_error_contains: expected thrown error to contain "
    + to_string(substring)
    + ", got: "
    + rendered)
  return thrown
}
/**
 * Call `body` with no arguments and assert that it does NOT throw,
 * returning its value. Handy for pinning a regression where an input
 * that used to be rejected must now succeed.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: assert_no_throw({ -> parse("ok") })
 */
pub fn assert_no_throw(body, message = nil) -> any {
  // NB: the value is assigned inside the try and returned afterwards,
  // instead of writing `return body()` inside the try. A throw inside
  // `return <expr>` within a try is not observed by the local catch, so
  // this shape is required for the catch to fire.
  var outcome = nil
  try {
    outcome = body()
  } catch (err) {
    require false, message ?? ("assert_no_throw: expected the body not to throw, but it threw: " + to_string(err))
  }
  return outcome
}
// -------------------------------------------------------------------------------------------------
// Filesystem fixtures
//
// A scoped temp workspace with guaranteed cleanup — the fs counterpart to
// `with_host_mocks`. `with_fs` additionally seeds the workspace from a
// `{ relative_path: contents }` dict so a test states its starting tree
// declaratively instead of hand-writing mkdir/write/cleanup.
// -------------------------------------------------------------------------------------------------
/**
 * Render a seed value as file contents: strings are returned unchanged,
 * every other value is encoded with `json_stringify`.
 */
fn __testing_fs_content(value) -> string {
  if type_of(value) != "string" {
    return json_stringify(value)
  }
  return value
}
/**
* Create a unique temp directory, pass its path to `body`, and delete it
* recursively on exit — even if `body` throws. Returns whatever `body`
* returns.
*
* The directory path is `harness.fs.temp_dir()` plus `/harn-test-` and a
* `uuid_v7()` value. A thrown error is re-raised after the delete call.
*
* @effects: []
* @allocation: heap
* @errors: []
* @api_stability: stable
* @example: with_temp_dir({ dir -> harness.fs.write_text(dir + "/x", "1") })
*/
pub fn with_temp_dir(body) -> any {
let dir = harness.fs.temp_dir() + "/harn-test-" + uuid_v7()
// The directory is created before the try, so a failing mkdir propagates
// without reaching the delete calls below.
harness.fs.mkdir(dir)
try {
// The result is bound to a local before the delete so it can be returned
// after cleanup.
let result = body(dir)
// NOTE(review): the header says the delete is recursive; that depends on
// `harness.fs.delete`, which is not visible here — confirm.
harness.fs.delete(dir)
return result
} catch (e) {
// Failure path: remove the directory, then re-raise the same error.
harness.fs.delete(dir)
throw e
}
}
/**
 * Like `with_temp_dir`, but seeds the workspace from `files` before
 * running `body`. `files` is a `{ relative_path: contents }` dict (nil is
 * treated as empty). String contents are written verbatim; any other
 * value is JSON-encoded. `mkdir` is called on each file's parent
 * directory before the write, so `{ "a/b/c.txt": "hi" }` just works. The
 * temp directory path is passed to `body` and removed on exit.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_fs({"in.txt": "hello"}, { dir -> harness.fs.read_text(dir + "/in.txt") })
 */
pub fn with_fs(files, body) -> any {
  let seed_then_run = { dir ->
    for seed in entries(files ?? {}) {
      let target = dir + "/" + seed.key
      harness.fs.mkdir(dirname(target))
      harness.fs.write_text(target, __testing_fs_content(seed.value))
    }
    body(dir)
  }
  return with_temp_dir(seed_then_run)
}
/**
 * One declarative block for tests that mix mocks and a seeded workspace.
 * `config` accepts:
 *   * `host` — host-mock entries (same shape as `with_host_mocks`)
 *   * `llm` — LLM-mock turns (build them with the `llm_*` constructors)
 *   * `fs` — a `{ relative_path: contents }` seed dict (see `with_fs`)
 *   * `temp_dir` — set `true` for an empty temp workspace without `fs`
 *
 * `body` receives a context dict `{dir}`: the temp directory, or nil when
 * neither `fs` nor `temp_dir` is requested. `fs` takes priority over
 * `temp_dir`. The mock scopes are opened inside the workspace scope, so
 * teardown runs in reverse order on exit, including on a thrown error.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: with_scenario({llm: [llm_done()], fs: {"a.txt": "1"}}, { s -> run(s.dir) })
 */
pub fn with_scenario(config, body) {
  let mock_config = {host_mocks: config?.host ?? [], llm_mocks: config?.llm ?? []}
  // Shared runner: opens the mock scopes and hands `body` its context dict.
  let run_with_dir = { dir -> with_mocks(mock_config, { _ -> body({dir: dir}) }) }
  if config?.fs != nil {
    return with_fs(config.fs, run_with_dir)
  }
  if config?.temp_dir {
    return with_temp_dir(run_with_dir)
  }
  return run_with_dir(nil)
}