harn-stdlib 0.8.3

/** std/testing — helpers for writing Harn tests. */
// Module-level log of recorded persona step events. It is reset by
// `step_events_clear`, appended to by the PreStep/PostStep hooks that
// `step_assertions_begin` registers, and read by `step_events` and
// `__testing_step_events_for`.
var __testing_step_events = []

/**
 * Shallow subset comparison used by the host-call and step matchers.
 *
 * - A nil `expected` matches anything.
 * - Unless both values are dicts, the result is plain `==` equality.
 * - When both are dicts, every key of `expected` must map to an equal
 *   value in `actual`; extra keys in `actual` are ignored. Values are
 *   compared with `!=`, so nested dicts are not subset-matched.
 */
fn params_subset_match(expected, actual) {
  if expected == nil {
    return true
  }
  let both_dicts = type_of(expected) == "dict" && type_of(actual) == "dict"
  if !both_dicts {
    return expected == actual
  }
  for pair in entries(expected) {
    if actual[pair.key] != pair.value {
      return false
    }
  }
  return true
}

/**
 * clear_host_mocks. Delegates to the `host_mock_clear` builtin and
 * returns whatever it returns.
 */
pub fn clear_host_mocks() {
  let cleared = host_mock_clear()
  return cleared
}

/**
 * mock_host_result. Register `result` as the mocked outcome of host
 * call `cap`.`op` by delegating to `host_mock`.
 *
 * `params` is optional. When omitted or nil, `host_mock` is called with
 * three arguments; otherwise `params` is forwarded as the fourth
 * argument (presumably narrowing which calls the mock applies to —
 * confirm against `host_mock`).
 *
 * Returns whatever `host_mock` returns.
 */
pub fn mock_host_result(cap: string, op: string, result, params = nil) {
  if params == nil {
    return host_mock(cap, op, result)
  }
  return host_mock(cap, op, result, params)
}

/**
 * mock_host_error. Register an error mock for host call `cap`.`op` by
 * passing an `{error: message}` config dict to `host_mock`.
 *
 * `params` is optional. When omitted or nil, only the error config is
 * sent; otherwise the config is extended with a `params` key before it
 * is handed to `host_mock`.
 *
 * Returns whatever `host_mock` returns.
 */
pub fn mock_host_error(cap: string, op: string, message: string, params = nil) {
  let config = {error: message}
  if params == nil {
    return host_mock(cap, op, config)
  }
  return host_mock(cap, op, config + {params: params})
}

/**
 * mock_host_response. Pass a caller-built `config` straight through to
 * `host_mock` for host call `cap`.`op` and return its result.
 */
pub fn mock_host_response(cap: string, op: string, config) {
  let registered = host_mock(cap, op, config)
  return registered
}

/** host_calls. Returns the call log reported by `host_mock_calls`. */
pub fn host_calls() {
  let recorded = host_mock_calls()
  return recorded
}

/**
 * host_calls_for. Returns the recorded host calls whose `capability`
 * equals `cap` and whose `operation` equals `op`, in log order.
 */
pub fn host_calls_for(cap: string, op: string) {
  var matching = []
  for call in host_mock_calls() {
    if call?.capability == cap && call?.operation == op {
      matching = matching.push(call)
    }
  }
  return matching
}

/** host_call_count. Number of entries in the `host_mock_calls` log. */
pub fn host_call_count() -> int {
  let recorded = host_mock_calls()
  return len(recorded)
}

/**
 * host_call_count_for. Number of recorded host calls matching `cap`
 * and `op`, as selected by `host_calls_for`.
 */
pub fn host_call_count_for(cap: string, op: string) -> int {
  let matching = host_calls_for(cap, op)
  return len(matching)
}

/**
 * host_was_called. True when at least one recorded `cap`.`op` call has
 * params matching `expected_params`.
 *
 * `expected_params` is optional. When omitted or nil, any recorded call
 * for `cap`.`op` counts, because `params_subset_match` treats a nil
 * expectation as a match. A dict expectation is a shallow subset match
 * against the call's `params`.
 */
pub fn host_was_called(cap: string, op: string, expected_params = nil) -> bool {
  for call in host_calls_for(cap, op) {
    if params_subset_match(expected_params, call?.params) {
      return true
    }
  }
  return false
}

/**
 * assert_host_called. Require that a `cap`.`op` host call was recorded,
 * optionally with params matching `params` (see `host_was_called`).
 *
 * Returns nil on success. On failure the `require` fails with `message`
 * when one is given, otherwise with a generated description of the
 * missing call.
 */
pub fn assert_host_called(cap: string, op: string, params = nil, message = nil) {
  let was_recorded = host_was_called(cap, op, params)
  if was_recorded {
    return nil
  }
  // Build the fallback text only on the failure path.
  var default_message = "Expected host call " + cap + "." + op + " to be recorded"
  if params != nil {
    default_message = "Expected host call " + cap + "." + op + " with params " + to_string(params) + " to be recorded"
  }
  require false, message ?? default_message
  return nil
}

/**
 * assert_host_call_count. Require that exactly `expected_count` host
 * calls were recorded for `cap`.`op`.
 *
 * Note the parameter order: `expected_count` comes first, before `cap`
 * and `op`.
 *
 * Returns the actual count on success. On failure the `require` fails
 * with `message` when given, otherwise with a default message that
 * names the capability and operation so the failing assertion can be
 * identified when a test counts several different calls.
 */
pub fn assert_host_call_count(expected_count: int, cap: string, op: string, message = nil) {
  let actual_count = host_call_count_for(cap, op)
  let default_message = "Expected " + to_string(expected_count) + " host calls for " + cap + "." + op + ", got " + to_string(actual_count)
  require actual_count == expected_count, message ?? default_message
  return actual_count
}

/**
 * assert_no_host_calls. Require that the host call log is empty.
 * Returns the recorded count (0) on success.
 */
pub fn assert_no_host_calls(message = nil) {
  let recorded = host_call_count()
  require recorded == 0, message ?? "Expected no host calls, got " + to_string(recorded)
  return recorded
}

/**
 * Register one host-mock fixture entry. An entry is a dict shaped like
 *   { capability, operation, result?, error?, params? }
 * A non-nil `error` wins over `result`: the entry is registered through
 * `mock_host_error`, otherwise through `mock_host_result`.
 *
 * Throws a string error when the entry is not a dict or lacks
 * `capability` / `operation`.
 */
fn apply_host_mock_entry(entry) {
  let entry_kind = type_of(entry)
  if entry_kind != "dict" {
    throw "with_host_mocks: each entry must be a dict, got " + type_of(entry)
  }
  let capability = entry?.capability
  let operation = entry?.operation
  if capability == nil || operation == nil {
    throw "with_host_mocks: each entry must include 'capability' and 'operation'"
  }
  let wants_error = entry?.error != nil
  if wants_error {
    return mock_host_error(capability, operation, entry.error, entry?.params)
  }
  return mock_host_result(capability, operation, entry?.result, entry?.params)
}

/**
 * Execute `body` inside a freshly pushed host-mock scope.
 *
 * Every entry of `mocks` (nil is treated as an empty list) is
 * registered through `apply_host_mock_entry`, then `body` is invoked.
 * The pushed scope is popped again on both the success path and the
 * error path, so nested `with_host_mocks` calls stack without leaking
 * into the enclosing scope.
 *
 * Returns the value produced by `body`; any thrown error is re-thrown
 * after the scope has been popped.
 */
pub fn with_host_mocks(mocks, body) {
  host_mock_push_scope()
  // Mock registration lives inside the try block on purpose: a bad
  // entry must go through the same pop as a failing body, otherwise
  // the scope pushed above would be left behind.
  try {
    let pending = mocks ?? []
    for mock_entry in pending {
      apply_host_mock_entry(mock_entry)
    }
    let outcome = __testing_call_body(body)
    host_mock_pop_scope()
    return outcome
  } catch (err) {
    host_mock_pop_scope()
    throw err
  }
}

/**
 * True when `type_of(value)` is one of the names this module treats as
 * callable: "function", "closure", or "fn".
 */
fn __testing_is_callable(value) -> bool {
  let callable_kinds = ["function", "closure", "fn"]
  return contains(callable_kinds, type_of(value))
}

/**
 * Flatten a persona step hook context into the record stored in the
 * step event log: `event`, `target`, `persona`, `step_name`,
 * `function`, and `args` (defaulting to an empty list). An `output`
 * key is added only when the context carries a non-nil output.
 */
fn __testing_step_event_record(ctx) {
  let base = {
    event: ctx?.event,
    target: ctx?.target,
    persona: ctx?.persona,
    step_name: ctx?.step?.name,
    function: ctx?.step?.function,
    args: ctx?.step?.args ?? [],
  }
  if ctx?.output == nil {
    return base
  }
  return base + {output: ctx.output}
}

/**
 * Return the recorded step events whose `event` field equals `kind`,
 * preserving recording order.
 */
fn __testing_step_events_for(kind) {
  return __testing_step_events
    .filter({ recorded -> return recorded.event == kind })
}

/**
 * Decide whether `candidate` satisfies `predicate`:
 * - nil predicate: always matches;
 * - callable predicate: its return value for `candidate` is the answer;
 * - dict predicate: shallow subset match via `params_subset_match`;
 * - anything else: plain equality.
 */
fn __testing_event_matches(candidate, predicate) -> bool {
  if predicate == nil {
    return true
  }
  if __testing_is_callable(predicate) {
    return predicate(candidate)
  }
  let is_dict_predicate = type_of(predicate) == "dict"
  if is_dict_predicate {
    return params_subset_match(predicate, candidate)
  }
  return candidate == predicate
}

/**
 * True when `target` is nil, or when it equals any of the handoff's
 * target identifiers: the `id`, `label`, or `kind` of
 * `target_persona_or_human`, or the handoff's own `target` /
 * `target_persona` field.
 */
fn __testing_handoff_target_matches(handoff, target) -> bool {
  if target == nil {
    return true
  }
  let resolved = handoff?.target_persona_or_human ?? {}
  if resolved?.id == target {
    return true
  }
  if resolved?.label == target {
    return true
  }
  if resolved?.kind == target {
    return true
  }
  if handoff?.target == target {
    return true
  }
  return handoff?.target_persona == target
}

/**
 * Recursive golden comparison with sentinel support.
 *
 * Sentinels in `expected`:
 * - "<ms>": `actual` is an int, a float, or an all-digit string;
 * - "<uuid>": `actual` is a string in 8-4-4-4-12 hex form;
 * - "<any>": `actual` is anything except nil.
 *
 * Otherwise the types must agree. Dicts are subset-matched (only keys
 * present in `expected` are checked, recursively), lists must have the
 * same length and match element by element, and all other values are
 * compared with `==`.
 */
fn __testing_golden_match(expected, actual) -> bool {
  let actual_kind = type_of(actual)
  if expected == "<ms>" {
    if actual_kind == "int" || actual_kind == "float" {
      return true
    }
    return actual_kind == "string" && len(regex_captures("^[0-9]+$", actual)) == 1
  }
  if expected == "<uuid>" {
    if actual_kind != "string" {
      return false
    }
    let uuid_hits = regex_captures(
      "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$",
      actual,
    )
    return len(uuid_hits) == 1
  }
  if expected == "<any>" {
    return actual != nil
  }
  let expected_kind = type_of(expected)
  if expected_kind != actual_kind {
    return false
  }
  if expected_kind == "dict" {
    // Subset semantics: keys that only exist in `actual` are ignored.
    for field in entries(expected) {
      let field_ok = __testing_golden_match(field.value, actual[field.key])
      if !field_ok {
        return false
      }
    }
    return true
  }
  if expected_kind == "list" {
    let total = len(expected)
    if total != len(actual) {
      return false
    }
    var pos = 0
    while pos < total {
      let item_ok = __testing_golden_match(expected[pos], actual[pos])
      if !item_ok {
        return false
      }
      pos += 1
    }
    return true
  }
  return expected == actual
}

/** llm_calls. Returns the call log reported by `llm_mock_calls`. */
pub fn llm_calls() {
  let recorded = llm_mock_calls()
  return recorded
}

/** llm_call_count. Number of entries in the `llm_mock_calls` log. */
pub fn llm_call_count() -> int {
  let recorded = llm_mock_calls()
  return len(recorded)
}

/**
 * Execute `body` inside a freshly pushed LLM-mock scope.
 *
 * Every entry of `mocks` (nil is treated as an empty list) must be a
 * dict and is registered with `llm_mock(...)` in list order before
 * `body` is invoked. The pushed scope is popped on both the success
 * and the error path, mirroring `with_host_mocks`.
 *
 * Returns the value produced by `body`; any thrown error — including
 * the one raised for a non-dict entry — is re-thrown after the scope
 * has been popped.
 */
pub fn with_llm_mocks(mocks, body) {
  llm_mock_push_scope()
  try {
    let pending = mocks ?? []
    for mock_entry in pending {
      if type_of(mock_entry) != "dict" {
        throw "with_llm_mocks: each entry must be a dict, got " + type_of(mock_entry)
      }
      llm_mock(mock_entry)
    }
    let outcome = __testing_call_body(body)
    llm_mock_pop_scope()
    return outcome
  } catch (err) {
    llm_mock_pop_scope()
    throw err
  }
}

/**
 * Combined scoped fixture for tests that need host and LLM mocks at
 * once. `config` may carry `host_mocks` and `llm_mocks` lists (each
 * defaults to empty); their entries have the same shape as those taken
 * by `with_host_mocks` and `with_llm_mocks`.
 *
 * The host scope is pushed first, then the LLM scope; they are popped
 * in the opposite order (LLM, then host) on both success and error.
 *
 * Returns the value produced by `body`; errors are re-thrown after
 * both scopes have been popped.
 */
pub fn with_mocks(config, body) {
  let host_fixture = config?.host_mocks ?? []
  let llm_fixture = config?.llm_mocks ?? []
  host_mock_push_scope()
  llm_mock_push_scope()
  // Registration shares the try block with the body so that an invalid
  // entry (for example one without a capability) unwinds through the
  // same pops as a failing body instead of leaking both scopes.
  try {
    for host_entry in host_fixture {
      apply_host_mock_entry(host_entry)
    }
    for llm_entry in llm_fixture {
      if type_of(llm_entry) != "dict" {
        throw "with_mocks: each llm_mocks entry must be a dict, got " + type_of(llm_entry)
      }
      llm_mock(llm_entry)
    }
    let outcome = __testing_call_body(body)
    llm_mock_pop_scope()
    host_mock_pop_scope()
    return outcome
  } catch (err) {
    llm_mock_pop_scope()
    host_mock_pop_scope()
    throw err
  }
}

/** Reset the recorded persona step event log to an empty list. Returns nil. */
pub fn step_events_clear() {
  let fresh_log = []
  __testing_step_events = fresh_log
  return nil
}

/**
 * Return every PreStep/PostStep record captured since
 * `step_assertions_begin` (or the last `step_events_clear`).
 */
pub fn step_events() {
  let recorded = __testing_step_events
  return recorded
}

/**
 * Begin capturing persona step hook payloads for Harn-level tests.
 *
 * Existing persona hooks are cleared first, because this helper owns
 * the hook registrations for the duration of a conformance fixture;
 * register fixture-specific hooks only after calling it. The event log
 * is reset, and one recording hook is installed for "PreStep" and one
 * for "PostStep" on personas matching `persona_pattern`.
 *
 * Returns nil.
 */
pub fn step_assertions_begin(persona_pattern = "*") {
  clear_persona_hooks()
  step_events_clear()
  // Both hook kinds record the same way: append the flattened context.
  let record_step = { ctx ->
    __testing_step_events = __testing_step_events.push(__testing_step_event_record(ctx))
    return nil
  }
  register_persona_hook(persona_pattern, "PreStep", record_step)
  register_persona_hook(persona_pattern, "PostStep", record_step)
  return nil
}

/**
 * Stop capturing persona step payloads by clearing all persona hooks.
 * The recorded events are left untouched. Returns nil.
 */
pub fn step_assertions_end() {
  clear_persona_hooks()
  return nil
}

/**
 * Require that the recorded PreStep events name exactly the steps in
 * `expected_names`, in the same order.
 *
 * Each expected name is first checked for presence so a missing step
 * produces a per-step failure; the full ordered comparison follows.
 * Returns the list of step names that actually ran.
 */
pub fn assert_steps_ran(expected_names, message = nil) {
  var ran = []
  for pre_event in __testing_step_events_for("PreStep") {
    ran = ran.push(pre_event.step_name)
  }
  for name in expected_names {
    require contains(ran, name), message ?? "step '" + name + "' not callable: not annotated `@step`"
  }
  require ran == expected_names, message ?? "step order mismatch"
  return ran
}

/**
 * Require that some recorded PreStep event for `step_name` satisfies
 * `predicate` (a closure, a dict subset, or a value; nil accepts any
 * event). The predicate is evaluated against the whole event record.
 * Returns the first matching event.
 */
pub fn assert_step_received(step_name, predicate = nil, message = nil) {
  for pre_event in __testing_step_events_for("PreStep") {
    if pre_event.step_name == step_name {
      if __testing_event_matches(pre_event, predicate) {
        return pre_event
      }
    }
  }
  require false, message ?? "step '" + step_name + "' did not receive the expected input"
  return nil
}

/**
 * Require that some recorded PostStep event for `step_name` satisfies
 * `predicate` (a closure, a dict subset, or a value; nil accepts any
 * event). The predicate is evaluated against the whole event record.
 * Returns the first matching event.
 */
pub fn assert_step_emitted(step_name, predicate = nil, message = nil) {
  for post_event in __testing_step_events_for("PostStep") {
    if post_event.step_name == step_name {
      if __testing_event_matches(post_event, predicate) {
        return post_event
      }
    }
  }
  require false, message ?? "step '" + step_name + "' did not emit the expected output"
  return nil
}

/**
 * Require that `source` contains a handoff whose `kind` equals
 * `handoff_kind` and, when `target` is given, whose target matches it
 * (see `__testing_handoff_target_matches`).
 *
 * `source` may be a run record exposing `handoffs` or the handoff list
 * itself. Returns the first matching handoff.
 */
pub fn assert_handoff_emitted(source, handoff_kind, target = nil) {
  let candidates = (source?.handoffs ?? source) ?? []
  for handoff in candidates {
    if handoff?.kind == handoff_kind {
      if __testing_handoff_target_matches(handoff, target) {
        return handoff
      }
    }
  }
  var suffix = ""
  if target != nil {
    suffix = " to " + to_string(target)
  }
  require false, "handoff '" + handoff_kind + "'" + suffix + " was not emitted"
  return nil
}

/**
 * Require that the value at RFC 6901 JSON Pointer `path` inside
 * `receipt` (a receipt or envelope) equals `value`. Returns the value
 * that was found.
 */
pub fn assert_receipt_field(receipt, path, value) {
  let found = json_pointer(receipt, path)
  require found == value, "receipt field " + path + " mismatch"
  return found
}

/**
 * Require that `actual` (a transcript-like value) matches the
 * structured golden `expected`.
 *
 * `expected` may embed the sentinel strings `<ms>`, `<uuid>`, and
 * `<any>`. Dicts in `expected` are matched as subsets, so a golden can
 * pin only the fields that matter. Returns `actual` on success.
 */
pub fn assert_golden_transcript(expected, actual, message = nil) {
  let matched = __testing_golden_match(expected, actual)
  require matched, message ?? "golden transcript mismatch"
  return actual
}