harn-stdlib 0.8.128

// @harn-entrypoint-category llm.stdlib
//
// std/llm/safe — DRY consolidations for envelope-shaped llm_call results,
// case-insensitive dict access, and judge-payload reconstruction.
import { agent_session_messages } from "std/agent/state"

/**
 * Run llm_call inside a try block and fold the outcome into an envelope:
 *   success -> {ok: true, value: <llm_call result>}
 *   failure -> {ok: false, status: "budget_exhausted" | "exception", error: <err>}
 *
 * The status is "budget_exhausted" only when the caught error is a dict whose
 * `reason` field equals "budget_exceeded"; every other failure is reported as
 * "exception". The caught error is always returned under `error`.
 *
 * @effects: [llm.call]
 * @errors: []
 */
pub fn safe_call(prompt, system, options) {
  let attempt = try {
    llm_call(prompt, system, options)
  }
  if is_err(attempt) {
    let failure = unwrap_err(attempt)
    var status = "exception"
    // Only dict-shaped errors are inspected for a reason tag.
    if type_of(failure) == "dict" {
      if (failure?.reason ?? "") == "budget_exceeded" {
        status = "budget_exhausted"
      }
    }
    return {ok: false, status: status, error: failure}
  }
  return {ok: true, value: unwrap(attempt)}
}

/**
 * Look up `key` in dict `d`, comparing keys case-insensitively. Both the
 * requested key and each dict key are stringified and lowercased before the
 * comparison. Returns the value of the first matching key in `d.keys()`
 * order, or nil when `d` is not a dict or no key matches.
 *
 * @effects: []
 * @errors: []
 */
pub fn dict_get_ci(d, key) {
  if type_of(d) == "dict" {
    let wanted = lowercase(to_string(key))
    for candidate in d.keys() {
      if lowercase(to_string(candidate)) == wanted {
        return d[candidate]
      }
    }
  }
  return nil
}

// Report whether `value` counts as "present". nil, the empty string, an empty
// list, and a dict with no keys are absent; every other value (including 0
// and false) is present.
fn __value_is_present(value) {
  if value == nil {
    return false
  }
  let shape = type_of(value)
  if shape == "string" {
    return value != ""
  } else if shape == "list" {
    return 0 < len(value)
  } else if shape == "dict" {
    return 0 < len(value.keys())
  }
  return true
}

/**
 * Case-insensitive lookup over the top-level keys of `envelope`. Each entry
 * of `names` is tried in order via dict_get_ci; the first value that is
 * neither nil nor empty (empty string, empty list, empty dict) is returned.
 * Returns `default` when `envelope` is not a dict, `names` is not a list, or
 * no candidate yields a present value. Nested dicts are not searched.
 *
 * @effects: []
 * @errors: []
 */
pub fn safe_field(envelope, names, default) {
  if type_of(envelope) != "dict" || type_of(names) != "list" {
    return default
  }
  for candidate in names {
    let hit = dict_get_ci(envelope, candidate)
    if __value_is_present(hit) {
      return hit
    }
  }
  return default
}

/**
 * Return a copy of `envelope` in which every dict key, at any depth, has been
 * stringified and lowercased. Lists are rebuilt element by element so that
 * dicts nested inside them are normalized too; any other value is returned
 * as-is. Applying the function to its own output yields the same result.
 *
 * Keys that differ only by case collapse into a single lowercase entry.
 *
 * @effects: []
 * @errors: []
 */
pub fn with_case_insensitive_keys(envelope) {
  let shape = type_of(envelope)
  if shape == "list" {
    var items = []
    for element in envelope {
      items = items.push(with_case_insensitive_keys(element))
    }
    return items
  }
  if shape != "dict" {
    return envelope
  }
  var lowered = {}
  for key in envelope.keys() {
    let lowered_key = lowercase(to_string(key))
    lowered = lowered + {[lowered_key]: with_case_insensitive_keys(envelope[key])}
  }
  return lowered
}

/**
 * Layer `defaults` underneath a structured-call envelope.
 *
 * - Successful envelope (`envelope.ok` truthy): the defaults are merged UNDER
 *   `envelope.data`, so keys already present in `data` win. A missing or
 *   non-dict `data` is treated as empty.
 * - Failed dict envelope: returns {ok: false} + defaults + envelope, i.e. the
 *   defaults land at the top level and the envelope's own fields win.
 * - nil or non-dict envelope: returns {ok: false} + defaults.
 *
 * A non-dict `defaults` is treated as an empty dict.
 *
 * @effects: []
 * @errors: []
 */
pub fn structured_envelope_or_default(envelope, defaults) {
  var fallback = {}
  if type_of(defaults) == "dict" {
    fallback = defaults
  }
  if envelope == nil || type_of(envelope) != "dict" {
    return {ok: false} + fallback
  }
  let succeeded = envelope?.ok ?? false
  if !succeeded {
    return {ok: false} + fallback + envelope
  }
  var payload = {}
  if type_of(envelope?.data) == "dict" {
    payload = envelope.data
  }
  return envelope + {data: fallback + payload}
}

// Return `names` with empty strings dropped and duplicates removed, keeping
// the first occurrence of each name in its original position.
fn __unique_names(names) {
  var seen = []
  for candidate in names {
    if candidate != "" {
      if !contains(seen, candidate) {
        seen = seen.push(candidate)
      }
    }
  }
  return seen
}

// Collect the `name` of every message whose role is "tool" (a missing name is
// recorded as "") and return the de-duplicated, non-empty names.
fn __session_tool_names(messages) {
  var tool_names = []
  for entry in messages {
    let is_tool_message = entry?.role == "tool"
    if is_tool_message {
      tool_names = tool_names.push(entry?.name ?? "")
    }
  }
  return __unique_names(tool_names)
}

/**
 * Assemble the canonical judge payload. Mirrors agent/judge.__judge_payload
 * but can be called outside agent_loop (e.g. parallel_judge in ensemble).
 *
 * Returns {session_id, task, stop_reason, text, visible_text, last_text,
 * transcript, all_tools_used, successful_tools_used, iteration}.
 *
 * Notes on how the fields are filled here:
 * - The transcript is the JSON-stringified session messages; when the session
 *   has no id, or fetching its messages fails, an empty list is used.
 * - `text`, `visible_text`, and `last_text` all carry the `text` argument.
 * - `all_tools_used` and `successful_tools_used` carry the same
 *   comma-separated list of tool names seen in the session messages.
 * - `_opts` is accepted but not read by this function.
 *
 * @effects: []
 * @errors: []
 */
pub fn judge_payload(session, _opts, stop_reason, text, iteration) {
  let session_id = session?.session_id ?? ""
  var messages = []
  if session_id != "" {
    // Best effort: a failed lookup leaves the transcript empty.
    let fetched = try {
      agent_session_messages(session_id)
    }
    if !is_err(fetched) {
      messages = unwrap(fetched)
    }
  }
  let tools_csv = join(__session_tool_names(messages), ", ")
  return {
    session_id: session_id,
    task: session?.task ?? "",
    stop_reason: stop_reason,
    text: text,
    visible_text: text,
    last_text: text,
    transcript: json_stringify(messages),
    all_tools_used: tools_csv,
    successful_tools_used: tools_csv,
    iteration: iteration,
  }
}

/**
 * Normalize a judge verdict string: stringify (nil becomes ""), trim, and
 * lowercase. When `alias_groups` is a non-empty list of
 * {canonical, aliases} entries, the first group whose `aliases` list contains
 * the normalized text decides the result and its `canonical` is returned.
 * Groups with an empty canonical or a non-list `aliases` are skipped.
 * Without a match, the lowered/trimmed text itself is returned.
 *
 * Aliases are compared as given, so they should already be lowercase.
 *
 * @effects: []
 * @errors: []
 */
pub fn verdict_normalize(text, alias_groups) {
  let cleaned = lowercase(trim(to_string(text ?? "")))
  let has_groups = alias_groups != nil && type_of(alias_groups) == "list" && len(alias_groups) != 0
  if !has_groups {
    return cleaned
  }
  for group in alias_groups {
    let {canonical = "", aliases = []} = group ?? {}
    if canonical != "" && type_of(aliases) == "list" {
      if contains(aliases, cleaned) {
        return canonical
      }
    }
  }
  return cleaned
}

/**
 * Build a deterministic schema-retry nudge string from a JSON Schema.
 *
 * The message tells the model to re-emit JSON only, with lowercase keys, no
 * markdown fences and no surrounding prose, then lists the schema's
 * `required` keys in sorted order (or states that none are declared when
 * `schema` is not a dict or has no `required` list). A non-nil, non-empty
 * `hint` is appended as a final "Hint: ..." line. Lines are joined with "\n".
 *
 * @effects: []
 * @errors: []
 */
pub fn schema_retry_nudge_for(schema, hint) {
  var required_keys = []
  if type_of(schema) == "dict" && type_of(schema?.required) == "list" {
    required_keys = schema.required
  }
  // Sorting keeps the nudge stable regardless of schema key order.
  let ordered_keys = required_keys.sort()
  var required_line = "No required keys are declared."
  if len(ordered_keys) > 0 {
    required_line = "Required keys (lowercase): " + join(ordered_keys, ", ") + "."
  }
  var nudge_lines = [
    "Your previous response did not pass schema validation.",
    "Re-emit valid JSON only:",
    "- Use lowercase keys exactly as specified.",
    "- Do not wrap in markdown fences.",
    "- Do not include prose, commentary, or trailing text.",
    required_line,
  ]
  if hint != nil && to_string(hint) != "" {
    nudge_lines = nudge_lines.push("Hint: " + to_string(hint))
  }
  return join(nudge_lines, "\n")
}

/**
 * One-shot structured-output helper: applies judge-friendly option defaults,
 * delegates to `llm_call_structured_result` (which owns schema retries and
 * the repair pass), and returns the resulting envelope augmented with
 * `value` (the lowercase-key-normalized `data`) and a `status` string.
 * Callers dispatch on `result.ok` and read structured fields off
 * `result.value`.
 *
 * It replaces the recurring 80-120 LOC structured-output dance in judges and
 * analyzers, and is conceptually equivalent to the structured-output preset
 *
 *     compose([with_coerce({})])(__structured_caller(schema))
 *
 * with `__apply_judge_defaults` baking in the defaults before the call. It
 * lives in `std/llm/safe` rather than `std/llm/handlers` because callers
 * consume it as a one-shot, not as a caller-seam middleware.
 *
 * A non-dict `options` argument is treated as an empty options dict.
 *
 * Defaults applied when the corresponding option is unset:
 *   temperature        -> 0.0
 *   schema_retries     -> 2
 *   repair.enabled     -> true
 *   repair.max_tokens  -> 600
 *   repair.temperature -> 0.0
 *   schema_retry_nudge -> derived via `schema_retry_nudge_for`
 *   max_tokens         -> floored to `STRUCTURED_MIN_MAX_TOKENS` (512)
 *
 * The `max_tokens` floor is a measurement-integrity guard, not a tuning
 * knob. A judge/router structured call sent with a tiny budget (e.g. the
 * historical `max_tokens: 180` completion-judge default) truncates mid-object
 * on a reasoning model, which spends part of the same output budget on its
 * reasoning channel. The result is unparseable JSON and a silent dead-judge
 * fall-through. The budget is only ever raised, never lowered, so the JSON
 * stays bounded by the schema yet has room to finish. The guard is
 * provider-generic: it never inspects the provider or model.
 *
 * NOTE(review): this function issues an LLM call through
 * `llm_call_structured_result`, yet the effects annotation below is empty
 * while `safe_call` declares `[llm.call]` — confirm which is intended.
 *
 * @effects: []
 * @errors: []
 */
pub fn safe_structured_call(prompt, schema, options) {
  var caller_options = {}
  if type_of(options) == "dict" {
    caller_options = options
  }
  let call_options = __apply_judge_defaults(schema, caller_options)
  return __augment_envelope(llm_call_structured_result(prompt, schema, call_options))
}

/**
 * Minimum output-token budget for a structured judge/router call, applied by
 * `__apply_judge_defaults`.
 *
 * A call that goes out under this floor risks truncating its JSON mid-object
 * on a reasoning model (the reasoning channel shares the same `max_tokens`
 * budget), which then masquerades as a dead-judge abstention. The floor only
 * RAISES an unset or too-small budget; an explicit larger `max_tokens` (e.g.
 * a 1200-token rubric judge) is left untouched.
 */
let STRUCTURED_MIN_MAX_TOKENS = 512

// Return a copy of `options` with judge-friendly defaults filled in. Nothing
// the caller set is overridden, with one exception: `max_tokens` is raised to
// STRUCTURED_MIN_MAX_TOKENS when it is missing or below that floor. The
// returned dict always carries a `repair` dict; a non-dict `repair` option is
// replaced by the default repair settings.
fn __apply_judge_defaults(schema, options) {
  var merged = options
  // A missing or unparseable max_tokens counts as 0, i.e. below the floor.
  let asked_tokens = to_int(merged?.max_tokens ?? 0) ?? 0
  if asked_tokens < STRUCTURED_MIN_MAX_TOKENS {
    merged = merged + {max_tokens: STRUCTURED_MIN_MAX_TOKENS}
  }
  if merged?.temperature == nil {
    merged = merged + {temperature: 0.0}
  }
  if merged?.schema_retries == nil {
    merged = merged + {schema_retries: 2}
  }
  if merged?.schema_retry_nudge == nil {
    let derived_nudge = schema_retry_nudge_for(schema, nil)
    merged = merged + {schema_retry_nudge: derived_nudge}
  }
  var repair_options = {}
  if type_of(merged?.repair) == "dict" {
    repair_options = merged.repair
  }
  if repair_options?.enabled == nil {
    repair_options = repair_options + {enabled: true}
  }
  if repair_options?.max_tokens == nil {
    repair_options = repair_options + {max_tokens: 600}
  }
  if repair_options?.temperature == nil {
    repair_options = repair_options + {temperature: 0.0}
  }
  return merged + {repair: repair_options}
}

/**
 * Detect a context-window overflow on a single-shot structured/llm error.
 *
 * A single-shot structured call (judge, router, classifier, cached lookup)
 * has NO live session transcript, so it cannot emergency-compact-and-retry
 * the way the agent loop does — there is nothing to mask. What it CAN do is
 * surface the condition TRANSPARENTLY, so a harness author sees "this call
 * overflowed the model's window" instead of a generic `error` that looks
 * like a dead judge or a flaky 400.
 *
 * Detection mirrors the Rust classifier's `[context_overflow]` reason tag
 * and the structured `reason`/`error_category` fields, keeping it consistent
 * with the agent-loop recovery path and provider-agnostic:
 * - dict errors match when `reason` or `error_category` is
 *   "context_overflow", or when `message`/`error` contains the tag;
 * - any other value matches when its string form contains the tag.
 *
 * @effects: []
 * @errors: []
 */
pub fn is_context_overflow_error(err) {
  let overflow_tag = "[context_overflow]"
  if type_of(err) != "dict" {
    return contains(to_string(err), overflow_tag)
  }
  if to_string(err?.reason ?? "") == "context_overflow" {
    return true
  }
  if to_string(err?.error_category ?? "") == "context_overflow" {
    return true
  }
  // Fall back to scanning the free-text fields for the classifier tag.
  let free_text = to_string(err?.message ?? "") + " " + to_string(err?.error ?? "")
  return contains(free_text, overflow_tag)
}

// Attach `value` and `status` to a structured-call envelope.
//
// - Non-dict input is wrapped as {ok: false, status, value: {}, error: <input>}.
// - Dict input keeps all its fields and gains `value` (its `data` with keys
//   lowercased recursively, or {} when `data` is not a dict) and `status`:
//   "ok" on success, otherwise "context_overflow", the envelope's
//   `error_category`, or "error" when no category is present.
//
// "context_overflow" is a distinct, machine-readable status so callers and
// harness authors can SEE that a no-session single-shot call hit the context
// window (vs. a generic 400 or a dead judge). The agent loop recovers overflow
// via compaction; this path cannot, but it must never be silent.
fn __augment_envelope(envelope) {
  if type_of(envelope) != "dict" {
    var failure_status = "exception"
    if is_context_overflow_error(envelope) {
      failure_status = "context_overflow"
    }
    return {ok: false, status: failure_status, value: {}, error: envelope}
  }
  let succeeded = envelope?.ok ?? false
  var payload = {}
  if type_of(envelope?.data) == "dict" {
    payload = envelope.data
  }
  let lowered_payload = with_case_insensitive_keys(payload)
  var status = "error"
  if succeeded {
    status = "ok"
  } else if is_context_overflow_error(envelope) {
    status = "context_overflow"
  } else {
    // Prefer the envelope's own category; fall back to the generic label.
    status = to_string(envelope?.error_category ?? "error")
  }
  return envelope + {value: lowered_payload, status: status}
}