harn-stdlib 0.8.98

// std/agent/judge_internals — private helpers shared by `std/agent/judge`
// (verify_completion + done_judge) and `std/agent/step_judge`. NOT
// intended as a public surface: every export is prefixed `__judge_` and
// the file is excluded from the docs sweep. Consolidates the bits that
// used to be copy-pasted between the two callers (LLM-options
// overrides, structured-output schema field names, and the
// "raw_verdict -> {vetoed, feedback}" classifier).
/**
 * Keys on `judge_cfg` that override `opts.llm_options` when present.
 * Listed once here so adding (or renaming) an LLM tuning knob is a
 * single-line change instead of a 2x sweep.
 *
 * Consumed by `__judge_apply_llm_overrides`, which walks this list in
 * order and copies every key whose `judge_cfg` value is non-nil. Keys
 * not named here are never copied by that helper.
 */
const __JUDGE_LLM_OVERRIDE_KEYS = ["temperature", "max_tokens", "top_p", "tool_format", "reasoning_effort"]

/**
 * Layer per-judge LLM tuning knobs over an already-built `llm_opts`
 * dict. For every key named in `__JUDGE_LLM_OVERRIDE_KEYS`, a non-nil
 * value on `judge_cfg` replaces (or adds) that key in the result; keys
 * whose `judge_cfg` value is nil are left as `llm_opts` had them.
 * When no override applies, the value of `llm_opts` is returned as-is.
 * Replaces the `for key in [...]` block that lived inline at both call
 * sites before v0.8.43.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: internal
 * @example: __judge_apply_llm_overrides(llm_opts, judge_cfg)
 */
pub fn __judge_apply_llm_overrides(llm_opts, judge_cfg) {
  var merged = llm_opts
  for knob in __JUDGE_LLM_OVERRIDE_KEYS {
    // Look the override up once; nil means "not configured for this judge".
    let override_value = judge_cfg[knob]
    if override_value != nil {
      merged = merged + {[knob]: override_value}
    }
  }
  return merged
}

/**
 * JSON structural characters can never be part of a legitimate verdict
 * token, so a captured verdict containing one was mangled upstream.
 *
 * The entries are: double quote, comma, opening and closing brace,
 * colon, and backslash. `__judge_verdict_token` cuts the verdict at the
 * earliest occurrence of any of them.
 * NOTE(review): square brackets are not listed — confirm that is
 * intentional.
 */
const __JUDGE_VERDICT_JSON_JUNK = ["\"", ",", "{", "}", ":", "\\"]

/**
 * Reduce a captured judge verdict to its leading token. Structured
 * judges occasionally emit sloppy JSON (double commas, run-on key/value
 * pairs) that the structured-call repair layer salvages by capturing
 * trailing JSON junk into the verdict string — observed live in
 * `judge_decision` events as `continue",,` and `continue",  "reasoning":`.
 *
 * Steps: a nil verdict is treated as "", the value is stringified,
 * trimmed and lowercased, then cut at the earliest character listed in
 * `__JUDGE_VERDICT_JSON_JUNK` and trimmed again. Verdicts containing no
 * such character (including multi-word prose verdicts) are returned
 * with only the trim + lowercase applied. A verdict whose first
 * character is junk yields "".
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: internal
 * @example: __judge_verdict_token("continue\",,")
 */
pub fn __judge_verdict_token(raw_verdict) {
  let text = lowercase(trim(to_string(raw_verdict ?? "")))
  let total = len(text)
  // Index of the earliest junk character; `total` means "none found".
  var first_junk = total
  for marker in __JUDGE_VERDICT_JSON_JUNK {
    let pos = text.index_of(marker)
    if pos >= 0 && pos < first_junk {
      first_junk = pos
    }
  }
  if first_junk < total {
    // Drop the junk tail and any whitespace that preceded it.
    return trim(text[0:first_junk])
  }
  return text
}

/**
 * Decide whether a raw verdict passes, and build the outcome dict.
 *
 * The raw verdict is first normalized through `__judge_verdict_token`,
 * so a verdict that captured trailing JSON junk still classifies
 * correctly and the returned `verdict` field carries the clean token.
 *
 * - Token is in `pass_tokens`: returns `{vetoed: false, verdict}`.
 * - Otherwise: returns `{vetoed: true, feedback, verdict}`, where
 *   `feedback` is the string form of the first entry of
 *   `feedback_candidates` that is non-nil and does not stringify to "",
 *   or `feedback_default` when no entry qualifies.
 *
 * Used by `agent_step_judge` (pass tokens like "pass"/"yes"/"approve") and
 * by `agent_verify_or_continue` (pass tokens like "done"/"complete").
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: internal
 * @example: __judge_classify_verdict("pass", ["pass", "yes"], [critique], default)
 */
pub fn __judge_classify_verdict(raw_verdict, pass_tokens, feedback_candidates, feedback_default) {
  let token = __judge_verdict_token(raw_verdict)
  if contains(pass_tokens, token) {
    return {vetoed: false, verdict: token}
  }
  // First usable candidate wins; later entries are ignored once one is chosen.
  var chosen = ""
  for entry in feedback_candidates {
    if chosen == "" && entry != nil {
      let entry_text = to_string(entry)
      if entry_text != "" {
        chosen = entry_text
      }
    }
  }
  if chosen == "" {
    return {vetoed: true, feedback: feedback_default, verdict: token}
  }
  return {vetoed: true, feedback: chosen, verdict: token}
}