harn-stdlib 0.8.112

import { agent_emit_event, agent_session_inject_feedback } from "std/agent/state"

// Deterministic thrash / degenerate-loop detector over the agent's
// action/observation stream (#2712, part of the #2708 cheap-model
// reliability stack).
//
// This is the deterministic GATE in the reliability stack. An LLM
// "are we thrashing?" self-evaluation is unreliable on its own: intrinsic
// self-evaluation degrades otherwise-correct work (Huang et al., ICLR 2024;
// Kamoi et al., TACL 2024). So the advisory judge (consulted via
// `loop.harn`'s `stall_judge_due` path) must NEVER fire on its own — it is
// consulted only AFTER this detector trips, and even then it stays advisory.
//
// Compose, do not duplicate: every trip is surfaced through the EXISTING
// `agent_loop_stall_warning` event (the `warning` payload is passed through
// to the host verbatim, so enriching it needs no Rust change), the advisory
// judge stays gated on `stall_warning != nil`, and a hard stop surfaces
// through the loop's existing terminal `stuck` / `loop_control_decision`
// path. We deliberately avoid a parallel Rust detector or a new hook surface.
//
// Trip conditions (each behind a named, tunable threshold; mirrors the
// design reference's defaults):
//
//   1. same action -> byte-identical observation, repeated >= 4 times. This
//      SUBSUMES the old adjacent-same-signature logic with an
//      observation-signature refinement.
//   2. same action -> same error, repeated >= 3 times.
//   3. no-progress monologue: >= 3 consecutive assistant turns with text but
//      no tool call.
//   4. ping-pong: A,B,A,B alternation between two actions >= 6 cycles.
//   5. repeated context-window / token-limit errors >= 2 times.
//
// The polling / retry exemption (principled, not an allowlist; the fix for
// OpenHands #5355): a repeated identical action only counts toward conditions
// (1) and (4) when its observation signature is BYTE-IDENTICAL across
// repeats. Legitimate polling produces a changing observation (the status
// advances, the watched file mutates), which resets the streak, so polling
// never trips. The `exempt_tools` allowlist still works for back-compat but
// is now a SECONDARY escape hatch — the observation-signature principle is
// the primary mechanism.
//
// Default trip thresholds. Each constant below is the default for the
// like-named `AgentStallConfig` knob (see `__agent_stall_default_config`).
//
// Same action -> identical observation: repeats before tripping
// (default for `repeat_same_observation`).
const STALL_REPEAT_SAME_OBSERVATION = 4

// Same action -> same error: repeats before tripping
// (default for `repeat_same_error`).
const STALL_REPEAT_SAME_ERROR = 3

// Consecutive no-progress agent messages (text, no tool call) before tripping
// (default for `no_progress_messages`).
const STALL_NO_PROGRESS_MESSAGES = 3

// Ping-pong cycles (A,B,A,B,...) between two actions before tripping
// (default for `ping_pong_cycles`).
const STALL_PING_PONG_CYCLES = 6

// Repeated context-window / token-limit errors before tripping
// (default for `repeat_context_window_error`).
const STALL_REPEAT_CONTEXT_WINDOW_ERROR = 2

// Evidence-aware repair loop (#repair-diagnostics): default number of repair
// turns the SAME diagnostic may recur (same signature, no intervening edit)
// before the detector trips the "stuck_same_diagnostic" strategy-shift nudge
// (default for `stuck_same_diagnostic_after`).
const STALL_STUCK_SAME_DIAGNOSTIC_AFTER = 3

// Consecutive un-recovered trips before a hard stop is recommended. A trip
// "recovers" when a turn produces a step that does not itself trip (the agent
// broke the loop). Past this many back-to-back trips the detector flags
// `hard_stop` so the loop can surface a terminal stuck stop instead of
// nudging forever (default for `hard_stop_after_trips`).
const STALL_HARD_STOP_AFTER_TRIPS = 3

// Tools that report progress without changing task state. A turn whose only
// tool call is one of these is "narrating progress without making it": it is
// counted as a no-progress turn for stall accounting, so a run of them (e.g. a
// model spamming `agent_progress` after it gets stuck) trips the no-progress
// detector instead of masquerading as activity. A turn that also makes a real
// tool call is unaffected — the real call carries the action stream.
const SOFT_PROGRESS_TOOLS = ["agent_progress"]

/**
 * Evidence-aware repair loop (#repair-diagnostics), spanning AgentStallConfig /
 * AgentStallWarning / AgentStallState below. Default OFF: when `repair_aware`
 * is false every repair field is inert and the detector behaves exactly as
 * before. When ON, the loop tracks a current-failure model (signature + epoch)
 * instead of a blind counter, forces a post-edit re-verify, and nudges a
 * strategy shift after the same diagnostic recurs across
 * `stuck_same_diagnostic_after` repair turns.
 *   - Config knobs: repair_aware / stuck_same_diagnostic_after / post_edit_reverify.
 *   - Warning extras (only on the "stuck_same_diagnostic" pattern):
 *     diagnostic_class / diagnostic_signature / diagnostic_snippet.
 *   - State (the current-failure model — folded by __agent_stall_fold_diagnostic
 *     from the turn's verification result, and deliberately NOT cleared by
 *     __agent_stall_reset_action so a different corrective action does not clear
 *     "same root failure"): last_diagnostic_class / last_diagnostic_signature /
 *     last_diagnostic_snippet / write_epoch / same_diagnostic_streak /
 *     edit_since_failure / reverify_owed.
 *
 * AgentStallConfig field notes (defaults come from
 * __agent_stall_default_config; validation from __agent_stall_config):
 *   - enabled: master switch. False when no config is supplied; true when a
 *     dict is supplied without an `enabled` key.
 *   - threshold: legacy repeated-identical-call count (int >= 2, default 3).
 *     When set explicitly it also seeds `repeat_same_observation`.
 *   - inject_feedback / max_feedback: whether, and at most how many times,
 *     feedback text is injected into the session (max_feedback is an int >= 0,
 *     default 1).
 *   - exempt_tools: list; the legacy key `allow_repeated_tools` is read when
 *     `exempt_tools` is absent.
 *   - include_arguments: when true, warning records carry the raw `arguments`.
 *   - repeat_same_observation / repeat_same_error / no_progress_messages /
 *     ping_pong_cycles / repeat_context_window_error: per-condition trip
 *     thresholds.
 *   - hard_stop_after_trips: consecutive trips at which `hard_stop` is flagged.
 */
type AgentStallConfig = {
  enabled: bool,
  threshold: int,
  inject_feedback: bool,
  max_feedback: int,
  exempt_tools: list,
  include_arguments: bool,
  repeat_same_observation: int,
  repeat_same_error: int,
  no_progress_messages: int,
  ping_pong_cycles: int,
  repeat_context_window_error: int,
  hard_stop_after_trips: int,
  repair_aware: bool,
  stuck_same_diagnostic_after: int,
  post_edit_reverify: bool,
}

/**
 * AgentStallWarning is the payload recorded for one detector trip. It is built
 * by __agent_stall_warning_record, emitted as the `agent_loop_stall_warning`
 * event, and rendered into feedback by __agent_stall_feedback_text.
 *   - repeat_count / count: both carry the same trip count.
 *   - threshold: the threshold value supplied by the code that raised the trip.
 *   - arguments_digest / signature_digest: "sha256:"-prefixed digests of the
 *     JSON-encoded arguments and of the action signature.
 *   - pattern: which condition tripped. The feedback text recognizes
 *     "stuck_same_diagnostic", "repeated_same_observation", "repeated_error",
 *     "no_progress_monologue", "errored_no_tool_call", "ping_pong" and
 *     "repeated_context_window_error"; anything else is described as the legacy
 *     identical-arguments repeat.
 *   - consecutive_trips / hard_stop: escalation snapshot taken at the trip.
 *   - arguments: present only when the config sets `include_arguments`.
 *   - diagnostic_class / diagnostic_signature / diagnostic_snippet: repair-loop
 *     extras, only on the "stuck_same_diagnostic" pattern.
 */
type AgentStallWarning = {
  iteration: int,
  tool_name: string,
  repeat_count: int,
  threshold: int,
  arguments_digest: string,
  signature_digest: string,
  pattern: string,
  signature: string,
  count: int,
  consecutive_trips: int,
  hard_stop: bool,
  arguments?: dict,
  diagnostic_class?: string,
  diagnostic_signature?: string,
  diagnostic_snippet?: string,
}

/**
 * AgentStallState is the detector's running state, created by
 * agent_stall_initial_state and threaded through every observation.
 *   - Action stream (cleared by __agent_stall_reset_action): last_signature,
 *     streak, last_observation_signature, last_outcome_kind,
 *     same_observation_streak, same_error_streak, context_window_error_streak,
 *     ping_pong_a, ping_pong_b, ping_pong_alternations.
 *   - no_progress_streak: consecutive text-only turns; NOT cleared by
 *     __agent_stall_reset_action.
 *   - warnings / repeated_tool_calls / feedback_count: accumulated warning
 *     records and counters; feedback_count bounds injected feedback against
 *     `max_feedback`.
 *   - consecutive_trips / hard_stop: trip escalation, bumped by
 *     __agent_stall_register_trip and cleared by __agent_stall_register_recovery.
 *   - last_diagnostic_class / last_diagnostic_signature /
 *     last_diagnostic_snippet / write_epoch / same_diagnostic_streak /
 *     edit_since_failure / reverify_owed: the repair-loop current-failure model,
 *     maintained by __agent_stall_fold_diagnostic.
 */
type AgentStallState = {
  last_signature: string,
  streak: int,
  warnings: list<AgentStallWarning>,
  repeated_tool_calls: int,
  feedback_count: int,
  last_observation_signature: string,
  last_outcome_kind: string,
  same_observation_streak: int,
  same_error_streak: int,
  context_window_error_streak: int,
  no_progress_streak: int,
  ping_pong_a: string,
  ping_pong_b: string,
  ping_pong_alternations: int,
  consecutive_trips: int,
  hard_stop: bool,
  last_diagnostic_class: string,
  last_diagnostic_signature: string,
  last_diagnostic_snippet: string,
  write_epoch: int,
  same_diagnostic_streak: int,
  edit_since_failure: bool,
  reverify_owed: bool,
}

/**
 * AgentStallObservation is the result of folding one turn into the detector.
 *   - state: the updated detector state to carry into the next turn.
 *   - enabled: whether the detector was active for this observation.
 *   - warning: the trip record, or nil when nothing tripped.
 *   - feedback_deferred: true when the warning was emitted but feedback
 *     injection was left to the caller.
 *   - config: the resolved config the observation was made with.
 *   - hard_stop: whether the trip escalation recommends a terminal stop.
 */
type AgentStallObservation = {
  state: AgentStallState,
  enabled: bool,
  warning: AgentStallWarning?,
  feedback_deferred: bool,
  config: AgentStallConfig,
  hard_stop: bool,
}

/**
 * Resolve an optional boolean `stall_diagnostics` field.
 *
 * Returns `fallback` when `value` is nil and `value` itself when it is a bool.
 * Any other type throws an error naming the offending `field`.
 */
fn __agent_stall_bool(value, fallback: bool, field: string) -> bool {
  if value == nil {
    return fallback
  }
  // Reject wrong types loudly instead of coercing them.
  if type_of(value) != "bool" {
    throw "agent_loop: stall_diagnostics." + field + " must be a bool; got " + type_of(value)
  }
  return value
}

/**
 * Resolve an optional list `stall_diagnostics` field.
 *
 * Returns an empty list when `value` is nil and `value` itself when it is a
 * list. Any other type throws an error naming the offending `field`.
 */
fn __agent_stall_list(value, field: string) -> list {
  if value == nil {
    return []
  }
  // Reject wrong types loudly instead of coercing them.
  if type_of(value) != "list" {
    throw "agent_loop: stall_diagnostics." + field + " must be a list; got " + type_of(value)
  }
  return value
}

/**
 * Resolve an optional integer knob. Returns `value` only when it is an int
 * that is at least `minimum`; every other input (nil, wrong type, too small)
 * silently resolves to `fallback`.
 */
fn __agent_stall_int(value, fallback: int, minimum: int) -> int {
  if type_of(value) != "int" {
    return fallback
  }
  if value < minimum {
    return fallback
  }
  return value
}

/**
 * Build the baseline config used when the caller supplies no
 * `stall_diagnostics` value (or only a bool). The detector is disabled by
 * default, the repair-aware loop is off, and every trip threshold takes its
 * value from the matching STALL_* constant.
 */
fn __agent_stall_default_config() -> AgentStallConfig {
  return {
    enabled: false,
    threshold: 3,
    inject_feedback: true,
    max_feedback: 1,
    exempt_tools: [],
    include_arguments: false,
    repeat_same_observation: STALL_REPEAT_SAME_OBSERVATION,
    repeat_same_error: STALL_REPEAT_SAME_ERROR,
    no_progress_messages: STALL_NO_PROGRESS_MESSAGES,
    ping_pong_cycles: STALL_PING_PONG_CYCLES,
    repeat_context_window_error: STALL_REPEAT_CONTEXT_WINDOW_ERROR,
    hard_stop_after_trips: STALL_HARD_STOP_AFTER_TRIPS,
    repair_aware: false,
    stuck_same_diagnostic_after: STALL_STUCK_SAME_DIAGNOSTIC_AFTER,
    post_edit_reverify: true,
  }
}

/**
 * Normalize the caller-supplied `stall_diagnostics` option into a full
 * AgentStallConfig.
 *
 * Accepted inputs:
 *  - nil   => the defaults (detector disabled).
 *  - bool  => the defaults with `enabled` set to that bool.
 *  - dict  => per-field resolution; a dict without `enabled` turns the
 *    detector ON. Bool and list fields throw on a wrong type; int knobs that
 *    are missing, of the wrong type, or below their minimum fall back to the
 *    default.
 * Any other type throws.
 */
fn __agent_stall_config(value) -> AgentStallConfig {
  let defaults = __agent_stall_default_config()
  if value == nil {
    return defaults
  }
  if type_of(value) == "bool" {
    return defaults + {enabled: value}
  }
  if type_of(value) != "dict" {
    throw "agent_loop: `stall_diagnostics` must be a dict, bool, or nil; got " + type_of(value)
  }
  // `exempt_tools` wins; the legacy `allow_repeated_tools` key is only read
  // when `exempt_tools` is absent.
  let exempt_tools = if value?.exempt_tools != nil {
    __agent_stall_list(value.exempt_tools, "exempt_tools")
  } else {
    __agent_stall_list(value?.allow_repeated_tools, "allow_repeated_tools")
  }
  // Same validation as every other int knob: go through the shared helper and
  // take the fallback from the defaults instead of repeating the literals.
  let resolved_threshold = __agent_stall_int(value?.threshold, defaults.threshold, 2)
  let resolved_max_feedback = __agent_stall_int(value?.max_feedback, defaults.max_feedback, 0)
  // The legacy `threshold` knob (N repeated identical calls) IS the same intent
  // as the same-observation condition, so when a caller sets it explicitly it
  // also drives `repeat_same_observation` for back-compat. Absent an explicit
  // `threshold`, the same-observation condition uses the reference default (4).
  let same_observation_default = if value?.threshold != nil {
    resolved_threshold
  } else {
    STALL_REPEAT_SAME_OBSERVATION
  }
  return {
    enabled: __agent_stall_bool(value?.enabled, true, "enabled"),
    threshold: resolved_threshold,
    inject_feedback: __agent_stall_bool(value?.inject_feedback, true, "inject_feedback"),
    max_feedback: resolved_max_feedback,
    exempt_tools: exempt_tools,
    include_arguments: __agent_stall_bool(value?.include_arguments, false, "include_arguments"),
    repeat_same_observation: __agent_stall_int(value?.repeat_same_observation, same_observation_default, 2),
    repeat_same_error: __agent_stall_int(value?.repeat_same_error, STALL_REPEAT_SAME_ERROR, 2),
    no_progress_messages: __agent_stall_int(value?.no_progress_messages, STALL_NO_PROGRESS_MESSAGES, 2),
    ping_pong_cycles: __agent_stall_int(value?.ping_pong_cycles, STALL_PING_PONG_CYCLES, 2),
    repeat_context_window_error: __agent_stall_int(value?.repeat_context_window_error, STALL_REPEAT_CONTEXT_WINDOW_ERROR, 1),
    hard_stop_after_trips: __agent_stall_int(value?.hard_stop_after_trips, STALL_HARD_STOP_AFTER_TRIPS, 1),
    repair_aware: __agent_stall_bool(value?.repair_aware, false, "repair_aware"),
    stuck_same_diagnostic_after: __agent_stall_int(value?.stuck_same_diagnostic_after, STALL_STUCK_SAME_DIAGNOSTIC_AFTER, 2),
    post_edit_reverify: __agent_stall_bool(value?.post_edit_reverify, true, "post_edit_reverify"),
  }
}

/**
 * agent_stall_initial_state creates repeated-tool-call diagnostic state.
 *
 * Every streak/counter starts at 0, every signature and diagnostic string is
 * empty, no warnings are recorded, and neither `hard_stop` nor the repair-loop
 * flags (`edit_since_failure`, `reverify_owed`) are set.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_initial_state() -> AgentStallState {
  return {
    last_signature: "",
    streak: 0,
    warnings: [],
    repeated_tool_calls: 0,
    feedback_count: 0,
    last_observation_signature: "",
    last_outcome_kind: "",
    same_observation_streak: 0,
    same_error_streak: 0,
    context_window_error_streak: 0,
    no_progress_streak: 0,
    ping_pong_a: "",
    ping_pong_b: "",
    ping_pong_alternations: 0,
    consecutive_trips: 0,
    hard_stop: false,
    last_diagnostic_class: "",
    last_diagnostic_signature: "",
    last_diagnostic_snippet: "",
    write_epoch: 0,
    same_diagnostic_streak: 0,
    edit_since_failure: false,
    reverify_owed: false,
  }
}

/**
 * Clear the action-stream tracking only: the action/observation signatures,
 * the same-observation / same-error / context-window streaks, and the
 * ping-pong pair. Trip escalation, feedback counters, the no-progress streak
 * and the repair-loop failure model are left as they were. Used when an
 * action is exempt or the turn made no tool call.
 */
fn __agent_stall_reset_action(state: AgentStallState) -> AgentStallState {
  let cleared_action_fields = {
    last_signature: "",
    streak: 0,
    last_observation_signature: "",
    last_outcome_kind: "",
    same_observation_streak: 0,
    same_error_streak: 0,
    context_window_error_streak: 0,
    ping_pong_a: "",
    ping_pong_b: "",
    ping_pong_alternations: 0,
  }
  return state + cleared_action_fields
}

/**
 * Name of a tool call as a string: `name` if present, else `tool_name`, else
 * the empty string.
 */
fn __agent_tool_call_name(call) -> string {
  let raw_name = call?.name ?? call?.tool_name ?? ""
  return to_string(raw_name)
}

/**
 * Arguments of a tool call: `arguments` if present, else `tool_args`. Anything
 * that is not a dict (including a missing value) resolves to an empty dict.
 */
fn __agent_tool_call_args(call) -> dict {
  let candidate = call?.arguments ?? call?.tool_args
  if type_of(candidate) != "dict" {
    return {}
  }
  return candidate
}

/**
 * Action signature of a tool call: the tool name, a newline, then the
 * JSON-encoded arguments.
 */
fn __agent_tool_call_signature(call) -> string {
  let name = __agent_tool_call_name(call)
  let encoded_args = json_stringify(__agent_tool_call_args(call))
  return name + "\n" + encoded_args
}

/**
 * Observation classification (the polling-exemption mechanism): decide
 * whether a dispatch result succeeded. The first field that is present
 * decides: `ok`, then `success` (each taken by truthiness), then `status`,
 * which counts as success only when it is "ok" or "success". A result with
 * none of these fields is treated as not ok.
 */
fn __agent_stall_result_ok(result) -> bool {
  let ok_flag = result?.ok
  if ok_flag != nil {
    return ok_flag ? true : false
  }
  let success_flag = result?.success
  if success_flag != nil {
    return success_flag ? true : false
  }
  let status = to_string(result?.status ?? "")
  return status == "ok" || status == "success"
}

/**
 * Tool name attached to a dispatch result: `tool_name` if present, else
 * `name`, else the empty string.
 */
fn __agent_stall_result_name(result) -> string {
  let raw_name = result?.tool_name ?? result?.name ?? ""
  return to_string(raw_name)
}

/**
 * A context-window / token-limit error is its own trip condition because
 * repeating it is never productive and never recovers on its own.
 *
 * Returns true when `text`, compared case-insensitively, contains any of the
 * known marker substrings.
 */
fn __agent_stall_is_context_window_error(text: string) -> bool {
  let lowered = lowercase(text)
  // "context_length" also matches "context_length_exceeded", so the longer
  // form needs no entry of its own.
  let markers = [
    "context window",
    "context length",
    "context_length",
    "maximum context",
    "token limit",
    "too many tokens",
    "max_tokens",
  ]
  for marker in markers {
    if contains(lowered, marker) {
      return true
    }
  }
  return false
}

/**
 * Normalize a dispatched result for the detector. Returns a dict with:
 *  - kind: "ok", "error" or "context_window";
 *  - signature: "ok:" + sha256 of the JSON-encoded payload (`result`, else
 *    `output`, else `content`) for a success, "err:" + sha256 of the error
 *    text (`error`, else `message`, else `result`) for a failure, or the fixed
 *    string "ctx" for a context-window error.
 */
fn __agent_stall_result_outcome(result) -> dict {
  if !__agent_stall_result_ok(result) {
    let error_text = to_string(result?.error ?? result?.message ?? result?.result ?? "")
    // Context-window failures share one signature regardless of wording.
    if __agent_stall_is_context_window_error(error_text) {
      return {kind: "context_window", signature: "ctx"}
    }
    return {kind: "error", signature: "err:" + sha256(error_text)}
  }
  let payload = json_stringify(result?.result ?? result?.output ?? result?.content ?? nil)
  return {kind: "ok", signature: "ok:" + sha256(payload)}
}

/**
 * Human-readable evidence for a failing result: the trimmed error text
 * (`error`, else `message`, else `result`), cut to at most 240 units by
 * `slice`. A successful result yields the empty string. The snippet is display
 * text only; the byte-stable signature comes from
 * __agent_stall_result_outcome.
 */
fn __agent_stall_result_snippet(result) -> string {
  if __agent_stall_result_ok(result) {
    return ""
  }
  let raw_text = to_string(result?.error ?? result?.message ?? result?.result ?? "")
  let trimmed = trim(raw_text)
  let snippet = if len(trimmed) > 240 {
    trimmed.slice(0, 240)
  } else {
    trimmed
  }
  return snippet
}

/**
 * Choose the result that represents a turn's verification outcome.
 *
 * `prev_dispatch` is either a list of results or a value with a `results`
 * field. The FIRST failing result wins (the diagnostic the agent must fix);
 * when nothing failed, the first result is returned (a "pass" turn). Returns
 * nil when `prev_dispatch` is nil or holds no results, so the caller leaves
 * the current-failure model untouched.
 */
fn __agent_stall_turn_result(prev_dispatch) {
  if prev_dispatch == nil {
    return nil
  }
  let entries = if type_of(prev_dispatch) != "list" {
    prev_dispatch?.results ?? []
  } else {
    prev_dispatch
  }
  var first_passing = nil
  var seen_passing = false
  for entry in entries {
    // A failure short-circuits: it is always the representative result.
    if !__agent_stall_result_ok(entry) {
      return entry
    }
    if !seen_passing {
      first_passing = entry
      seen_passing = true
    }
  }
  return first_passing
}

/**
 * __agent_stall_fold_diagnostic folds the current-failure model for one turn.
 * It is a PURE function of (state, prev_dispatch, turn_made_edit):
 *
 *  - It classifies the turn's representative verification result (the first
 *    failing dispatch result, else the first result) into a ProbeOutcome-shaped
 *    class and a byte-stable signature via __agent_stall_result_outcome (never
 *    reimplementing the sha256/normalization).
 *  - On a successful workspace-mutating edit this turn it bumps `write_epoch`
 *    (which arms the post-edit re-verify mandate the loop honors).
 *  - The failure SIGNATURE is the progress signal: a productive edit changes
 *    the error (signature changes => streak resets — legitimate progress, no
 *    false positive); a flailing edit leaves the SAME error (signature
 *    identical => streak advances — real thrash). So the streak advances on the
 *    same signature REGARDLESS of intervening edits.
 *  - It advances/resets `same_diagnostic_streak`:
 *      same failure signature  => futile  (streak + 1), even across edits
 *      different signature      => reset to 1 (a different mistake, or progress)
 *      "pass"                   => clear streak, reverify_owed, edit_since_failure
 *
 * Result handling:
 *  - a FAILING result  => fold the streak as above; this is verification
 *    evidence, so any owed re-verify is satisfied (clear reverify_owed /
 *    edit_since_failure). A turn that BOTH edits and re-tests still classifies
 *    here: __agent_stall_turn_result returns the FIRST failing result, so the
 *    re-test's failure is seen past the edit's own "ok" (the edit bumps the
 *    epoch; the fail still advances the streak).
 *  - a non-failing result, NOT an edit turn => a genuine passing verification
 *    clears the current-failure model.
 *  - a non-failing result on an EDIT turn => the result is the edit's own
 *    success, NOT verification of the failure. Owe a re-verify and preserve the
 *    failure model (same as the no-result edit case).
 *  - no result (prev_dispatch nil/empty) => if this turn made an edit on top of
 *    a live failure, owe a re-verify (reverify_owed / edit_since_failure) and
 *    bump the epoch; otherwise leave the model untouched (epoch still bumps on
 *    an edit).
 *
 * Parameters:
 *  - state: the detector state carrying the current-failure model.
 *  - prev_dispatch: the turn's dispatch results — a list of results or a value
 *    with a `results` field; nil when there is nothing to classify.
 *  - turn_made_edit: whether this turn made a successful workspace edit.
 *
 * Returns the folded state; every branch returns a `state + {...}` merge.
 *
 * Note: the "preserve the failure model on an edit turn" rule only applies
 * while `last_diagnostic_class == "fail"`. An edit turn that yields a passing
 * result with no live failure is recorded as a "pass".
 */
fn __agent_stall_fold_diagnostic(state: AgentStallState, prev_dispatch, turn_made_edit: bool) -> AgentStallState {
  // Edit bookkeeping first: a successful workspace mutation advances the epoch,
  // which arms the post-edit re-verify mandate in the loop.
  let new_write_epoch = if turn_made_edit {
    state.write_epoch + 1
  } else {
    state.write_epoch
  }
  let result = __agent_stall_turn_result(prev_dispatch)
  // "Fail" requires an actual result: a missing result is neither pass nor fail.
  let result_is_fail = result != nil && !__agent_stall_result_ok(result)
  if !result_is_fail {
    // No FAILING verification this turn. A successful edit on top of a live
    // failure is NOT proof the failure is resolved (its "ok" is the edit
    // succeeding, not the check passing): owe a re-verify and preserve the
    // current-failure model so the next re-fail of the SAME diagnostic is
    // recognized across the edit. A genuine passing result on a NON-edit turn
    // is real verification evidence and clears the model.
    if turn_made_edit && state.last_diagnostic_class == "fail" {
      return state
        + {write_epoch: new_write_epoch, edit_since_failure: true, reverify_owed: true}
    }
    if result == nil {
      // No verification evidence and no edit-on-failure: leave the model alone.
      return state + {write_epoch: new_write_epoch}
    }
    // A passing verification clears the current-failure model.
    let pass_outcome = __agent_stall_result_outcome(result)
    return state
      + {
      last_diagnostic_class: "pass",
      last_diagnostic_signature: pass_outcome.signature,
      last_diagnostic_snippet: "",
      write_epoch: new_write_epoch,
      same_diagnostic_streak: 0,
      edit_since_failure: false,
      reverify_owed: false,
    }
  }
  // result_is_fail: fold against the prior failure model. The signature
  // is the progress signal, so the streak advances on the SAME signature
  // regardless of whether an edit intervened — a flailing edit that leaves the
  // same error counts as thrash, while a productive edit changes the signature
  // and resets the streak (the false-positive safety property).
  let outcome = __agent_stall_result_outcome(result)
  let signature = outcome.signature
  // The class check keeps a previous "pass" signature from ever counting as a
  // repeat of this failure.
  let same_signature = signature == state.last_diagnostic_signature
    && state.last_diagnostic_class == "fail"
  let new_streak = if same_signature {
    state.same_diagnostic_streak + 1
  } else {
    1
  }
  // This is a verification result, so the post-edit re-verify mandate (if any)
  // has now been satisfied: clear reverify_owed and the edit-since marker. The
  // NEXT edit after this failure re-arms them.
  return state
    + {
    last_diagnostic_class: "fail",
    last_diagnostic_signature: signature,
    last_diagnostic_snippet: __agent_stall_result_snippet(result),
    write_epoch: new_write_epoch,
    same_diagnostic_streak: new_streak,
    edit_since_failure: false,
    reverify_owed: false,
  }
}

/**
 * Look up the normalized outcome of the first dispatch result whose tool name
 * equals `tool_name`. `prev_dispatch` is either a list of results or a value
 * with a `results` field. The detector tracks one action at a time, so the
 * first match is the relevant observation. Returns nil when `prev_dispatch`
 * is nil or no result matches.
 */
fn __agent_stall_outcome_for(prev_dispatch, tool_name: string) {
  if prev_dispatch == nil {
    return nil
  }
  let entries = if type_of(prev_dispatch) != "list" {
    prev_dispatch?.results ?? []
  } else {
    prev_dispatch
  }
  for entry in entries {
    let entry_name = __agent_stall_result_name(entry)
    if entry_name == tool_name {
      return __agent_stall_result_outcome(entry)
    }
  }
  return nil
}

/**
 * Render the feedback message injected into the session for a warning.
 *
 * The "stuck_same_diagnostic" pattern gets its own repair-oriented message,
 * grounded in the diagnostic snippet when one is present. Every other pattern
 * is described by a short clause, then wrapped either as a hard-stop notice
 * (when `warning.hard_stop` is set) or as a regular stall diagnostic. An
 * unrecognized pattern is described as the legacy identical-arguments repeat.
 */
fn __agent_stall_feedback_text(warning: AgentStallWarning) -> string {
  let kind = warning.pattern
  let times = to_string(warning.count)
  // Evidence-aware repair loop: a strategy-shift nudge grounded in the actual
  // diagnostic rather than a bare repeat counter, so the advice is to change
  // approach instead of retrying harder.
  if kind == "stuck_same_diagnostic" {
    let snippet = to_string(warning.diagnostic_snippet ?? "")
    let evidence = if snippet == "" {
      ""
    } else {
      " The failing evidence is unchanged:\n" + snippet
    }
    return "Repair diagnostic: the same failure has persisted across "
      + times
      + " repair turns without changing."
      + evidence
      + "\nStop retrying the same fix. Re-read the relevant code, form a NEW hypothesis "
      + "about the root cause, or gather different evidence before editing again. If you "
      + "cannot make progress, report this specific blocker instead of repeating."
  }
  // One clause per pattern; the final else is the legacy signature-only repeat.
  let clause = if kind == "repeated_same_observation" {
    "the action `" + warning.tool_name + "` produced the same observation " + times
      + " times in a row"
  } else if kind == "repeated_error" {
    "the action `" + warning.tool_name + "` failed with the same error " + times + " times in a row"
  } else if kind == "no_progress_monologue" {
    times + " consecutive assistant messages made no progress (no tool call)"
  } else if kind == "errored_no_tool_call" {
    "your previous turn ended with a provider error and emitted no tool call ("
      + times
      + " in a row) — re-emit the intended tool call"
  } else if kind == "ping_pong" {
    "the loop is ping-ponging between two actions (" + times + " cycles)"
  } else if kind == "repeated_context_window_error" {
    "the run hit a context-window error " + times + " times in a row"
  } else {
    "the last " + times + " tool calls repeated `" + warning.tool_name
      + "` with identical arguments"
  }
  if !warning.hard_stop {
    return "Stall diagnostic: "
      + clause
      + ". Use different evidence, finish, or explain why repeating is necessary before doing so again."
  }
  return "Loop detector (hard stop): "
    + clause
    + ". "
    + to_string(warning.consecutive_trips)
    + " trips with no recovery — stop and report the blocker instead of repeating."
}

/**
 * Assemble the warning record for one trip. `count` is stored under both
 * `repeat_count` and `count`; the arguments and the action signature are also
 * stored as "sha256:"-prefixed digests. The raw `arguments` dict is attached
 * only when `config.include_arguments` is set.
 */
fn __agent_stall_warning_record(
  iteration: int,
  tool_name: string,
  args: dict,
  signature: string,
  pattern: string,
  count: int,
  consecutive_trips: int,
  hard_stop: bool,
  config: AgentStallConfig,
  threshold: int,
) -> AgentStallWarning {
  let base = {
    iteration: iteration,
    tool_name: tool_name,
    repeat_count: count,
    threshold: threshold,
    arguments_digest: "sha256:" + sha256(json_stringify(args)),
    signature_digest: "sha256:" + sha256(signature),
    pattern: pattern,
    signature: signature,
    count: count,
    consecutive_trips: consecutive_trips,
    hard_stop: hard_stop,
  }
  if !config.include_arguments {
    return base
  }
  return base + {arguments: args}
}

/**
 * agent_stall_inject_feedback injects bounded stall feedback.
 *
 * Feedback is injected only when `config.inject_feedback` is not disabled and
 * fewer than `config.max_feedback` messages have been injected so far. On
 * injection the returned state has `feedback_count` incremented; otherwise
 * the state is returned unchanged.
 *
 * @effects: [agent]
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_inject_feedback(
  session_id: string,
  warning: AgentStallWarning,
  config: AgentStallConfig,
  state: AgentStallState,
) -> AgentStallState {
  let wants_feedback = config.inject_feedback ?? true
  // Guard: feedback disabled, or the feedback budget is already spent.
  if !wants_feedback || state.feedback_count >= config.max_feedback {
    return state
  }
  let message = __agent_stall_feedback_text(warning)
  agent_session_inject_feedback(session_id, "stall_diagnostics", message)
  return state + {feedback_count: state.feedback_count + 1}
}

/**
 * Emit the `agent_loop_stall_warning` event for a trip and, unless feedback
 * is deferred, inject the bounded feedback right away. Returns a dict with
 * the (possibly updated) `state`, the `warning`, and `feedback_deferred`.
 */
fn __agent_stall_maybe_emit(
  session_id: string,
  warning: AgentStallWarning,
  config: AgentStallConfig,
  state: AgentStallState,
  defer_feedback: bool,
) {
  // The event is always emitted; only the feedback injection can be deferred.
  agent_emit_event(session_id, "agent_loop_stall_warning", warning)
  if !defer_feedback {
    let after_feedback = agent_stall_inject_feedback(session_id, warning, config, state)
    return {state: after_feedback, warning: warning, feedback_deferred: false}
  }
  return {state: state, warning: warning, feedback_deferred: true}
}

/**
 * Ping-pong tracking: maintain the two action signatures of a candidate A/B
 * alternation (`ping_pong_a`, `ping_pong_b`) and the running count of
 * alternations between them. Two alternations (A->B->A) make one cycle.
 *
 * Returns the updated state. Note that `ping_pong_alternations` stores the
 * ALTERNATION count, not the cycle count; how it is compared against
 * `ping_pong_cycles` is decided by the caller.
 *
 * Transitions:
 *  - no anchor yet           => anchor the pair on `signature`.
 *  - anchor only, same action => stay anchored (same-action repeat path).
 *  - anchor only, new action  => it becomes B; one alternation recorded.
 *  - pair set, expected side  => one more alternation.
 *  - pair set, anything else  => restart the pair anchored on `signature`.
 */
fn __agent_stall_update_ping_pong(state: AgentStallState, signature: string) -> AgentStallState {
  let a = state.ping_pong_a
  let b = state.ping_pong_b
  if a == "" {
    return state + {ping_pong_a: signature, ping_pong_b: "", ping_pong_alternations: 0}
  }
  if b == "" {
    if a == signature {
      // Same action twice — this is the same-action repeat path, not ping-pong.
      return state + {ping_pong_alternations: 0}
    }
    return state + {ping_pong_b: signature, ping_pong_alternations: 1}
  }
  // After an odd number of alternations the last action was B, so A is due;
  // after an even number the last action was A, so B is due.
  let expected_next = if state.ping_pong_alternations % 2 == 1 {
    a
  } else {
    b
  }
  if expected_next == signature {
    return state + {ping_pong_alternations: state.ping_pong_alternations + 1}
  }
  // The alternation broke: either an action from the pair arrived out of turn
  // or a third distinct action appeared. Both cases restart the pair anchored
  // on the current action.
  return state + {ping_pong_a: signature, ping_pong_b: "", ping_pong_alternations: 0}
}

/**
 * Record a fired trip: increment `consecutive_trips` and set `hard_stop` once
 * the new count reaches `config.hard_stop_after_trips`. Returns the updated
 * state.
 */
fn __agent_stall_register_trip(state: AgentStallState, config: AgentStallConfig) -> AgentStallState {
  let next_trips = state.consecutive_trips + 1
  let should_stop = next_trips >= config.hard_stop_after_trips
  return state + {consecutive_trips: next_trips, hard_stop: should_stop}
}

/**
 * Record a recovery: a step that did not trip clears the consecutive-trip
 * escalation and the hard-stop flag, so a hard stop only follows
 * uninterrupted thrashing.
 */
fn __agent_stall_register_recovery(state: AgentStallState) -> AgentStallState {
  let cleared_escalation = {consecutive_trips: 0, hard_stop: false}
  return state + cleared_escalation
}

/**
 * Fold a no-progress turn (assistant text, no tool call) into the detector.
 *
 * The action-stream tracking is cleared and `no_progress_streak` is
 * incremented. Below `config.no_progress_messages` the turn counts as a
 * recovery and no warning is produced. At or above it, a trip is registered,
 * a warning is built and emitted, and the warning is appended to
 * `state.warnings`. The pattern is "errored_no_tool_call" when
 * `turn_stop_reason` is "error", otherwise "no_progress_monologue".
 */
fn __agent_stall_observe_no_progress(
  session_id: string,
  iteration: int,
  config: AgentStallConfig,
  state: AgentStallState,
  defer_feedback: bool,
  turn_stop_reason = "",
) -> AgentStallObservation {
  // An agent message breaks any action-repeat / ping-pong run.
  let cleared = __agent_stall_reset_action(state)
  let counted = cleared + {no_progress_streak: cleared.no_progress_streak + 1}
  if counted.no_progress_streak < config.no_progress_messages {
    // Still under the threshold: this step did not trip, so it recovers.
    return {
      state: __agent_stall_register_recovery(counted),
      enabled: true,
      warning: nil,
      feedback_deferred: false,
      config: config,
      hard_stop: false,
    }
  }
  let tripped = __agent_stall_register_trip(counted, config)
  // A turn that ended with a provider error (stop_reason=error) and emitted no
  // tool call gets cause-specific feedback, so the model knows its turn
  // errored and that it should re-emit the intended tool call.
  let pattern = if to_string(turn_stop_reason) == "error" {
    "errored_no_tool_call"
  } else {
    "no_progress_monologue"
  }
  // No tool is involved, so the name, arguments and signature are empty.
  let warning = __agent_stall_warning_record(
    iteration,
    "",
    {},
    "",
    pattern,
    tripped.no_progress_streak,
    tripped.consecutive_trips,
    tripped.hard_stop,
    config,
    config.no_progress_messages,
  )
  let emitted = __agent_stall_maybe_emit(session_id, warning, config, tripped, defer_feedback)
  let recorded = emitted.state + {warnings: emitted.state.warnings.push(warning)}
  return {
    state: recorded,
    enabled: true,
    warning: warning,
    feedback_deferred: emitted.feedback_deferred ?? false,
    config: config,
    hard_stop: recorded.hard_stop,
  }
}

/**
 * __agent_stall_process_call advances the detector state by one tool call.
 *
 * The call's action signature is compared with the previous one, the outcome
 * for `tool_name` is looked up through `__agent_stall_outcome_for`, every
 * streak counter is recomputed, and `__agent_stall_classify_trip` then decides
 * whether a condition tripped. The classifier checks, in order:
 * context-window errors, repeated error, repeated identical observation,
 * ping-pong, and finally the signature-only threshold (which applies only
 * when no dispatch outcome was available).
 *
 * Returns `{state, trip}`: the updated detector state and the trip dict, or
 * nil for `trip` when nothing tripped.
 */
fn __agent_stall_process_call(
  state: AgentStallState,
  config: AgentStallConfig,
  call,
  tool_name: string,
  prev_dispatch,
) -> dict {
  let signature = __agent_tool_call_signature(call)
  let repeats_last_action = signature == state.last_signature
  let outcome = __agent_stall_outcome_for(prev_dispatch, tool_name)
  // Without a dispatch outcome, a hash of the action signature stands in for
  // the observation so plain adjacent repeats keep counting as before.
  let obs_signature = if outcome != nil {
    outcome.signature
  } else {
    "sig:" + sha256(signature)
  }
  let outcome_kind = if outcome != nil {
    outcome.kind
  } else {
    "ok"
  }
  let outcome_present = outcome != nil
  // Ping-pong tracking is fed the action signature only, so it is folded in
  // before any of the same-action counters are derived.
  let ponged = __agent_stall_update_ping_pong(state, signature)
  let streak = if repeats_last_action {
    ponged.streak + 1
  } else {
    1
  }
  let repeated = if streak > 1 {
    ponged.repeated_tool_calls + 1
  } else {
    ponged.repeated_tool_calls
  }
  // Polling exemption: the observation only counts as "changed" when both the
  // stored and the current observation come from real dispatch outcomes and
  // their signatures differ. With no outcome available the fallback signature
  // is used and the raw signature repeat keeps counting.
  let both_known = outcome_present && ponged.last_outcome_kind != ""
  let observation_changed = both_known && obs_signature != ponged.last_observation_signature
  // A run restarts at 1 when the action differs from the previous one or the
  // observation demonstrably changed.
  let fresh_run = !repeats_last_action || observation_changed
  // Run of "ok" outcomes for the same action with an unchanged observation.
  let observation_run = if outcome_kind != "ok" {
    0
  } else if fresh_run {
    1
  } else {
    ponged.same_observation_streak + 1
  }
  // Run of "error" outcomes for the same action with an unchanged error.
  let error_run = if outcome_kind != "error" {
    0
  } else if fresh_run {
    1
  } else {
    ponged.same_error_streak + 1
  }
  // Context-window errors accumulate regardless of which action caused them.
  let context_run = if outcome_kind == "context_window" {
    ponged.context_window_error_streak + 1
  } else {
    0
  }
  // An empty kind marks "no real outcome recorded" for the next call's
  // `both_known` check.
  let recorded_kind = if outcome_present {
    outcome_kind
  } else {
    ""
  }
  let next_state = ponged
    + {
    last_signature: signature,
    streak: streak,
    repeated_tool_calls: repeated,
    last_observation_signature: obs_signature,
    last_outcome_kind: recorded_kind,
    same_observation_streak: observation_run,
    same_error_streak: error_run,
    context_window_error_streak: context_run,
  }
  let trip = __agent_stall_classify_trip(
    config,
    {
      context_streak: context_run,
      same_error_streak: error_run,
      same_observation_streak: observation_run,
      outcome_kind: outcome_kind,
      ping_pong_cycles: next_state.ping_pong_alternations / 2,
      outcome_present: outcome_present,
      streak: streak,
    },
  )
  return {state: next_state, trip: trip}
}

/**
 * __agent_stall_classify_trip picks the stall condition (if any) that tripped
 * for one processed call, given the counters in `s`.
 *
 * Checks run in a fixed order and the first match wins:
 *   1. repeated context-window error,
 *   2. repeated error (skipped when `config.repair_aware`),
 *   3. repeated identical observation (skipped when `config.repair_aware`),
 *   4. ping-pong,
 *   5. signature-only repeat, considered only when no outcome was present.
 *
 * With the evidence-aware repair loop enabled, repeated-failure handling
 * belongs to the repair overlay's `stuck_same_diagnostic` pattern, so checks 2
 * and 3 stay silent here to avoid firing twice. Context-window and ping-pong
 * are separate failure modes and are always evaluated.
 *
 * Returns `{pattern, count, threshold}` or nil when nothing tripped.
 */
fn __agent_stall_classify_trip(config: AgentStallConfig, s) {
  let context_limit = config.repeat_context_window_error
  if s.context_streak >= context_limit {
    return {pattern: "repeated_context_window_error", count: s.context_streak, threshold: context_limit}
  }
  if !config.repair_aware {
    let error_limit = config.repeat_same_error
    if s.same_error_streak >= error_limit {
      return {pattern: "repeated_error", count: s.same_error_streak, threshold: error_limit}
    }
    let observation_limit = config.repeat_same_observation
    if s.outcome_kind == "ok" && s.same_observation_streak >= observation_limit {
      return {
        pattern: "repeated_same_observation",
        count: s.same_observation_streak,
        threshold: observation_limit,
      }
    }
  }
  let cycle_limit = config.ping_pong_cycles
  if s.ping_pong_cycles >= cycle_limit {
    return {pattern: "ping_pong", count: s.ping_pong_cycles, threshold: cycle_limit}
  }
  // The signature-only threshold applies only when no dispatch outcome exists.
  if s.outcome_present {
    return nil
  }
  if s.streak >= config.threshold {
    return {pattern: "repeated_same_signature", count: s.streak, threshold: config.threshold}
  }
  return nil
}

/**
 * __agent_stall_observe_core runs the core degenerate-loop detection for one
 * turn. The public `agent_stall_observe_tool_calls` wraps it and applies the
 * repair overlay to the observation returned here.
 *
 * Conditions: same action -> identical observation, same action -> same
 * error, no-progress monologue, ping-pong, and repeated context-window
 * errors. `prev_dispatch` carries the previous turn's dispatch results so the
 * detector can attribute an observation signature to the repeated action;
 * pass nil to fall back to the signature-only same-action heuristic.
 * `exempt_tools` remains a secondary escape hatch.
 *
 * Turn handling, in order:
 *   - detector disabled: `state` is returned untouched with `enabled: false`.
 *   - `had_parse_errors`: neutral recovery turn (action stream reset, no
 *     warning).
 *   - no substantive call: a turn with visible text or only soft
 *     progress-report calls goes to the no-progress path; a fully empty turn
 *     is a neutral recovery turn.
 *   - otherwise every substantive call is folded through
 *     `__agent_stall_process_call`. A call with an empty name or an exempt
 *     tool resets the action stream instead of being folded.
 *
 * The returned `warning` is the FIRST warning produced this turn; every
 * warning is appended to `state.warnings`. `feedback_deferred` is true when
 * any emitted warning reported deferred feedback.
 *
 * @effects: [agent]
 * @errors: [agent_loop]
 * @api_stability: experimental
 */
fn __agent_stall_observe_core(
  session_id: string,
  tool_calls: list,
  iteration: int,
  raw_config,
  state: AgentStallState,
  defer_feedback: bool,
  prev_dispatch = nil,
  turn_text = "",
  had_parse_errors = false,
  turn_stop_reason = "",
) -> AgentStallObservation {
  let config = __agent_stall_config(raw_config)
  if !config.enabled {
    return {
      state: state,
      enabled: false,
      warning: nil,
      feedback_deferred: false,
      config: config,
      hard_stop: false,
    }
  }
  // A turn whose tool calls were all dropped by the parser carries real intent
  // (it tried to act); the malformed-call path already injected purpose-built
  // parse-guidance feedback. Treat it as a neutral recovery turn so it does NOT
  // count toward the no-progress monologue streak — otherwise the loop nags the
  // model to "emit one well-formed tool call" when it already did, just with a
  // malformed body. Gated purely on the syntactic parse-error signal.
  if had_parse_errors {
    return {
      state: __agent_stall_register_recovery(__agent_stall_reset_action(state)),
      enabled: true,
      warning: nil,
      feedback_deferred: false,
      config: config,
      hard_stop: false,
    }
  }
  // Soft progress-report tools (e.g. `agent_progress`) report status without
  // advancing task state, so they are excluded from the real-action stream: a
  // turn whose only call is such a report is treated as no-progress.
  let substantive_calls = tool_calls
    .filter({ c -> !contains(SOFT_PROGRESS_TOOLS, __agent_tool_call_name(c)) })
  if len(substantive_calls) == 0 {
    // No substantive tool call this turn — either none at all, or only soft
    // progress reports. With visible text or a progress report, that is a
    // no-progress monologue candidate; a fully empty turn resets the action
    // stream.
    if to_string(turn_text) != "" || len(tool_calls) > 0 {
      return __agent_stall_observe_no_progress(
        session_id,
        iteration,
        config,
        state,
        defer_feedback,
        turn_stop_reason,
      )
    }
    return {
      state: __agent_stall_register_recovery(__agent_stall_reset_action(state)),
      enabled: true,
      warning: nil,
      feedback_deferred: false,
      config: config,
      hard_stop: false,
    }
  }
  // A substantive call landed, so the monologue streak starts over.
  var next_state = state + {no_progress_streak: 0}
  var emitted_warning = nil
  var feedback_deferred = false
  for call in substantive_calls {
    let tool_name = __agent_tool_call_name(call)
    if tool_name == "" || contains(config.exempt_tools, tool_name) {
      // Unnamed or exempt calls break any running action streak.
      next_state = __agent_stall_register_recovery(__agent_stall_reset_action(next_state))
      continue
    }
    let processed = __agent_stall_process_call(next_state, config, call, tool_name, prev_dispatch)
    next_state = processed.state
    let trip = processed.trip
    if trip != nil {
      next_state = __agent_stall_register_trip(next_state, config)
      let warning = __agent_stall_warning_record(
        iteration,
        tool_name,
        __agent_tool_call_args(call),
        __agent_tool_call_signature(call),
        trip.pattern,
        trip.count,
        next_state.consecutive_trips,
        next_state.hard_stop,
        config,
        trip.threshold,
      )
      let emitted = __agent_stall_maybe_emit(session_id, warning, config, next_state, defer_feedback)
      next_state = emitted.state
      // Only the first warning of the turn is surfaced to the caller.
      if emitted_warning == nil {
        emitted_warning = warning
      }
      // Parenthesized so the nil default binds to the emit result alone,
      // independent of how `||` and `??` rank against each other.
      feedback_deferred = feedback_deferred || (emitted.feedback_deferred ?? false)
      next_state = next_state + {warnings: next_state.warnings.push(warning)}
    } else {
      next_state = __agent_stall_register_recovery(next_state)
    }
  }
  return {
    state: next_state,
    enabled: true,
    warning: emitted_warning,
    feedback_deferred: feedback_deferred,
    config: config,
    hard_stop: next_state.hard_stop,
  }
}

/**
 * __agent_stall_apply_repair is the evidence-aware repair overlay
 * (#repair-diagnostics). It is a no-op unless `config.repair_aware` is set.
 *
 * When enabled it:
 *   1. folds the current-failure model through `__agent_stall_fold_diagnostic`
 *      using `prev_dispatch` and `turn_made_edit`, and
 *   2. raises a "stuck_same_diagnostic" warning when the same-diagnostic
 *      streak reaches `stuck_same_diagnostic_after`.
 *
 * The warning goes through the same `__agent_stall_register_trip` /
 * `__agent_stall_maybe_emit` path as the other stall warnings. If the core
 * observation already carries a warning for this turn, only the fold is
 * applied and the existing warning is kept.
 */
fn __agent_stall_apply_repair(
  session_id: string,
  config: AgentStallConfig,
  observation: AgentStallObservation,
  iteration: int,
  prev_dispatch,
  turn_made_edit: bool,
  defer_feedback: bool,
) -> AgentStallObservation {
  if !config.repair_aware {
    return observation
  }
  let folded = __agent_stall_fold_diagnostic(observation.state, prev_dispatch, turn_made_edit)
  // The nudge fires once per stuck episode, which needs all three of:
  //   - the streak sits exactly at the threshold (not beyond it),
  //   - the streak grew this turn, so a turn that merely preserves it at the
  //     threshold does not fire again,
  //   - no other stall pattern already warned this turn.
  // Once the streak resets, a later climb back to the threshold can trip again.
  let at_threshold = folded.same_diagnostic_streak == config.stuck_same_diagnostic_after
  let streak_advanced = folded.same_diagnostic_streak > observation.state.same_diagnostic_streak
  let turn_is_quiet = observation.warning == nil
  if !(at_threshold && streak_advanced && turn_is_quiet) {
    return observation + {state: folded}
  }
  let tripped = __agent_stall_register_trip(folded, config)
  let diagnostic_signature = tripped.last_diagnostic_signature
  let base_warning = __agent_stall_warning_record(
    iteration,
    "",
    {},
    diagnostic_signature,
    "stuck_same_diagnostic",
    tripped.same_diagnostic_streak,
    tripped.consecutive_trips,
    tripped.hard_stop,
    config,
    config.stuck_same_diagnostic_after,
  )
  // Attach the diagnostic details to the standard warning record.
  let warning = base_warning
    + {
    diagnostic_class: tripped.last_diagnostic_class,
    diagnostic_signature: diagnostic_signature,
    diagnostic_snippet: tripped.last_diagnostic_snippet,
  }
  let emitted = __agent_stall_maybe_emit(session_id, warning, config, tripped, defer_feedback)
  let recorded = emitted.state + {warnings: emitted.state.warnings.push(warning)}
  return {
    state: recorded,
    enabled: true,
    warning: warning,
    feedback_deferred: emitted.feedback_deferred ?? false,
    config: config,
    hard_stop: recorded.hard_stop,
  }
}

/**
 * agent_stall_observe_tool_calls is the public entry point of the stall
 * detector. It runs the core degenerate-loop detection and then, when the
 * detector is enabled, passes the observation through the evidence-aware
 * repair overlay (active only with `stall_diagnostics.repair_aware`), which
 * folds a current-failure model and can nudge a strategy shift on a stuck
 * diagnostic.
 *
 * `turn_made_edit` reports whether the PRIOR turn (the one `prev_dispatch`
 * describes) made a successful workspace-mutating edit; it has no effect
 * unless `repair_aware` is set.
 *
 * @effects: [agent]
 * @errors: [agent_loop]
 * @api_stability: experimental
 */
pub fn agent_stall_observe_tool_calls(
  session_id: string,
  tool_calls: list,
  iteration: int,
  raw_config,
  state: AgentStallState,
  defer_feedback: bool,
  prev_dispatch = nil,
  turn_text = "",
  had_parse_errors = false,
  turn_made_edit = false,
  turn_stop_reason = "",
) -> AgentStallObservation {
  let core = __agent_stall_observe_core(
    session_id,
    tool_calls,
    iteration,
    raw_config,
    state,
    defer_feedback,
    prev_dispatch,
    turn_text,
    had_parse_errors,
    turn_stop_reason,
  )
  // The repair overlay only applies to an enabled detector; a disabled one
  // hands back the core observation as-is.
  if core.enabled {
    return __agent_stall_apply_repair(
      session_id,
      core.config,
      core,
      iteration,
      prev_dispatch,
      turn_made_edit,
      defer_feedback,
    )
  }
  return core
}

/**
 * agent_stall_repair_config parses `raw_config` and returns just the
 * evidence-aware repair knobs: `repair_aware`, `post_edit_reverify`, and
 * `stuck_same_diagnostic_after`. It lets the agent loop decide whether to
 * honor the post-edit re-verify mandate without calling the private config
 * parser itself.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_repair_config(raw_config) -> dict {
  let parsed = __agent_stall_config(raw_config)
  let repair_aware = parsed.repair_aware
  let post_edit_reverify = parsed.post_edit_reverify
  let stuck_after = parsed.stuck_same_diagnostic_after
  return {
    repair_aware: repair_aware,
    post_edit_reverify: post_edit_reverify,
    stuck_same_diagnostic_after: stuck_after,
  }
}

/**
 * agent_stall_current_failure turns the current-failure model held in
 * `stall_state` into a stop-payload block. It returns nil unless the last
 * diagnostic class is "fail". The block lets the terminal `stuck` and
 * budget-exhaustion payloads say WHAT failed rather than only "budget
 * exhausted".
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_current_failure(stall_state: AgentStallState) {
  let diagnostic_class = stall_state.last_diagnostic_class
  if diagnostic_class == "fail" {
    return {
      class: diagnostic_class,
      signature: stall_state.last_diagnostic_signature,
      snippet: stall_state.last_diagnostic_snippet,
      same_diagnostic_streak: stall_state.same_diagnostic_streak,
    }
  }
  return nil
}

/**
 * agent_stall_clear_current_failure resets the current-failure model: the
 * diagnostic class becomes "pass", the same-diagnostic streak drops to 0, and
 * `reverify_owed` is cleared.
 *
 * It exists so a SUCCESSFUL terminal hand-back (clean done / a passing
 * `verify_completion`) does not report a stale `current_failure`. A run can
 * finish without a passing verification result ever flowing through the fold
 * (e.g. the loop stops `done` after the model's prose, or `verify_completion`
 * passes out-of-band), which would otherwise leave
 * `last_diagnostic_class == "fail"`. The loop calls this on the
 * successful-termination signal before `agent_stall_apply_result`; a stuck or
 * exhausted run does NOT call it and so still carries the failure.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_clear_current_failure(stall_state: AgentStallState) -> AgentStallState {
  let cleared = stall_state
    + {last_diagnostic_class: "pass", same_diagnostic_streak: 0, reverify_owed: false}
  return cleared
}

/**
 * agent_stall_apply_result decorates an agent-loop result with stall
 * diagnostics: `repeated_tool_calls`, `stall_warnings`, and `suspected_loop`.
 * When a live failure remains it also attaches the evidence-aware repair
 * loop's `current_failure` summary.
 *
 * The result is returned unchanged when the detector is disabled and no
 * warnings were recorded.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_apply_result(result: dict, stall_enabled: bool, stall_state: AgentStallState) -> dict {
  let has_warnings = len(stall_state.warnings) > 0
  if !stall_enabled && !has_warnings {
    return result
  }
  let decorated = result
    + {
    repeated_tool_calls: stall_state.repeated_tool_calls,
    stall_warnings: stall_state.warnings,
    suspected_loop: has_warnings,
  }
  let failure = agent_stall_current_failure(stall_state)
  if failure != nil {
    return decorated + {current_failure: failure}
  }
  return decorated
}

// Returns the `done_judge.cadence` dict from `opts` when the done judge is
// configured with `when: "stalled"`; nil otherwise, including when
// `done_judge` or `cadence` is missing or not a dict.
fn __agent_done_judge_stall_cadence(opts) {
  let judge = opts?.done_judge
  if type_of(judge) == "dict" {
    let cadence = judge?.cadence
    if type_of(cadence) == "dict" && cadence?.when == "stalled" {
      return cadence
    }
  }
  return nil
}

/**
 * agent_stall_done_judge_due decides whether the stall done judge is due.
 *
 * The judge is due only when `opts.done_judge.cadence` is a dict with
 * `when: "stalled"` and every configured gate passes:
 *   - `max_invocations`: not due once `invocations` has reached it.
 *   - `min_iterations_before_first`: not due until `turn_number` exceeds it.
 *   - `every`: due only on turns where `turn_number` is a multiple of it. An
 *     `every` of 0 is treated as "no periodic constraint" rather than being
 *     used as a modulo divisor.
 * Gates that are absent (nil) are skipped.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 * @example: agent_stall_done_judge_due(opts, 0, 3)
 */
pub fn agent_stall_done_judge_due(opts, invocations: int, turn_number: int) -> bool {
  let cadence = __agent_done_judge_stall_cadence(opts)
  if cadence == nil {
    return false
  }
  let max_invocations = cadence?.max_invocations
  if max_invocations != nil && invocations >= max_invocations {
    return false
  }
  let min_iterations = cadence?.min_iterations_before_first
  if min_iterations != nil && turn_number <= min_iterations {
    return false
  }
  let every = cadence?.every
  // Guard the divisor: `turn_number % 0` would fail at runtime on a
  // misconfigured cadence.
  if every != nil && every != 0 && turn_number % every != 0 {
    return false
  }
  return true
}