import { agent_emit_event, agent_session_inject_feedback } from "std/agent/state"
// Deterministic thrash / degenerate-loop detector over the agent's
// action/observation stream (#2712, part of the #2708 cheap-model
// reliability stack).
//
// This is the deterministic GATE in the reliability stack. An LLM
// "are we thrashing?" self-evaluation is unreliable on its own: intrinsic
// self-evaluation degrades otherwise-correct work (Huang et al., ICLR 2024;
// Kamoi et al., TACL 2024). So the advisory judge (consulted via
// `loop.harn`'s `stall_judge_due` path) must NEVER fire on its own — it is
// consulted only AFTER this detector trips, and even then it stays advisory.
//
// Compose, do not duplicate: every trip is surfaced through the EXISTING
// `agent_loop_stall_warning` event (the `warning` payload is passed through
// to the host verbatim, so enriching it needs no Rust change), the advisory
// judge stays gated on `stall_warning != nil`, and a hard stop surfaces
// through the loop's existing terminal `stuck` / `loop_control_decision`
// path. We deliberately avoid a parallel Rust detector or a new hook surface.
//
// Trip conditions (each behind a named, tunable threshold; mirrors the
// design reference's defaults):
//
// 1. same action -> byte-identical observation, repeated >= 4 times. This
// SUBSUMES the old adjacent-same-signature logic with an
// observation-signature refinement.
// 2. same action -> same error, repeated >= 3 times.
// 3. no-progress monologue: >= 3 consecutive assistant turns with text but
// no tool call.
// 4. ping-pong: A,B,A,B alternation between two actions >= 6 cycles.
// 5. repeated context-window / token-limit errors >= 2 times.
//
// The polling / retry exemption (principled, not an allowlist; the fix for
// OpenHands #5355): a repeated identical action only counts toward conditions
// (1) and (4) when its observation signature is BYTE-IDENTICAL across
// repeats. Legitimate polling produces a changing observation (the status
// advances, the watched file mutates), which resets the streak, so polling
// never trips. The `exempt_tools` allowlist still works for back-compat but
// is now a SECONDARY escape hatch — the observation-signature principle is
// the primary mechanism.
//
// Default thresholds. Each STALL_* constant below seeds the matching
// AgentStallConfig field in __agent_stall_default_config and is the fallback
// __agent_stall_config uses when the caller's override is missing or invalid.
//
// Same action -> identical observation repeats before tripping
// (default for `repeat_same_observation`).
const STALL_REPEAT_SAME_OBSERVATION = 4
// Same action -> same error repeats before tripping
// (default for `repeat_same_error`).
const STALL_REPEAT_SAME_ERROR = 3
// Consecutive no-progress agent messages (text, no tool call) before tripping
// (default for `no_progress_messages`).
const STALL_NO_PROGRESS_MESSAGES = 3
// Ping-pong cycles (A,B,A,B,...) between two actions before tripping
// (default for `ping_pong_cycles`).
const STALL_PING_PONG_CYCLES = 6
// Repeated context-window / token-limit errors before tripping
// (default for `repeat_context_window_error`).
const STALL_REPEAT_CONTEXT_WINDOW_ERROR = 2
// Evidence-aware repair loop (#repair-diagnostics): default number of repair
// turns the SAME diagnostic may recur (same signature, no intervening edit)
// before the detector trips the "stuck_same_diagnostic" strategy-shift nudge
// (default for `stuck_same_diagnostic_after`).
const STALL_STUCK_SAME_DIAGNOSTIC_AFTER = 3
// Consecutive un-recovered trips before a hard stop is recommended. A trip
// "recovers" when a turn produces a step that does not itself trip (the agent
// broke the loop). Past this many back-to-back trips the detector flags
// `hard_stop` so the loop can surface a terminal stuck stop instead of
// nudging forever (default for `hard_stop_after_trips`).
const STALL_HARD_STOP_AFTER_TRIPS = 3
// Tools that report progress without changing task state. A turn whose only
// tool call is one of these is "narrating progress without making it": it is
// counted as a no-progress turn for stall accounting, so a run of them (e.g. a
// model spamming `agent_progress` after it gets stuck) trips the no-progress
// detector instead of masquerading as activity. A turn that also makes a real
// tool call is unaffected — the real call carries the action stream.
const SOFT_PROGRESS_TOOLS = ["agent_progress"]
/**
* AgentStallConfig is the normalized detector configuration produced by
* __agent_stall_config from the caller's `stall_diagnostics` value.
*
* Core knobs:
* - enabled: detector switch. Defaults to false when no config is given and to
* true when a dict is given without an explicit `enabled`.
* - threshold: legacy "N identical calls" knob (ints below 2 and non-ints fall
* back to 3). When set explicitly it also seeds `repeat_same_observation`.
* - inject_feedback / max_feedback: whether, and how many times, stall feedback
* is injected into the session (see agent_stall_inject_feedback).
* - exempt_tools: secondary allowlist; `allow_repeated_tools` is accepted as an
* alias when `exempt_tools` is absent.
* - include_arguments: attach the raw call arguments to warning records.
* - repeat_same_observation / repeat_same_error / no_progress_messages /
* ping_pong_cycles / repeat_context_window_error / hard_stop_after_trips:
* per-condition trip thresholds, defaulting to the STALL_* constants.
*
* Evidence-aware repair loop (#repair-diagnostics), spanning AgentStallConfig /
* AgentStallWarning / AgentStallState below. Default OFF: when `repair_aware`
* is false every repair field is inert and the detector behaves exactly as
* before. When ON, the loop tracks a current-failure model (signature + epoch)
* instead of a blind counter, forces a post-edit re-verify, and nudges a
* strategy shift after the same diagnostic recurs across
* `stuck_same_diagnostic_after` repair turns.
* - Config knobs: repair_aware / stuck_same_diagnostic_after / post_edit_reverify.
* - Warning extras (only on the "stuck_same_diagnostic" pattern):
* diagnostic_class / diagnostic_signature / diagnostic_snippet.
* - State (the current-failure model — folded by __agent_stall_fold_diagnostic
* from the turn's verification result, and deliberately NOT cleared by
* __agent_stall_reset_action so a different corrective action does not clear
* "same root failure"): last_diagnostic_class / last_diagnostic_signature /
* last_diagnostic_snippet / write_epoch / same_diagnostic_streak /
* edit_since_failure / reverify_owed.
*/
type AgentStallConfig = {
enabled: bool,
threshold: int,
inject_feedback: bool,
max_feedback: int,
exempt_tools: list,
include_arguments: bool,
repeat_same_observation: int,
repeat_same_error: int,
no_progress_messages: int,
ping_pong_cycles: int,
repeat_context_window_error: int,
hard_stop_after_trips: int,
repair_aware: bool,
stuck_same_diagnostic_after: int,
post_edit_reverify: bool,
}
/**
* AgentStallWarning is the payload of one detector trip, as built by
* __agent_stall_warning_record and emitted on `agent_loop_stall_warning`.
*
* - iteration: loop iteration the trip was observed on.
* - tool_name: the repeated action ("" for the no-progress patterns).
* - repeat_count / count: both carry the streak length that tripped.
* - threshold: the configured limit that `count` reached.
* - arguments_digest / signature_digest: "sha256:"-prefixed digests of the
* JSON-encoded arguments and of the action signature.
* - pattern: which condition tripped (e.g. "repeated_error", "ping_pong").
* - signature: the raw action signature.
* - consecutive_trips / hard_stop: escalation state at the time of the trip.
* - arguments: only present when `config.include_arguments` is true.
* - diagnostic_*: repair-loop extras, optional.
*/
type AgentStallWarning = {
iteration: int,
tool_name: string,
repeat_count: int,
threshold: int,
arguments_digest: string,
signature_digest: string,
pattern: string,
signature: string,
count: int,
consecutive_trips: int,
hard_stop: bool,
arguments?: dict,
diagnostic_class?: string,
diagnostic_signature?: string,
diagnostic_snippet?: string,
}
/**
* AgentStallState is the detector's running state, threaded through every
* observation. It starts from agent_stall_initial_state.
*
* - last_signature / streak / repeated_tool_calls: adjacent same-action
* tracking, updated by __agent_stall_process_call.
* - warnings: every warning recorded so far.
* - feedback_count: feedback messages injected so far (bounded by
* `config.max_feedback`).
* - last_observation_signature / last_outcome_kind: the previous observation;
* `last_outcome_kind` is "" when no dispatch outcome was available.
* - same_observation_streak / same_error_streak / context_window_error_streak:
* per-condition streak counters.
* - no_progress_streak: consecutive turns without a tool call.
* - ping_pong_a / ping_pong_b / ping_pong_alternations: the candidate A/B pair
* and its alternation count (see __agent_stall_update_ping_pong).
* - consecutive_trips / hard_stop: escalation set by
* __agent_stall_register_trip and cleared by __agent_stall_register_recovery.
* - last_diagnostic_* / write_epoch / same_diagnostic_streak /
* edit_since_failure / reverify_owed: the repair-loop current-failure model,
* folded by __agent_stall_fold_diagnostic.
*/
type AgentStallState = {
last_signature: string,
streak: int,
warnings: list<AgentStallWarning>,
repeated_tool_calls: int,
feedback_count: int,
last_observation_signature: string,
last_outcome_kind: string,
same_observation_streak: int,
same_error_streak: int,
context_window_error_streak: int,
no_progress_streak: int,
ping_pong_a: string,
ping_pong_b: string,
ping_pong_alternations: int,
consecutive_trips: int,
hard_stop: bool,
last_diagnostic_class: string,
last_diagnostic_signature: string,
last_diagnostic_snippet: string,
write_epoch: int,
same_diagnostic_streak: int,
edit_since_failure: bool,
reverify_owed: bool,
}
/**
* AgentStallObservation is the result of observing one turn.
*
* - state: the updated detector state to carry into the next turn.
* - enabled: whether the detector was active for this observation.
* - warning: the warning that fired this turn, or nil when nothing tripped.
* - feedback_deferred: true when the warning was emitted but feedback
* injection was left to the caller.
* - config: the resolved configuration used for this observation.
* - hard_stop: whether the loop should surface a terminal stuck stop.
*/
type AgentStallObservation = {
state: AgentStallState,
enabled: bool,
warning: AgentStallWarning?,
feedback_deferred: bool,
config: AgentStallConfig,
hard_stop: bool,
}
/**
 * Validate an optional boolean `stall_diagnostics` option.
 *
 * Returns the value itself when it is a bool and `fallback` when it is nil.
 * Any other type throws, naming the offending `field` in the message.
 */
fn __agent_stall_bool(value, fallback: bool, field: string) -> bool {
  let kind = type_of(value)
  if kind == "bool" {
    return value
  }
  if value == nil {
    return fallback
  }
  throw "agent_loop: stall_diagnostics." + field + " must be a bool; got " + kind
}
/**
 * Validate an optional list-valued `stall_diagnostics` option.
 *
 * Returns the value itself when it is a list and an empty list when it is
 * nil. Any other type throws, naming the offending `field` in the message.
 */
fn __agent_stall_list(value, field: string) -> list {
  let kind = type_of(value)
  if kind == "list" {
    return value
  }
  if value != nil {
    throw "agent_loop: stall_diagnostics." + field + " must be a list; got " + kind
  }
  return []
}
/**
 * Resolve an optional integer threshold.
 *
 * Returns `value` only when it is an int no smaller than `minimum`. Anything
 * else (nil, wrong type, too small) silently yields `fallback`.
 */
fn __agent_stall_int(value, fallback: int, minimum: int) -> int {
  if type_of(value) != "int" {
    return fallback
  }
  if value < minimum {
    return fallback
  }
  return value
}
/**
* Build the baseline configuration used when the caller supplies no
* `stall_diagnostics` value (or only a bool). The detector is disabled by
* default, every trip threshold comes from its STALL_* constant, and the
* repair-aware loop is off.
*/
fn __agent_stall_default_config() -> AgentStallConfig {
return {
enabled: false,
threshold: 3,
inject_feedback: true,
max_feedback: 1,
exempt_tools: [],
include_arguments: false,
repeat_same_observation: STALL_REPEAT_SAME_OBSERVATION,
repeat_same_error: STALL_REPEAT_SAME_ERROR,
no_progress_messages: STALL_NO_PROGRESS_MESSAGES,
ping_pong_cycles: STALL_PING_PONG_CYCLES,
repeat_context_window_error: STALL_REPEAT_CONTEXT_WINDOW_ERROR,
hard_stop_after_trips: STALL_HARD_STOP_AFTER_TRIPS,
repair_aware: false,
stuck_same_diagnostic_after: STALL_STUCK_SAME_DIAGNOSTIC_AFTER,
// Only consulted when `repair_aware` is on.
post_edit_reverify: true,
}
}
/**
 * Normalize the caller's `stall_diagnostics` value into an AgentStallConfig.
 *
 * - nil  => the defaults (detector disabled).
 * - bool => the defaults with `enabled` set to that bool.
 * - dict => per-field resolution: bool and list fields throw on a wrong type,
 *   integer fields silently fall back to their default when missing, of the
 *   wrong type, or below their minimum.
 * - anything else throws.
 */
fn __agent_stall_config(value) -> AgentStallConfig {
  let base = __agent_stall_default_config()
  if value == nil {
    return base
  }
  let kind = type_of(value)
  if kind == "bool" {
    return base + {enabled: value}
  }
  if kind != "dict" {
    throw "agent_loop: `stall_diagnostics` must be a dict, bool, or nil; got " + kind
  }
  // `exempt_tools` wins; `allow_repeated_tools` is the back-compat alias.
  let exempt_source = value?.exempt_tools
  let exempt = if exempt_source == nil {
    __agent_stall_list(value?.allow_repeated_tools, "allow_repeated_tools")
  } else {
    __agent_stall_list(exempt_source, "exempt_tools")
  }
  let legacy_threshold = __agent_stall_int(value?.threshold, 3, 2)
  let feedback_cap = __agent_stall_int(value?.max_feedback, 1, 0)
  // The legacy `threshold` knob (N repeated identical calls) expresses the same
  // intent as the same-observation condition, so an explicitly supplied
  // `threshold` also seeds `repeat_same_observation`. Without one, the
  // same-observation condition keeps the reference default.
  let same_observation_default = if value?.threshold == nil {
    STALL_REPEAT_SAME_OBSERVATION
  } else {
    legacy_threshold
  }
  return {
    enabled: __agent_stall_bool(value?.enabled, true, "enabled"),
    threshold: legacy_threshold,
    inject_feedback: __agent_stall_bool(value?.inject_feedback, true, "inject_feedback"),
    max_feedback: feedback_cap,
    exempt_tools: exempt,
    include_arguments: __agent_stall_bool(value?.include_arguments, false, "include_arguments"),
    repeat_same_observation: __agent_stall_int(value?.repeat_same_observation, same_observation_default, 2),
    repeat_same_error: __agent_stall_int(value?.repeat_same_error, STALL_REPEAT_SAME_ERROR, 2),
    no_progress_messages: __agent_stall_int(value?.no_progress_messages, STALL_NO_PROGRESS_MESSAGES, 2),
    ping_pong_cycles: __agent_stall_int(value?.ping_pong_cycles, STALL_PING_PONG_CYCLES, 2),
    repeat_context_window_error: __agent_stall_int(value?.repeat_context_window_error, STALL_REPEAT_CONTEXT_WINDOW_ERROR, 1),
    hard_stop_after_trips: __agent_stall_int(value?.hard_stop_after_trips, STALL_HARD_STOP_AFTER_TRIPS, 1),
    repair_aware: __agent_stall_bool(value?.repair_aware, false, "repair_aware"),
    stuck_same_diagnostic_after: __agent_stall_int(value?.stuck_same_diagnostic_after, STALL_STUCK_SAME_DIAGNOSTIC_AFTER, 2),
    post_edit_reverify: __agent_stall_bool(value?.post_edit_reverify, true, "post_edit_reverify"),
  }
}
/**
* agent_stall_initial_state creates the empty detector state for a fresh run:
* no remembered action or observation, every streak and counter at zero, no
* warnings recorded, no hard stop, and an empty repair-loop failure model.
*
* @effects: []
* @errors: []
* @api_stability: experimental
*/
pub fn agent_stall_initial_state() -> AgentStallState {
return {
last_signature: "",
streak: 0,
warnings: [],
repeated_tool_calls: 0,
feedback_count: 0,
last_observation_signature: "",
// "" means "no observation known yet" (see __agent_stall_process_call).
last_outcome_kind: "",
same_observation_streak: 0,
same_error_streak: 0,
context_window_error_streak: 0,
no_progress_streak: 0,
ping_pong_a: "",
ping_pong_b: "",
ping_pong_alternations: 0,
consecutive_trips: 0,
hard_stop: false,
last_diagnostic_class: "",
last_diagnostic_signature: "",
last_diagnostic_snippet: "",
write_epoch: 0,
same_diagnostic_streak: 0,
edit_since_failure: false,
reverify_owed: false,
}
}
/**
 * Forget the action stream: the remembered action and observation signatures,
 * the same-action / same-observation / same-error / context-window streaks,
 * and the ping-pong pair. Every other field is carried over unchanged — in
 * particular the consecutive-trip escalation, the feedback counter, and the
 * repair-loop failure model. Used when an action is exempt or the turn made
 * no tool call.
 */
fn __agent_stall_reset_action(state: AgentStallState) -> AgentStallState {
  let cleared = {
    last_signature: "",
    streak: 0,
    last_observation_signature: "",
    last_outcome_kind: "",
    same_observation_streak: 0,
    same_error_streak: 0,
    context_window_error_streak: 0,
    ping_pong_a: "",
    ping_pong_b: "",
    ping_pong_alternations: 0,
  }
  return state + cleared
}
/**
 * Read a tool call's name, accepting either the `name` or the `tool_name`
 * key (in that order). Yields "" when neither is present.
 */
fn __agent_tool_call_name(call) -> string {
  let raw_name = call?.name ?? call?.tool_name ?? ""
  return to_string(raw_name)
}
/**
 * Read a tool call's arguments, accepting either the `arguments` or the
 * `tool_args` key (in that order). Anything that is not a dict is treated as
 * "no arguments" and yields an empty dict.
 */
fn __agent_tool_call_args(call) -> dict {
  let candidate = call?.arguments ?? call?.tool_args
  if type_of(candidate) != "dict" {
    return {}
  }
  return candidate
}
/**
 * Build the action signature for a tool call: the tool name, a newline, then
 * the JSON encoding of its arguments. Two calls are "the same action" when
 * their signatures are equal.
 */
fn __agent_tool_call_signature(call) -> string {
  let tool = __agent_tool_call_name(call)
  let encoded_args = json_stringify(__agent_tool_call_args(call))
  return tool + "\n" + encoded_args
}
/**
 * Observation classification (the polling-exemption mechanism).
 *
 * Decide whether a dispatch result succeeded. The first signal present wins:
 * an explicit `ok` flag, then an explicit `success` flag, then a `status` of
 * "ok" or "success". A result carrying none of these counts as not-ok.
 */
fn __agent_stall_result_ok(result) -> bool {
  let ok_flag = result?.ok
  if ok_flag != nil {
    return ok_flag ? true : false
  }
  let success_flag = result?.success
  if success_flag != nil {
    return success_flag ? true : false
  }
  let status_text = to_string(result?.status ?? "")
  return status_text == "success" || status_text == "ok"
}
/**
 * Read the tool name a dispatch result belongs to, preferring `tool_name`
 * over `name`. Yields "" when neither is present.
 */
fn __agent_stall_result_name(result) -> string {
  let raw_name = result?.tool_name ?? result?.name ?? ""
  return to_string(raw_name)
}
/**
 * Recognize a context-window / token-limit error by case-insensitive
 * substring match against a fixed set of phrases. This is its own trip
 * condition because repeating such an error is never productive and never
 * recovers on its own.
 */
fn __agent_stall_is_context_window_error(text: string) -> bool {
  let haystack = lowercase(text)
  let needles = [
    "context window",
    "context length",
    "context_length",
    "maximum context",
    "token limit",
    "too many tokens",
    "context_length_exceeded",
    "max_tokens",
  ]
  for needle in needles {
    if contains(haystack, needle) {
      return true
    }
  }
  return false
}
/**
 * Normalize a dispatched result for the detector. Returns a dict with:
 * - kind: "ok", "error", or "context_window"
 * - signature: "ok:" + sha256 of the JSON-encoded payload for a success,
 *   "err:" + sha256 of the error text for a failure, or the constant "ctx"
 *   for a context-window error (so every such error compares equal).
 */
fn __agent_stall_result_outcome(result) -> dict {
  if !__agent_stall_result_ok(result) {
    let error_text = to_string(result?.error ?? result?.message ?? result?.result ?? "")
    if __agent_stall_is_context_window_error(error_text) {
      return {kind: "context_window", signature: "ctx"}
    }
    return {kind: "error", signature: "err:" + sha256(error_text)}
  }
  // Success: hash whichever payload field is present (nil when none is).
  let observed = result?.result ?? result?.output ?? result?.content
  let encoded = json_stringify(observed)
  return {kind: "ok", signature: "ok:" + sha256(encoded)}
}
/**
 * Short, human-readable evidence for a failing result: the trimmed error text
 * cut to at most 240 characters. A successful result yields "". The snippet
 * grounds the strategy-shift nudge and is never hashed — the byte-stable
 * signature comes from __agent_stall_result_outcome.
 */
fn __agent_stall_result_snippet(result) -> string {
  if __agent_stall_result_ok(result) {
    return ""
  }
  let raw = result?.error ?? result?.message ?? result?.result ?? ""
  let snippet = trim(to_string(raw))
  if len(snippet) > 240 {
    return snippet.slice(0, 240)
  }
  return snippet
}
/**
 * Choose the representative verification result for a turn.
 *
 * `prev_dispatch` is either a list of results or a dict carrying them under
 * `results`. The FIRST failing result wins (it is the diagnostic the agent
 * must fix). When nothing failed, the first result stands in as the "pass".
 * Returns nil when there is nothing to classify, so the caller leaves the
 * current-failure model untouched.
 */
fn __agent_stall_turn_result(prev_dispatch) {
  if prev_dispatch == nil {
    return nil
  }
  let entries = if type_of(prev_dispatch) != "list" {
    prev_dispatch?.results ?? []
  } else {
    prev_dispatch
  }
  var fallback = nil
  for entry in entries {
    if __agent_stall_result_ok(entry) {
      // Remember only the earliest passing result.
      if fallback == nil {
        fallback = entry
      }
    } else {
      return entry
    }
  }
  return fallback
}
/**
* __agent_stall_fold_diagnostic folds the current-failure model for one turn.
* It is a PURE function of (state, prev_dispatch, turn_made_edit):
*
* - It classifies the turn's representative verification result (the first
* failing dispatch result, else the first result) into a ProbeOutcome-shaped
* class and a byte-stable signature via __agent_stall_result_outcome (never
* reimplementing the sha256/normalization).
* - On a successful workspace-mutating edit this turn it bumps `write_epoch`
* (which arms the post-edit re-verify mandate the loop honors).
* - The failure SIGNATURE is the progress signal: a productive edit changes
* the error (signature changes => streak resets — legitimate progress, no
* false positive); a flailing edit leaves the SAME error (signature
* identical => streak advances — real thrash). So the streak advances on the
* same signature REGARDLESS of intervening edits.
* - It advances/resets `same_diagnostic_streak`:
* same failure signature => futile (streak + 1), even across edits
* different signature => reset to 1 (a different mistake, or progress)
* "pass" => clear streak, reverify_owed, edit_since_failure
*
* Result handling:
* - a FAILING result => fold the streak as above; this is verification
* evidence, so any owed re-verify is satisfied (clear reverify_owed /
* edit_since_failure). A turn that BOTH edits and re-tests still classifies
* here: __agent_stall_turn_result returns the FIRST failing result, so the
* re-test's failure is seen past the edit's own "ok" (the edit bumps the
* epoch; the fail still advances the streak).
* - a non-failing result, NOT an edit turn => a genuine passing verification
* clears the current-failure model.
* - a non-failing result on an EDIT turn => the result is the edit's own
* success, NOT verification of the failure. Owe a re-verify and preserve the
* failure model (same as the no-result edit case).
* - no result (prev_dispatch nil/empty) => if this turn made an edit on top of
* a live failure, owe a re-verify (reverify_owed / edit_since_failure) and
* bump the epoch; otherwise leave the model untouched (epoch still bumps on
* an edit).
*
* Parameters:
* - state: the detector state to fold into (never mutated; a merged copy is
* returned).
* - prev_dispatch: the dispatch results to classify — a list of results, a
* dict carrying them under `results`, or nil.
* - turn_made_edit: whether this turn made a workspace edit; the caller
* decides what counts as one.
*
* Returns the state with only the repair-loop fields updated
* (last_diagnostic_* / write_epoch / same_diagnostic_streak /
* edit_since_failure / reverify_owed).
*/
fn __agent_stall_fold_diagnostic(state: AgentStallState, prev_dispatch, turn_made_edit: bool) -> AgentStallState {
// Edit bookkeeping first: a successful workspace mutation advances the epoch,
// which arms the post-edit re-verify mandate in the loop.
let new_write_epoch = if turn_made_edit {
state.write_epoch + 1
} else {
state.write_epoch
}
let result = __agent_stall_turn_result(prev_dispatch)
// A nil result (nothing dispatched) is deliberately NOT a failure.
let result_is_fail = result != nil && !__agent_stall_result_ok(result)
if !result_is_fail {
// No FAILING verification this turn. A successful edit on top of a live
// failure is NOT proof the failure is resolved (its "ok" is the edit
// succeeding, not the check passing): owe a re-verify and preserve the
// current-failure model so the next re-fail of the SAME diagnostic is
// recognized across the edit. A genuine passing result on a NON-edit turn
// is real verification evidence and clears the model.
if turn_made_edit && state.last_diagnostic_class == "fail" {
return state
+ {write_epoch: new_write_epoch, edit_since_failure: true, reverify_owed: true}
}
if result == nil {
// No verification evidence and no edit-on-failure: leave the model alone.
return state + {write_epoch: new_write_epoch}
}
// A passing verification clears the current-failure model.
let pass_outcome = __agent_stall_result_outcome(result)
return state
+ {
last_diagnostic_class: "pass",
last_diagnostic_signature: pass_outcome.signature,
last_diagnostic_snippet: "",
write_epoch: new_write_epoch,
same_diagnostic_streak: 0,
edit_since_failure: false,
reverify_owed: false,
}
}
// result_is_fail: fold against the prior failure model. The signature
// is the progress signal, so the streak advances on the SAME signature
// regardless of whether an edit intervened — a flailing edit that leaves the
// same error counts as thrash, while a productive edit changes the signature
// and resets the streak (the false-positive safety property).
let outcome = __agent_stall_result_outcome(result)
let signature = outcome.signature
// Require the previous class to be "fail" so a stale "pass" signature can
// never be mistaken for a repeat of this failure.
let same_signature = signature == state.last_diagnostic_signature
&& state.last_diagnostic_class == "fail"
let new_streak = if same_signature {
state.same_diagnostic_streak + 1
} else {
1
}
// This is a verification result, so the post-edit re-verify mandate (if any)
// has now been satisfied: clear reverify_owed and the edit-since marker. The
// NEXT edit after this failure re-arms them.
return state
+ {
last_diagnostic_class: "fail",
last_diagnostic_signature: signature,
last_diagnostic_snippet: __agent_stall_result_snippet(result),
write_epoch: new_write_epoch,
same_diagnostic_streak: new_streak,
edit_since_failure: false,
reverify_owed: false,
}
}
/**
 * Look up the normalized outcome of the given tool in a dispatch.
 *
 * `prev_dispatch` is either a list of results or a dict carrying them under
 * `results`. The detector tracks one action at a time, so the first result
 * whose tool name matches is the relevant observation. Returns nil when there
 * is no dispatch or no matching result.
 */
fn __agent_stall_outcome_for(prev_dispatch, tool_name: string) {
  if prev_dispatch == nil {
    return nil
  }
  let entries = if type_of(prev_dispatch) != "list" {
    prev_dispatch?.results ?? []
  } else {
    prev_dispatch
  }
  for entry in entries {
    let is_match = __agent_stall_result_name(entry) == tool_name
    if is_match {
      return __agent_stall_result_outcome(entry)
    }
  }
  return nil
}
/**
 * Render the feedback message injected into the session for a warning.
 *
 * - "stuck_same_diagnostic" gets a repair-specific strategy-shift nudge,
 *   quoting the diagnostic snippet when one is present.
 * - Every other pattern is a one-clause description of what repeated, wrapped
 *   in either the "hard stop" framing (when `warning.hard_stop` is set) or the
 *   ordinary "Stall diagnostic" framing.
 * - An unrecognized pattern falls back to the legacy "identical arguments"
 *   wording.
 */
fn __agent_stall_feedback_text(warning: AgentStallWarning) -> string {
  let kind = warning.pattern
  let times = to_string(warning.count)
  // Evidence-aware repair loop: ground the nudge in the actual diagnostic
  // rather than a blind repeat counter. The same failure survived the
  // configured number of repair turns, so the right move is to change
  // approach, not to retry harder.
  if kind == "stuck_same_diagnostic" {
    let snippet = to_string(warning.diagnostic_snippet ?? "")
    var message = "Repair diagnostic: the same failure has persisted across "
      + times
      + " repair turns without changing."
    if snippet != "" {
      message = message + " The failing evidence is unchanged:\n" + snippet
    }
    return message
      + "\nStop retrying the same fix. Re-read the relevant code, form a NEW hypothesis "
      + "about the root cause, or gather different evidence before editing again. If you "
      + "cannot make progress, report this specific blocker instead of repeating."
  }
  let core = if kind == "ping_pong" {
    "the loop is ping-ponging between two actions (" + times + " cycles)"
  } else if kind == "repeated_context_window_error" {
    "the run hit a context-window error " + times + " times in a row"
  } else if kind == "no_progress_monologue" {
    times + " consecutive assistant messages made no progress (no tool call)"
  } else if kind == "errored_no_tool_call" {
    "your previous turn ended with a provider error and emitted no tool call ("
      + times
      + " in a row) — re-emit the intended tool call"
  } else if kind == "repeated_error" {
    "the action `" + warning.tool_name + "` failed with the same error " + times + " times in a row"
  } else if kind == "repeated_same_observation" {
    "the action `" + warning.tool_name + "` produced the same observation " + times
      + " times in a row"
  } else {
    "the last " + times + " tool calls repeated `" + warning.tool_name
      + "` with identical arguments"
  }
  if !warning.hard_stop {
    return "Stall diagnostic: "
      + core
      + ". Use different evidence, finish, or explain why repeating is necessary before doing so again."
  }
  let trips = to_string(warning.consecutive_trips)
  return "Loop detector (hard stop): "
    + core
    + ". "
    + trips
    + " trips with no recovery — stop and report the blocker instead of repeating."
}
/**
 * Assemble the warning payload for a trip.
 *
 * `count` is stored under both `repeat_count` and `count`. The arguments and
 * the signature are additionally published as "sha256:"-prefixed digests. The
 * raw `arguments` dict is attached only when `config.include_arguments` is
 * set.
 */
fn __agent_stall_warning_record(
  iteration: int,
  tool_name: string,
  args: dict,
  signature: string,
  pattern: string,
  count: int,
  consecutive_trips: int,
  hard_stop: bool,
  config: AgentStallConfig,
  threshold: int,
) -> AgentStallWarning {
  let args_digest = "sha256:" + sha256(json_stringify(args))
  let base = {
    iteration: iteration,
    tool_name: tool_name,
    repeat_count: count,
    threshold: threshold,
    arguments_digest: args_digest,
    signature_digest: "sha256:" + sha256(signature),
    pattern: pattern,
    signature: signature,
    count: count,
    consecutive_trips: consecutive_trips,
    hard_stop: hard_stop,
  }
  if !config.include_arguments {
    return base
  }
  return base + {arguments: args}
}
/**
 * agent_stall_inject_feedback injects bounded stall feedback.
 *
 * Feedback is injected only when `config.inject_feedback` is on (treated as
 * on when unset) and fewer than `config.max_feedback` messages have been
 * injected so far. On injection the returned state has `feedback_count`
 * incremented; otherwise the state is returned unchanged.
 *
 * @effects: [agent]
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_inject_feedback(
  session_id: string,
  warning: AgentStallWarning,
  config: AgentStallConfig,
  state: AgentStallState,
) -> AgentStallState {
  let feedback_enabled = config.inject_feedback ?? true
  if feedback_enabled {
    if state.feedback_count < config.max_feedback {
      let message = __agent_stall_feedback_text(warning)
      agent_session_inject_feedback(session_id, "stall_diagnostics", message)
      return state + {feedback_count: state.feedback_count + 1}
    }
  }
  return state
}
/**
 * Surface a trip: always emit the `agent_loop_stall_warning` event, then
 * either inject feedback right away or, when `defer_feedback` is set, leave
 * injection to the caller. Returns a dict with the (possibly updated) state,
 * the warning, and a `feedback_deferred` flag.
 */
fn __agent_stall_maybe_emit(
  session_id: string,
  warning: AgentStallWarning,
  config: AgentStallConfig,
  state: AgentStallState,
  defer_feedback: bool,
) {
  agent_emit_event(session_id, "agent_loop_stall_warning", warning)
  if !defer_feedback {
    let updated = agent_stall_inject_feedback(session_id, warning, config, state)
    return {state: updated, warning: warning, feedback_deferred: false}
  }
  return {state: state, warning: warning, feedback_deferred: true}
}
/**
 * Ping-pong tracking: follow a candidate A/B pair of action signatures and
 * count how many times the stream has alternated between them.
 *
 * `ping_pong_alternations` holds the ALTERNATION count, not the cycle count:
 * A->B is one alternation, A->B->A is two, and two alternations make one
 * cycle (callers derive cycles as alternations / 2).
 *
 * - No pair yet: the signature becomes A.
 * - Only A known: a different signature becomes B (one alternation); the same
 *   signature again is a plain repeat, not ping-pong.
 * - Pair known: the expected next signature extends the run. Anything else —
 *   a third action, or A/B out of strict alternation — restarts tracking
 *   anchored on the current signature.
 */
fn __agent_stall_update_ping_pong(state: AgentStallState, signature: string) -> AgentStallState {
  let first = state.ping_pong_a
  let second = state.ping_pong_b
  let restart = {ping_pong_a: signature, ping_pong_b: "", ping_pong_alternations: 0}
  if first == "" {
    return state + restart
  }
  if second == "" {
    if first != signature {
      return state + {ping_pong_b: signature, ping_pong_alternations: 1}
    }
    // Same action twice — this is the same-action repeat path, not ping-pong.
    return state + {ping_pong_alternations: 0}
  }
  // After an odd number of alternations the last action was B, so A is due.
  let a_is_due = state.ping_pong_alternations % 2 == 1
  let expected_next = if a_is_due {
    first
  } else {
    second
  }
  if signature != expected_next {
    return state + restart
  }
  return state + {ping_pong_alternations: state.ping_pong_alternations + 1}
}
/**
 * Record a fired trip: increment the consecutive-trip counter and raise
 * `hard_stop` once that counter reaches `config.hard_stop_after_trips`.
 */
fn __agent_stall_register_trip(state: AgentStallState, config: AgentStallConfig) -> AgentStallState {
  let trip_count = state.consecutive_trips + 1
  let must_stop = trip_count >= config.hard_stop_after_trips
  return state + {consecutive_trips: trip_count, hard_stop: must_stop}
}
/**
 * Record a non-tripping step: the agent broke the loop, so the
 * consecutive-trip escalation is cleared and any hard-stop flag is dropped.
 * This keeps the hard stop reserved for genuinely uninterrupted thrashing.
 */
fn __agent_stall_register_recovery(state: AgentStallState) -> AgentStallState {
  let recovered = {consecutive_trips: 0, hard_stop: false}
  return state + recovered
}
/**
 * Observe a no-progress turn (assistant produced text but no tool call).
 *
 * The action-stream tracking is cleared and `no_progress_streak` advances.
 * Below `config.no_progress_messages` the turn counts as a recovery and no
 * warning is produced. At or past the threshold a trip is registered, a
 * warning is emitted (and feedback injected unless deferred), and the
 * warning is appended to `state.warnings`.
 *
 * `turn_stop_reason` selects the pattern: "error" yields
 * "errored_no_tool_call", anything else "no_progress_monologue".
 */
fn __agent_stall_observe_no_progress(
  session_id: string,
  iteration: int,
  config: AgentStallConfig,
  state: AgentStallState,
  defer_feedback: bool,
  turn_stop_reason = "",
) -> AgentStallObservation {
  // An agent message breaks any action-repeat / ping-pong run.
  let cleared = __agent_stall_reset_action(state)
  let counted = cleared + {no_progress_streak: cleared.no_progress_streak + 1}
  if counted.no_progress_streak < config.no_progress_messages {
    return {
      state: __agent_stall_register_recovery(counted),
      enabled: true,
      warning: nil,
      feedback_deferred: false,
      config: config,
      hard_stop: false,
    }
  }
  let tripped = __agent_stall_register_trip(counted, config)
  // A turn that ended with a provider error (stop_reason=error) but emitted no
  // tool call narrated its intent without landing an action. It gets
  // cause-specific feedback instead of the generic "made no progress" nag, so
  // the model knows its turn errored and to re-emit the intended tool call.
  let pattern = if to_string(turn_stop_reason) != "error" {
    "no_progress_monologue"
  } else {
    "errored_no_tool_call"
  }
  let warning = __agent_stall_warning_record(
    iteration,
    "",
    {},
    "",
    pattern,
    tripped.no_progress_streak,
    tripped.consecutive_trips,
    tripped.hard_stop,
    config,
    config.no_progress_messages,
  )
  let emitted = __agent_stall_maybe_emit(session_id, warning, config, tripped, defer_feedback)
  let after_emit = emitted.state
  let final_state = after_emit + {warnings: after_emit.warnings.push(warning)}
  return {
    state: final_state,
    enabled: true,
    warning: warning,
    feedback_deferred: emitted.feedback_deferred ?? false,
    config: config,
    hard_stop: final_state.hard_stop,
  }
}
/**
* __agent_stall_process_call folds a single tool call (and the observation it
* produced last turn) into the detector state and decides which condition, if
* any, tripped. Extracted from `agent_stall_observe_tool_calls` to keep that
* function within the cyclomatic-complexity budget. Returns the updated state
* plus a `trip` dict (or nil). Trip ordering: context-window, then repeated
* error, then repeated identical observation, then ping-pong, then the legacy
* signature-only threshold (used only when no dispatch results are available).
*
* Parameters:
* - state: detector state before this call.
* - config: resolved detector configuration (supplies the thresholds).
* - call: the tool call being observed; its name and arguments form the
* action signature.
* - tool_name: the name used to look up this call's result in `prev_dispatch`.
* - prev_dispatch: dispatch results to attribute an observation from, or nil.
*
* Returns `{state, trip}` where `trip` is nil or a
* `{pattern, count, threshold}` dict from __agent_stall_classify_trip. This
* function neither emits events nor registers the trip.
*/
fn __agent_stall_process_call(
state: AgentStallState,
config: AgentStallConfig,
call,
tool_name: string,
prev_dispatch,
) -> dict {
let signature = __agent_tool_call_signature(call)
let same_action = signature == state.last_signature
let outcome = __agent_stall_outcome_for(prev_dispatch, tool_name)
// Observation signature of the action that just repeated. When dispatch
// results are unavailable, fall back to the action signature itself so the
// legacy adjacent-repeat behavior still trips.
let obs_signature = if outcome != nil {
outcome.signature
} else {
"sig:" + sha256(signature)
}
// With no outcome available the call is treated as "ok" for streak purposes.
let outcome_kind = if outcome != nil {
outcome.kind
} else {
"ok"
}
// Ping-pong is independent of the same-action streak and only considers
// distinct alternating actions; update it first.
var next_state = __agent_stall_update_ping_pong(state, signature)
let streak = if same_action {
next_state.streak + 1
} else {
1
}
// `repeated_tool_calls` is a running total of adjacent repeats; it only
// ever grows here.
let repeated = if streak > 1 {
next_state.repeated_tool_calls + 1
} else {
next_state.repeated_tool_calls
}
// The polling exemption (principled, not an allowlist): a repeated identical
// action counts toward the same-observation condition UNLESS we have positive
// evidence the observation changed — i.e. both this turn's and last turn's
// observations are real (non-fallback) AND differ. A changing observation is
// legitimate polling (status advances, watched file mutates) and resets the
// streak. When observations are unavailable (no dispatch results threaded in)
// the streak counts the raw signature repeat, preserving the legacy
// adjacent-identical-call behavior so existing thresholds still trip.
let both_known = outcome != nil && next_state.last_outcome_kind != ""
let observation_changed = both_known && obs_signature != next_state.last_observation_signature
// A new action or a non-ok outcome restarts the streak: at 1 when this call
// is itself an "ok" observation, at 0 otherwise.
let new_same_observation_streak = if !same_action || outcome_kind != "ok" {
if outcome_kind == "ok" {
1
} else {
0
}
} else if observation_changed {
1
} else {
next_state.same_observation_streak + 1
}
// Same-error streak: same action producing the same (or unknown) error.
// Distinct errors reset, because the agent is making different mistakes.
let new_same_error_streak = if !same_action || outcome_kind != "error" {
if outcome_kind == "error" {
1
} else {
0
}
} else if observation_changed {
1
} else {
next_state.same_error_streak + 1
}
// Context-window errors accumulate regardless of which action produced them.
let new_context_streak = if outcome_kind == "context_window" {
next_state.context_window_error_streak + 1
} else {
0
}
next_state = next_state
+ {
last_signature: signature,
streak: streak,
repeated_tool_calls: repeated,
last_observation_signature: obs_signature,
// "" records that no real observation was available this turn, which keeps
// `both_known` false on the next call.
last_outcome_kind: if outcome != nil {
outcome_kind
} else {
""
},
same_observation_streak: new_same_observation_streak,
same_error_streak: new_same_error_streak,
context_window_error_streak: new_context_streak,
}
// Two alternations (A->B->A) make one ping-pong cycle.
let ping_pong_cycles = next_state.ping_pong_alternations / 2
let trip = __agent_stall_classify_trip(
config,
{
context_streak: new_context_streak,
same_error_streak: new_same_error_streak,
same_observation_streak: new_same_observation_streak,
outcome_kind: outcome_kind,
ping_pong_cycles: ping_pong_cycles,
outcome_present: outcome != nil,
streak: streak,
},
)
return {state: next_state, trip: trip}
}
/**
 * Decide which stall condition, if any, tripped for a single processed call.
 * Returns a `{pattern, count, threshold}` dict, or nil when nothing tripped.
 *
 * Conditions are checked in priority order and the first hit wins:
 * context-window errors, repeated error, repeated identical observation,
 * ping-pong, then the legacy signature-only threshold (which applies only
 * when no dispatch outcome was available).
 *
 * When the evidence-aware repair loop is enabled (`config.repair_aware`) it
 * owns the repeated-failure semantics through its own `stuck_same_diagnostic`
 * pattern, so the `repeated_error` and `repeated_same_observation` trips are
 * suppressed here to avoid a double fire. Context-window and ping-pong stay
 * active — they are orthogonal failure modes the repair model does not cover.
 *
 * `s` is the per-call summary assembled by __agent_stall_process_call.
 */
fn __agent_stall_classify_trip(config: AgentStallConfig, s) {
  let context_limit = config.repeat_context_window_error
  if s.context_streak >= context_limit {
    return {
      pattern: "repeated_context_window_error",
      count: s.context_streak,
      threshold: context_limit,
    }
  }
  if !config.repair_aware {
    let error_limit = config.repeat_same_error
    if s.same_error_streak >= error_limit {
      return {pattern: "repeated_error", count: s.same_error_streak, threshold: error_limit}
    }
    let observation_limit = config.repeat_same_observation
    if s.outcome_kind == "ok" && s.same_observation_streak >= observation_limit {
      return {
        pattern: "repeated_same_observation",
        count: s.same_observation_streak,
        threshold: observation_limit,
      }
    }
  }
  let cycle_limit = config.ping_pong_cycles
  if s.ping_pong_cycles >= cycle_limit {
    return {pattern: "ping_pong", count: s.ping_pong_cycles, threshold: cycle_limit}
  }
  let legacy_limit = config.threshold
  if !s.outcome_present && s.streak >= legacy_limit {
    return {pattern: "repeated_same_signature", count: s.streak, threshold: legacy_limit}
  }
  return nil
}
/**
 * __agent_stall_observe_core runs the core stall detector over one turn's
 * tool calls and returns the updated state plus the first warning raised.
 *
 * Conditions: same action -> identical observation, same action -> same
 * error, no-progress monologue, ping-pong, and repeated context-window
 * errors. `prev_dispatch` carries the previous turn's dispatch results so the
 * detector can attribute an observation signature to the repeated action;
 * pass nil to fall back to the signature-only same-action heuristic. The
 * polling exemption is principled: a repeated identical action counts toward
 * the same-observation and ping-pong conditions only when its observation is
 * byte-identical across repeats. `exempt_tools` remains a secondary escape
 * hatch.
 *
 * Paths, in order:
 *   1. Config disabled: `state` is returned untouched with `enabled: false`.
 *   2. `had_parse_errors`: neutral recovery turn (action stream reset and a
 *      recovery registered); no warning.
 *   3. No substantive call (no calls, or only SOFT_PROGRESS_TOOLS): if the
 *      turn has text or at least one call, the result of
 *      `__agent_stall_observe_no_progress` is returned; a fully empty turn is
 *      a neutral recovery turn.
 *   4. Otherwise every substantive call is processed in order. Each trip is
 *      registered, turned into a warning record, passed to
 *      `__agent_stall_maybe_emit`, and appended to `state.warnings`.
 *
 * The returned `warning` is the FIRST warning produced this turn (nil if
 * none); `hard_stop` mirrors the final state's `hard_stop`.
 *
 * @effects: [agent]
 * @errors: [agent_loop]
 * @api_stability: experimental
 */
fn __agent_stall_observe_core(
session_id: string,
tool_calls: list,
iteration: int,
raw_config,
state: AgentStallState,
defer_feedback: bool,
prev_dispatch = nil,
turn_text = "",
had_parse_errors = false,
turn_stop_reason = "",
) -> AgentStallObservation {
let config = __agent_stall_config(raw_config)
// Detector switched off: hand the state back unchanged and flag
// `enabled: false` in the observation.
if !config.enabled {
return {
state: state,
enabled: false,
warning: nil,
feedback_deferred: false,
config: config,
hard_stop: false,
}
}
// A turn whose tool calls were all dropped by the parser carries real intent
// (it tried to act); the malformed-call path already injected purpose-built
// parse-guidance feedback. Treat it as a neutral recovery turn so it does NOT
// count toward the no-progress monologue streak — otherwise the loop nags the
// model to "emit one well-formed tool call" when it already did, just with a
// malformed body. Gated purely on the syntactic parse-error signal.
if had_parse_errors {
return {
state: __agent_stall_register_recovery(__agent_stall_reset_action(state)),
enabled: true,
warning: nil,
feedback_deferred: false,
config: config,
hard_stop: false,
}
}
// Soft progress-report tools (e.g. `agent_progress`) report status without
// advancing task state, so they are excluded from the real-action stream: a
// turn whose only call is such a report is treated as no-progress.
let substantive_calls = tool_calls
.filter({ c -> !contains(SOFT_PROGRESS_TOOLS, __agent_tool_call_name(c)) })
if len(substantive_calls) == 0 {
// No substantive tool call this turn — either none at all, or only soft
// progress reports. With visible text or a progress report, that is a
// no-progress monologue candidate; a fully empty turn resets the action
// stream.
if to_string(turn_text) != "" || len(tool_calls) > 0 {
return __agent_stall_observe_no_progress(
session_id,
iteration,
config,
state,
defer_feedback,
turn_stop_reason,
)
}
return {
state: __agent_stall_register_recovery(__agent_stall_reset_action(state)),
enabled: true,
warning: nil,
feedback_deferred: false,
config: config,
hard_stop: false,
}
}
// A substantive call happened, so the no-progress monologue streak restarts.
var next_state = state + {no_progress_streak: 0}
var emitted_warning = nil
var feedback_deferred = false
for call in substantive_calls {
let tool_name = __agent_tool_call_name(call)
// Unnamed or explicitly exempt tools never count toward a streak: they
// reset the action stream and register a recovery instead.
if tool_name == "" || contains(config.exempt_tools, tool_name) {
next_state = __agent_stall_register_recovery(__agent_stall_reset_action(next_state))
continue
}
let processed = __agent_stall_process_call(next_state, config, call, tool_name, prev_dispatch)
next_state = processed.state
let trip = processed.trip
if trip != nil {
// Register the trip BEFORE building the warning so the record carries the
// post-registration `consecutive_trips` / `hard_stop` values.
next_state = __agent_stall_register_trip(next_state, config)
let warning = __agent_stall_warning_record(
iteration,
tool_name,
__agent_tool_call_args(call),
__agent_tool_call_signature(call),
trip.pattern,
trip.count,
next_state.consecutive_trips,
next_state.hard_stop,
config,
trip.threshold,
)
let emitted = __agent_stall_maybe_emit(session_id, warning, config, next_state, defer_feedback)
next_state = emitted.state
// Only the first warning of the turn is surfaced in the return value; later
// ones are still appended to `state.warnings` below.
if emitted_warning == nil {
emitted_warning = warning
}
// NOTE(review): the grouping here depends on the relative precedence of
// `||` and `??` — confirm against the language reference.
feedback_deferred = feedback_deferred || emitted.feedback_deferred ?? false
next_state = next_state + {warnings: next_state.warnings.push(warning)}
} else {
next_state = __agent_stall_register_recovery(next_state)
}
}
return {
state: next_state,
enabled: true,
warning: emitted_warning,
feedback_deferred: feedback_deferred,
config: config,
hard_stop: next_state.hard_stop,
}
}
/**
 * Evidence-aware repair overlay (#repair-diagnostics). Default OFF; when
 * `config.repair_aware` is false the observation is returned unchanged.
 *
 * When enabled, the overlay:
 *   1. folds the current-failure model from the PRIOR turn's verification
 *      result and whether that turn made a successful edit
 *      (`turn_made_edit`), and
 *   2. raises the "stuck_same_diagnostic" strategy-shift nudge once the same
 *      diagnostic has survived `stuck_same_diagnostic_after` repair turns.
 *
 * The nudge rides the EXISTING `agent_loop_stall_warning` event and the
 * `__agent_stall_register_trip` escalation; no new event type is introduced.
 * If the core observation already carries a warning for this turn, only the
 * model is folded (two warnings are never stacked in one turn).
 */
fn __agent_stall_apply_repair(
  session_id: string,
  config: AgentStallConfig,
  observation: AgentStallObservation,
  iteration: int,
  prev_dispatch,
  turn_made_edit: bool,
  defer_feedback: bool,
) -> AgentStallObservation {
  if !config.repair_aware {
    return observation
  }
  let prior_state = observation.state
  let folded = __agent_stall_fold_diagnostic(prior_state, prev_dispatch, turn_made_edit)
  // The nudge fires exactly once per stuck episode, and only when ALL hold:
  //   - the streak sits exactly ON the threshold (not past it), so later
  //     identical failures do not re-fire;
  //   - the streak ADVANCED this turn, i.e. a fresh same-signature failure was
  //     folded (an edit/owe turn holds the streak at the threshold without
  //     re-folding a failure and must not re-trip);
  //   - no other stall pattern already warned this turn (no double-nudge).
  // A later streak reset (a different or passing signature) re-arms the trip
  // for the next genuinely stuck episode.
  let at_threshold = folded.same_diagnostic_streak == config.stuck_same_diagnostic_after
  let advanced = folded.same_diagnostic_streak > prior_state.same_diagnostic_streak
  let already_warned = observation.warning != nil
  if !at_threshold || !advanced || already_warned {
    return observation + {state: folded}
  }
  let tripped = __agent_stall_register_trip(folded, config)
  let base_record = __agent_stall_warning_record(
    iteration,
    "",
    {},
    tripped.last_diagnostic_signature,
    "stuck_same_diagnostic",
    tripped.same_diagnostic_streak,
    tripped.consecutive_trips,
    tripped.hard_stop,
    config,
    config.stuck_same_diagnostic_after,
  )
  // Enrich the generic warning record with the diagnostic that is stuck.
  let diagnostic_fields = {
    diagnostic_class: tripped.last_diagnostic_class,
    diagnostic_signature: tripped.last_diagnostic_signature,
    diagnostic_snippet: tripped.last_diagnostic_snippet,
  }
  let warning = base_record + diagnostic_fields
  let emitted = __agent_stall_maybe_emit(session_id, warning, config, tripped, defer_feedback)
  let emitted_state = emitted.state
  let final_state = emitted_state + {warnings: emitted_state.warnings.push(warning)}
  return {
    state: final_state,
    enabled: true,
    warning: warning,
    feedback_deferred: emitted.feedback_deferred ?? false,
    config: config,
    hard_stop: final_state.hard_stop,
  }
}
/**
 * agent_stall_observe_tool_calls is the public entry point of the stall
 * detector: it runs the core observation for the turn and then, unless the
 * detector is disabled, layers the evidence-aware repair overlay on top.
 *
 * The overlay is inert unless `stall_diagnostics.repair_aware` is set. When
 * it is, a current-failure model is folded and a strategy shift is nudged on
 * a stuck diagnostic. `turn_made_edit` reports whether the PRIOR turn (the
 * one `prev_dispatch` describes) made a successful workspace-mutating edit.
 *
 * @effects: [agent]
 * @errors: [agent_loop]
 * @api_stability: experimental
 */
pub fn agent_stall_observe_tool_calls(
  session_id: string,
  tool_calls: list,
  iteration: int,
  raw_config,
  state: AgentStallState,
  defer_feedback: bool,
  prev_dispatch = nil,
  turn_text = "",
  had_parse_errors = false,
  turn_made_edit = false,
  turn_stop_reason = "",
) -> AgentStallObservation {
  let core = __agent_stall_observe_core(
    session_id,
    tool_calls,
    iteration,
    raw_config,
    state,
    defer_feedback,
    prev_dispatch,
    turn_text,
    had_parse_errors,
    turn_stop_reason,
  )
  // Reuse the config the core pass already parsed instead of re-parsing
  // `raw_config` for the overlay.
  if observation_enabled(core) {
    return __agent_stall_apply_repair(
      session_id,
      core.config,
      core,
      iteration,
      prev_dispatch,
      turn_made_edit,
      defer_feedback,
    )
  }
  return core
}

/** Reports whether the core observation ran with the detector enabled. */
fn observation_enabled(observation) {
  return observation.enabled
}
/**
 * agent_stall_repair_config returns just the evidence-aware repair knobs
 * (`repair_aware`, `post_edit_reverify`, `stuck_same_diagnostic_after`) from
 * the parsed stall config, so the agent loop can decide whether to honor the
 * post-edit re-verify mandate without reaching into the private config
 * parser.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_repair_config(raw_config) -> dict {
  let parsed = __agent_stall_config(raw_config)
  let repair_aware = parsed.repair_aware
  let post_edit_reverify = parsed.post_edit_reverify
  let stuck_after = parsed.stuck_same_diagnostic_after
  return {
    repair_aware: repair_aware,
    post_edit_reverify: post_edit_reverify,
    stuck_same_diagnostic_after: stuck_after,
  }
}
/**
 * agent_stall_current_failure projects the current-failure model into a
 * stop-payload block. It yields nil unless the last diagnostic class is
 * "fail", i.e. when there is no live failure. The block enriches the terminal
 * `stuck` and budget-exhaustion payloads with WHAT failed, not just "budget
 * exhausted".
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_current_failure(stall_state: AgentStallState) {
  let diagnostic_class = stall_state.last_diagnostic_class
  if diagnostic_class == "fail" {
    return {
      class: diagnostic_class,
      signature: stall_state.last_diagnostic_signature,
      snippet: stall_state.last_diagnostic_snippet,
      same_diagnostic_streak: stall_state.same_diagnostic_streak,
    }
  }
  return nil
}
/**
 * agent_stall_clear_current_failure wipes the current-failure model so a
 * SUCCESSFUL terminal hand-back (clean done / a passing `verify_completion`)
 * does not report a stale `current_failure`.
 *
 * A run can finish without a passing verification result ever flowing through
 * the fold (e.g. the loop stops `done` after the model's prose, or
 * `verify_completion` passes out-of-band), which would otherwise leave
 * `last_diagnostic_class == "fail"`. The loop calls this on the
 * successful-termination signal before `agent_stall_apply_result`. A stuck or
 * exhausted run does NOT call this, so it still carries the failure.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_clear_current_failure(stall_state: AgentStallState) -> AgentStallState {
  // Overlay applied on top of the existing state; every other field is kept.
  let cleared_fields = {last_diagnostic_class: "pass", same_diagnostic_streak: 0, reverify_owed: false}
  return stall_state + cleared_fields
}
/**
 * agent_stall_apply_result decorates an agent-loop result with stall
 * diagnostics: the repeated-tool-call count, the collected warnings, and a
 * `suspected_loop` flag. When a live failure remains, the evidence-aware
 * repair loop's `current_failure` summary is attached as well.
 *
 * The result is returned untouched when stall detection is disabled and no
 * warnings were recorded.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 */
pub fn agent_stall_apply_result(result: dict, stall_enabled: bool, stall_state: AgentStallState) -> dict {
  let recorded = stall_state.warnings
  let has_warnings = len(recorded) > 0
  if !stall_enabled && !has_warnings {
    return result
  }
  let diagnostics = {
    repeated_tool_calls: stall_state.repeated_tool_calls,
    stall_warnings: recorded,
    suspected_loop: has_warnings,
  }
  let enriched = result + diagnostics
  let live_failure = agent_stall_current_failure(stall_state)
  if live_failure != nil {
    return enriched + {current_failure: live_failure}
  }
  return enriched
}
/**
 * Extract the done-judge cadence from loop options, but only when it is a
 * stall-triggered cadence. Returns the `opts.done_judge.cadence` dict when
 * both `done_judge` and `cadence` are dicts and `cadence.when` is "stalled";
 * returns nil in every other case.
 */
fn __agent_done_judge_stall_cadence(opts) {
  let judge = opts?.done_judge
  if type_of(judge) == "dict" {
    let cadence = judge?.cadence
    if type_of(cadence) == "dict" && cadence?.when == "stalled" {
      return cadence
    }
  }
  return nil
}
/**
 * agent_stall_done_judge_due decides whether the stall done judge is due.
 *
 * The judge can only be due when `opts.done_judge.cadence` is a dict whose
 * `when` is "stalled". Each optional cadence knob can then veto the call:
 *   - `max_invocations`: not due once `invocations` has reached it.
 *   - `min_iterations_before_first`: not due while `turn_number` is at or
 *     below it.
 *   - `every`: not due unless `turn_number` is a multiple of it. An `every`
 *     of 0 is ignored (treated like an unset knob) instead of being used as a
 *     modulo divisor.
 *
 * Returns true only when no knob vetoes.
 *
 * @effects: []
 * @errors: []
 * @api_stability: experimental
 * @example: agent_stall_done_judge_due(opts, 0, 3)
 */
pub fn agent_stall_done_judge_due(opts, invocations: int, turn_number: int) -> bool {
  let cadence = __agent_done_judge_stall_cadence(opts)
  if cadence == nil {
    return false
  }
  let max_invocations = cadence?.max_invocations
  if max_invocations != nil && invocations >= max_invocations {
    return false
  }
  let min_iterations = cadence?.min_iterations_before_first
  if min_iterations != nil && turn_number <= min_iterations {
    return false
  }
  let every = cadence?.every
  // `every` comes from caller-supplied options; guard the divisor so a
  // configured 0 cannot turn this predicate into a modulo-by-zero.
  if every != nil && every != 0 && turn_number % every != 0 {
    return false
  }
  return true
}