use std::sync::{Arc, Mutex};
use crate::sync_util::LockExt;
use super::message::{LoopMessage, UserMessage};
/// Maximum number of `(tool, excerpt)` entries kept in each recent-failure list;
/// older entries are dropped from the front once this is exceeded.
const MAX_QUOTED: usize = 5;
/// Maximum number of characters `condense` keeps from an excerpt before it
/// appends an ellipsis.
const EXCERPT_CAP: usize = 160;
/// Tracks streaks of failed and permission-denied tool calls and produces
/// checkpoint messages once a streak reaches `threshold`.
///
/// All mutable state lives behind a mutex, so the tracker is used through
/// `&self` (it is handed out as an `Arc` by `FailureTracker::new`).
#[derive(Debug)]
pub struct FailureTracker {
/// Streak counters and recent-failure excerpts.
inner: Mutex<Inner>,
/// Counter value at which a checkpoint becomes due; `new` asserts it is >= 2.
threshold: usize,
}
/// Mutable state of a `FailureTracker`. Every field is reset by an `Ok` outcome.
#[derive(Debug)]
struct Inner {
/// Number of `Error`/`Timeout` outcomes recorded since the last `Ok`.
consecutive: usize,
/// Weighted failure score since the last `Ok`: +1 per `Error`, +2 per `Timeout`.
/// This is the value compared against the threshold.
escalation: usize,
/// Number of `Timeout` outcomes recorded since the last `Ok`.
timeouts: usize,
/// `(tool name, condensed excerpt)` of the most recent `Error`/`Timeout`
/// outcomes, capped at `MAX_QUOTED` entries.
recent: Vec<(String, String)>,
/// Value of `escalation` when the last recovery checkpoint was emitted;
/// 0 means none has been emitted since the last reset.
last_emitted_at: usize,
/// Number of `Denied` outcomes recorded since the last `Ok`.
denials: usize,
/// `(tool name, condensed excerpt)` of the most recent `Denied` outcomes,
/// capped at `MAX_QUOTED` entries.
recent_denials: Vec<(String, String)>,
/// Value of `denials` when the last permission checkpoint was emitted;
/// 0 means none has been emitted since the last reset.
last_denial_emitted_at: usize,
}
impl FailureTracker {
    /// Creates a shared tracker whose checkpoints become due once a streak
    /// counter reaches `threshold`.
    ///
    /// # Panics
    ///
    /// Panics if `threshold` is less than 2.
    pub fn new(threshold: usize) -> Arc<Self> {
        assert!(
            threshold >= 2,
            "failure tracker threshold must be >= 2 (got {threshold})"
        );
        let fresh = Inner {
            consecutive: 0,
            escalation: 0,
            timeouts: 0,
            recent: Vec::new(),
            last_emitted_at: 0,
            denials: 0,
            recent_denials: Vec::new(),
            last_denial_emitted_at: 0,
        };
        Arc::new(Self {
            inner: Mutex::new(fresh),
            threshold,
        })
    }

    /// Records the outcome of one tool call.
    ///
    /// * `Ok` clears both the failure streak and the denial streak.
    /// * `Denied` extends only the denial streak.
    /// * `Error` and `Timeout` each extend the failure streak by one call;
    ///   they add 1 and 2 respectively to the escalation score, and a timeout
    ///   is additionally counted in `timeouts`.
    ///
    /// For non-`Ok` outcomes the tool name and a condensed form of `excerpt`
    /// are remembered so a later checkpoint can quote them.
    pub fn record(&self, outcome: super::activity::Outcome, tool_name: &str, excerpt: &str) {
        use super::activity::Outcome;

        let mut guard = self.inner.lock_ignore_poison();
        // Reborrow once so individual fields can be borrowed independently.
        let state = &mut *guard;

        // (escalation weight, timeout increment) for mechanical failures;
        // the other outcomes are handled completely inside their arm.
        let (weight, timed_out) = match outcome {
            Outcome::Ok => {
                state.recent.clear();
                state.recent_denials.clear();
                state.consecutive = 0;
                state.escalation = 0;
                state.timeouts = 0;
                state.last_emitted_at = 0;
                state.denials = 0;
                state.last_denial_emitted_at = 0;
                return;
            }
            Outcome::Denied => {
                state.denials += 1;
                Self::push_capped(&mut state.recent_denials, tool_name, excerpt);
                return;
            }
            Outcome::Error => (1, 0),
            Outcome::Timeout => (2, 1),
        };

        state.consecutive += 1;
        state.escalation += weight;
        state.timeouts += timed_out;
        Self::push_capped(&mut state.recent, tool_name, excerpt);
    }

    /// Test-only shorthand: records `Error` when `is_error` is true, `Ok` otherwise.
    #[cfg(test)]
    pub fn record_result(&self, is_error: bool, tool_name: &str, excerpt: &str) {
        use super::activity::Outcome;
        self.record(
            if is_error { Outcome::Error } else { Outcome::Ok },
            tool_name,
            excerpt,
        );
    }

    /// Returns the checkpoint messages that are currently due, if any.
    ///
    /// The permission checkpoint (driven by the denial count) is considered
    /// first, then the recovery checkpoint (driven by the escalation score).
    /// Emitting a checkpoint stores the counter value it fired at, so polling
    /// again without new failures returns nothing; the checkpoint fires again
    /// only after the counter has grown by another `threshold`.
    pub fn poll_reflection(&self) -> Vec<LoopMessage> {
        let mut guard = self.inner.lock_ignore_poison();
        let state = &mut *guard;
        let threshold = self.threshold;
        let mut messages = Vec::new();

        if Self::is_due(state.denials, state.last_denial_emitted_at, threshold) {
            state.last_denial_emitted_at = state.denials;
            let content = format_permission_checkpoint(state.denials, &state.recent_denials);
            messages.push(LoopMessage::User(UserMessage { content }));
        }

        if Self::is_due(state.escalation, state.last_emitted_at, threshold) {
            state.last_emitted_at = state.escalation;
            let content = format_checkpoint(state.consecutive, state.timeouts, &state.recent);
            messages.push(LoopMessage::User(UserMessage { content }));
        }

        messages
    }

    /// Appends `(tool_name, condensed excerpt)` to `log`, then drops the
    /// oldest entries so that at most `MAX_QUOTED` remain.
    fn push_capped(log: &mut Vec<(String, String)>, tool_name: &str, excerpt: &str) {
        log.push((tool_name.to_string(), condense(excerpt)));
        let excess = log.len().saturating_sub(MAX_QUOTED);
        log.drain(..excess);
    }

    /// A checkpoint is due when `current` has reached `threshold` and either
    /// nothing was emitted yet (`last_emitted == 0`) or `current` has grown by
    /// at least `threshold` since the last emission.
    fn is_due(current: usize, last_emitted: usize, threshold: usize) -> bool {
        current >= threshold
            && (last_emitted == 0 || current.saturating_sub(last_emitted) >= threshold)
    }
}
fn condense(s: &str) -> String {
let one_line: String = s.split_whitespace().collect::<Vec<_>>().join(" ");
if one_line.chars().count() > EXCERPT_CAP {
let kept: String = one_line.chars().take(EXCERPT_CAP).collect();
format!("{kept}…")
} else {
one_line
}
}
/// Builds the body of a recovery checkpoint message.
///
/// The text consists of, in order: a header stating how many calls failed in
/// a row (`consecutive`), one ` - tool: excerpt` line per entry in `recent`,
/// a paragraph about timeouts when `timeouts` is non-zero, fixed diagnostic
/// guidance, and — when `dominant_tool` reports that every entry in `recent`
/// names the same tool — a closing hint naming that tool.
fn format_checkpoint(consecutive: usize, timeouts: usize, recent: &[(String, String)]) -> String {
    let mut report =
        format!("[Recovery checkpoint] {consecutive} tool calls in a row have failed:\n");

    report.extend(
        recent
            .iter()
            .map(|(tool, excerpt)| format!(" - {tool}: {excerpt}\n")),
    );

    if timeouts != 0 {
        let timeout_note = format!(
            "{timeouts} of these timed out — the command ran out its time budget, it didn't \
             fail on bad input. Re-running it unchanged will hang again: narrow the work, fix \
             why it hangs, or raise the timeout deliberately — don't just retry.\n",
        );
        report += &timeout_note;
    }

    report += "Stop and diagnose before retrying — this is a system checkpoint, not a new task:\n\
               1. What root cause do these share — wrong arguments, wrong tool, or wrong approach?\n\
               2. If you've already tried a fix twice, it isn't working. Change the approach; don't tweak it.\n\
               3. If you're missing information, gather it first (read the file, list the directory,\n\
               re-read the exact error) before acting again.\n\
               Name the root cause in one sentence, then take a DIFFERENT next step.";

    // Empty when the quoted failures do not all share one tool.
    let tool_hint = dominant_tool(recent)
        .map(|tool| {
            format!(
                "\nEvery one of these was `{tool}`. Re-read its description and parameter \
                 schema in your tool definitions before calling it again — or use a different \
                 tool to make progress.",
            )
        })
        .unwrap_or_default();
    report += &tool_hint;

    report
}
/// Builds the body of a permission checkpoint message.
///
/// The text consists of a header stating how many calls were blocked
/// (`denials`), one ` - tool: excerpt` line per entry in `recent`, and a fixed
/// notice explaining that the block is a policy decision that must not be
/// worked around.
fn format_permission_checkpoint(denials: usize, recent: &[(String, String)]) -> String {
    const POLICY_NOTICE: &str =
        "This is a policy block, not a tool error. Retrying, rephrasing, or switching to \
         another tool will not clear it, and you must NOT try to work around it — do not write \
         a script to perform the blocked action, move the work to an allowed path, or otherwise \
         route around the guardrail. Only the user can permit this. Either ask the user to \
         approve it (they can run `/allow add <tool> <pattern>`, e.g. `/allow add write \
         ~/dir/**`), or stop and report plainly what is blocked and what you would do once it \
         is allowed.";

    let mut report = format!(
        "[Permission checkpoint] {denials} tool calls in a row were blocked by the \
         permission system:\n"
    );
    report.extend(
        recent
            .iter()
            .map(|(tool, excerpt)| format!(" - {tool}: {excerpt}\n")),
    );
    report + POLICY_NOTICE
}
/// Returns the tool name shared by every entry in `recent`, or `None` when
/// `recent` is empty or names more than one tool.
fn dominant_tool(recent: &[(String, String)]) -> Option<String> {
    let ((lead, _), others) = recent.split_first()?;
    others
        .iter()
        .all(|(name, _)| name == lead)
        .then(|| lead.clone())
}
// Unit tests for `FailureTracker`: threshold crossing, timeout weighting,
// reset on success, re-arming, excerpt condensing, and the separate
// permission-denial streak.
#[cfg(test)]
mod tests {
use super::*;
// Returns the content of the first message. Panics when the slice is empty or
// the first message is not `LoopMessage::User`; later messages are not inspected.
fn content_of(msgs: &[LoopMessage]) -> String {
match msgs.first() {
Some(LoopMessage::User(u)) => u.content.clone(),
_ => panic!("expected one User message"),
}
}
// Two errors against a threshold of three must not produce a checkpoint.
#[test]
fn below_threshold_is_silent() {
let t = FailureTracker::new(3);
t.record_result(true, "edit", "no match");
t.record_result(true, "edit", "no match either");
assert!(t.poll_reflection().is_empty(), "2 < threshold 3");
}
// Three errors from three different tools trip the checkpoint, quote the
// failures, and omit the single-tool hint.
#[test]
fn distinct_failures_trip_at_threshold() {
let t = FailureTracker::new(3);
t.record_result(true, "edit", "old_string not found");
t.record_result(true, "read", "file not found");
t.record_result(true, "bash", "command failed");
let msgs = t.poll_reflection();
assert_eq!(msgs.len(), 1, "streak of 3 distinct errors nudges");
let body = content_of(&msgs);
assert!(body.contains("Recovery checkpoint"));
assert!(body.contains("3 tool calls in a row have failed"));
assert!(body.contains("edit: old_string not found"));
assert!(body.contains("read: file not found"));
assert!(body.contains("DIFFERENT next step"));
assert!(!body.contains("Re-read its description"));
}
// When every quoted failure names the same tool, the checkpoint names that
// tool and points at its description.
#[test]
fn one_tool_dominating_points_at_its_contract() {
let t = FailureTracker::new(3);
for _ in 0..3 {
t.record_result(true, "edit", "old_string not found");
}
let body = content_of(&t.poll_reflection());
assert!(
body.contains("Every one of these was `edit`"),
"single-tool streak should name the tool: {body}"
);
assert!(body.contains("Re-read its description"));
}
// A timeout adds 2 to the escalation score, so two timeouts cross a threshold
// of 3 while the header still reports 2 failed calls.
#[test]
fn two_timeouts_trip_a_threshold_three_tracker() {
use super::super::activity::Outcome;
let t = FailureTracker::new(3);
t.record(Outcome::Timeout, "bash", "Command timed out after 120s");
assert!(
t.poll_reflection().is_empty(),
"single timeout (weight 2) < threshold 3"
);
t.record(Outcome::Timeout, "bash", "Command timed out after 120s");
let msgs = t.poll_reflection();
assert_eq!(msgs.len(), 1, "two timeouts escalate past threshold");
let body = content_of(&msgs);
assert!(body.contains("2 tool calls in a row have failed"), "{body}");
}
// With threshold 2 a single timeout is enough, and the checkpoint text
// includes the timeout-specific paragraph.
#[test]
fn timeout_checkpoint_calls_out_the_timeout() {
use super::super::activity::Outcome;
let t = FailureTracker::new(2);
t.record(Outcome::Timeout, "bash", "Command timed out after 120s");
let body = content_of(&t.poll_reflection());
assert!(
body.contains("timed out") && body.contains("time budget"),
"checkpoint should name the timeout failure mode: {body}"
);
}
// Error (weight 1) followed by timeout (weight 2) reaches an escalation of 3.
#[test]
fn error_then_timeout_reaches_threshold_three() {
use super::super::activity::Outcome;
let t = FailureTracker::new(3);
t.record(Outcome::Error, "edit", "no match");
assert!(t.poll_reflection().is_empty(), "escalation 1 < 3");
t.record(Outcome::Timeout, "bash", "Command timed out after 5s");
assert_eq!(t.poll_reflection().len(), 1, "error+timeout escalate to 3");
}
// A success in the middle resets the streak, so only the final error counts.
#[test]
fn success_clears_the_streak() {
let t = FailureTracker::new(3);
t.record_result(true, "edit", "miss");
t.record_result(true, "edit", "miss");
t.record_result(false, "read", "ok"); t.record_result(true, "edit", "miss");
assert!(
t.poll_reflection().is_empty(),
"one success reset the counter; only 1 error since"
);
}
// After the first checkpoint, one more failure is not enough to emit again.
#[test]
fn nudges_once_per_streak_not_per_call() {
let t = FailureTracker::new(3);
for _ in 0..3 {
t.record_result(true, "edit", "miss");
}
assert_eq!(t.poll_reflection().len(), 1, "first crossing nudges");
t.record_result(true, "edit", "miss");
assert!(
t.poll_reflection().is_empty(),
"streak 4, last emitted at 3 — not due again"
);
}
// Another `threshold` worth of failures after an emission emits again, and
// the header reports the full streak length.
#[test]
fn re_arms_after_another_threshold() {
let t = FailureTracker::new(3);
for _ in 0..3 {
t.record_result(true, "edit", "miss");
}
assert_eq!(t.poll_reflection().len(), 1);
for _ in 0..3 {
t.record_result(true, "edit", "miss");
}
let msgs = t.poll_reflection();
assert_eq!(msgs.len(), 1, "streak of 6 re-arms");
assert!(content_of(&msgs).contains("6 tool calls in a row"));
}
// Polling twice without recording anything in between emits only once.
#[test]
fn poll_is_idempotent_within_a_streak() {
let t = FailureTracker::new(2);
t.record_result(true, "edit", "miss");
t.record_result(true, "edit", "miss");
assert_eq!(t.poll_reflection().len(), 1);
assert!(
t.poll_reflection().is_empty(),
"second poll with no new failures stays silent"
);
}
// Multi-line, tab-containing, over-long excerpts are flattened and truncated
// with an ellipsis before being quoted.
#[test]
fn excerpt_is_condensed_to_one_bounded_line() {
let t = FailureTracker::new(2);
let noisy = format!("line one\n line two\t{}", "x".repeat(400));
t.record_result(true, "bash", &noisy);
t.record_result(true, "bash", "second");
let body = content_of(&t.poll_reflection());
assert!(!body.contains('\t'), "tabs collapsed");
assert!(body.contains('…'));
assert!(
!body.contains(&"x".repeat(200)),
"excerpt capped well under the raw length"
);
}
// Of seven recorded failures only the last five (err2..err6) are quoted.
#[test]
fn only_last_five_failures_quoted() {
let t = FailureTracker::new(3);
for i in 0..7 {
t.record_result(true, "edit", &format!("err{i}"));
}
let body = content_of(&t.poll_reflection());
assert!(!body.contains("err0"), "oldest dropped beyond MAX_QUOTED");
assert!(!body.contains("err1"));
assert!(body.contains("err2"));
assert!(body.contains("err6"));
}
// A streak of denials yields the permission checkpoint wording and none of
// the recovery-checkpoint wording.
#[test]
fn denial_streak_emits_permission_checkpoint_not_mechanical() {
use super::super::activity::Outcome;
let t = FailureTracker::new(3);
for _ in 0..3 {
t.record(
Outcome::Denied,
"edit",
"Permission denied: writes outside project",
);
}
let msgs = t.poll_reflection();
assert_eq!(msgs.len(), 1, "denial streak nudges once");
let body = content_of(&msgs);
assert!(!body.contains("DIFFERENT next step"), "{body}");
assert!(!body.contains("Re-read its description"), "{body}");
assert!(!body.contains("wrong arguments, wrong tool"), "{body}");
assert!(!body.contains("tool calls in a row have failed"), "{body}");
assert!(body.contains("Permission checkpoint"), "{body}");
assert!(body.contains("/allow"), "{body}");
let lc = body.to_lowercase();
assert!(
lc.contains("work around") || lc.contains("route around"),
"must forbid the workaround: {body}"
);
assert!(body.contains("edit: Permission denied"), "{body}");
}
// A denial is counted separately: two errors plus one denial trips neither
// checkpoint at threshold 3.
#[test]
fn denials_do_not_inflate_the_mechanical_streak() {
use super::super::activity::Outcome;
let t = FailureTracker::new(3);
t.record(Outcome::Error, "edit", "old_string not found");
t.record(Outcome::Error, "read", "file not found");
t.record(
Outcome::Denied,
"write",
"Permission denied: outside project",
);
assert!(
t.poll_reflection().is_empty(),
"2 mechanical errors + 1 denial: neither streak at threshold"
);
}
// A denial between errors leaves the error streak intact, so the third error
// still trips the recovery checkpoint.
#[test]
fn denial_does_not_reset_the_mechanical_streak() {
use super::super::activity::Outcome;
let t = FailureTracker::new(3);
t.record(Outcome::Error, "edit", "no match");
t.record(Outcome::Error, "edit", "no match");
t.record(Outcome::Denied, "write", "Permission denied: x");
t.record(Outcome::Error, "edit", "no match");
let msgs = t.poll_reflection();
assert_eq!(msgs.len(), 1, "3 mechanical errors across a denial trip");
assert!(content_of(&msgs).contains("3 tool calls in a row have failed"));
}
// An `Ok` outcome also clears the denial streak.
#[test]
fn success_clears_the_denial_streak() {
use super::super::activity::Outcome;
let t = FailureTracker::new(3);
t.record(Outcome::Denied, "write", "Permission denied: x");
t.record(Outcome::Denied, "write", "Permission denied: x");
t.record(Outcome::Ok, "read", "ok");
t.record(Outcome::Denied, "write", "Permission denied: x");
assert!(
t.poll_reflection().is_empty(),
"success reset the denial streak; 1 denial < threshold"
);
}
}