use std::sync::{Arc, Mutex};
use crate::sync_util::LockExt;
use super::message::{LoopMessage, UserMessage};
/// Maximum number of most-recent failures kept for quoting in a checkpoint message.
const MAX_QUOTED: usize = 5;
/// Maximum number of characters kept from a failure excerpt; longer excerpts are cut and suffixed with `…`.
const EXCERPT_CAP: usize = 160;
/// Tracks a streak of consecutive failed tool results and decides when a
/// recovery-checkpoint message should be produced.
#[derive(Debug)]
pub struct FailureTracker {
/// Mutable streak state, kept behind a mutex so it can be updated through `&self`.
inner: Mutex<Inner>,
/// Streak length at which the first checkpoint becomes due, and the number of
/// additional failures required before the next one. `new` asserts it is at least 2.
threshold: usize,
}
/// Streak state stored inside the tracker's mutex.
#[derive(Debug)]
struct Inner {
/// Number of failures recorded since the last success (or since construction).
consecutive: usize,
/// `(tool name, condensed excerpt)` pairs for the most recent failures, oldest
/// first, trimmed to at most `MAX_QUOTED` entries.
recent: Vec<(String, String)>,
/// Value of `consecutive` when a checkpoint was last emitted; `0` means no
/// checkpoint has been emitted during the current streak.
last_emitted_at: usize,
}
impl FailureTracker {
pub fn new(threshold: usize) -> Arc<Self> {
assert!(
threshold >= 2,
"failure tracker threshold must be >= 2 (got {threshold})"
);
Arc::new(Self {
threshold,
inner: Mutex::new(Inner {
consecutive: 0,
recent: Vec::new(),
last_emitted_at: 0,
}),
})
}
pub fn record_result(&self, is_error: bool, tool_name: &str, excerpt: &str) {
let mut inner = self.inner.lock_ignore_poison();
if !is_error {
inner.consecutive = 0;
inner.recent.clear();
inner.last_emitted_at = 0;
return;
}
inner.consecutive += 1;
inner
.recent
.push((tool_name.to_string(), condense(excerpt)));
if inner.recent.len() > MAX_QUOTED {
let drop = inner.recent.len() - MAX_QUOTED;
inner.recent.drain(0..drop);
}
}
pub fn poll_reflection(&self) -> Vec<LoopMessage> {
let mut inner = self.inner.lock_ignore_poison();
if inner.consecutive < self.threshold {
return Vec::new();
}
let due = inner.last_emitted_at == 0
|| inner.consecutive.saturating_sub(inner.last_emitted_at) >= self.threshold;
if !due {
return Vec::new();
}
inner.last_emitted_at = inner.consecutive;
let body = format_checkpoint(inner.consecutive, &inner.recent);
vec![LoopMessage::User(UserMessage { content: body })]
}
}
fn condense(s: &str) -> String {
let one_line: String = s.split_whitespace().collect::<Vec<_>>().join(" ");
if one_line.chars().count() > EXCERPT_CAP {
let kept: String = one_line.chars().take(EXCERPT_CAP).collect();
format!("{kept}…")
} else {
one_line
}
}
fn format_checkpoint(consecutive: usize, recent: &[(String, String)]) -> String {
let mut s = format!("[Recovery checkpoint] {consecutive} tool calls in a row have failed:\n");
for (tool, excerpt) in recent {
s.push_str(&format!(" - {tool}: {excerpt}\n"));
}
s.push_str(
"Stop and diagnose before retrying — this is a system checkpoint, not a new task:\n\
1. What root cause do these share — wrong arguments, wrong tool, or wrong approach?\n\
2. If you've already tried a fix twice, it isn't working. Change the approach; don't tweak it.\n\
3. If you're missing information, gather it first (read the file, list the directory,\n\
re-read the exact error) before acting again.\n\
Name the root cause in one sentence, then take a DIFFERENT next step.",
);
if let Some(tool) = dominant_tool(recent) {
s.push_str(&format!(
"\nEvery one of these was `{tool}`. Re-read its description and parameter \
schema in your tool definitions before calling it again — or use a different \
tool to make progress.",
));
}
s
}
/// Returns the tool name shared by every entry in `recent`.
///
/// Yields `None` when `recent` is empty or when at least two entries name
/// different tools.
fn dominant_tool(recent: &[(String, String)]) -> Option<String> {
    let ((head, _), rest) = recent.split_first()?;
    rest.iter()
        .all(|(name, _)| name == head)
        .then(|| head.clone())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the text of the first message, which must be a user message.
    fn content_of(msgs: &[LoopMessage]) -> String {
        if let Some(LoopMessage::User(user)) = msgs.first() {
            return user.content.clone();
        }
        panic!("expected one User message")
    }

    /// Records `times` identical failures on `tracker`.
    fn fail_repeatedly(tracker: &FailureTracker, times: usize, tool: &str, excerpt: &str) {
        (0..times).for_each(|_| tracker.record_result(true, tool, excerpt));
    }

    #[test]
    fn below_threshold_is_silent() {
        let tracker = FailureTracker::new(3);
        for excerpt in ["no match", "no match either"] {
            tracker.record_result(true, "edit", excerpt);
        }
        assert!(tracker.poll_reflection().is_empty(), "2 < threshold 3");
    }

    #[test]
    fn distinct_failures_trip_at_threshold() {
        let tracker = FailureTracker::new(3);
        let failures = [
            ("edit", "old_string not found"),
            ("read", "file not found"),
            ("bash", "command failed"),
        ];
        for (tool, excerpt) in failures {
            tracker.record_result(true, tool, excerpt);
        }

        let emitted = tracker.poll_reflection();
        assert_eq!(emitted.len(), 1, "streak of 3 distinct errors nudges");

        let body = content_of(&emitted);
        assert!(body.contains("Recovery checkpoint"));
        assert!(body.contains("3 tool calls in a row have failed"));
        assert!(body.contains("edit: old_string not found"));
        assert!(body.contains("read: file not found"));
        assert!(body.contains("DIFFERENT next step"));
        // Mixed tools must not trigger the single-tool paragraph.
        assert!(!body.contains("Re-read its description"));
    }

    #[test]
    fn one_tool_dominating_points_at_its_contract() {
        let tracker = FailureTracker::new(3);
        fail_repeatedly(&tracker, 3, "edit", "old_string not found");

        let body = content_of(&tracker.poll_reflection());
        assert!(
            body.contains("Every one of these was `edit`"),
            "single-tool streak should name the tool: {body}"
        );
        assert!(body.contains("Re-read its description"));
    }

    #[test]
    fn success_clears_the_streak() {
        let tracker = FailureTracker::new(3);
        fail_repeatedly(&tracker, 2, "edit", "miss");
        // A success in the middle resets everything...
        tracker.record_result(false, "read", "ok");
        // ...so this failure starts a new streak of length one.
        tracker.record_result(true, "edit", "miss");

        assert!(
            tracker.poll_reflection().is_empty(),
            "one success reset the counter; only 1 error since"
        );
    }

    #[test]
    fn nudges_once_per_streak_not_per_call() {
        let tracker = FailureTracker::new(3);
        fail_repeatedly(&tracker, 3, "edit", "miss");
        assert_eq!(tracker.poll_reflection().len(), 1, "first crossing nudges");

        tracker.record_result(true, "edit", "miss");
        assert!(
            tracker.poll_reflection().is_empty(),
            "streak 4, last emitted at 3 — not due again"
        );
    }

    #[test]
    fn re_arms_after_another_threshold() {
        let tracker = FailureTracker::new(3);
        fail_repeatedly(&tracker, 3, "edit", "miss");
        assert_eq!(tracker.poll_reflection().len(), 1);

        fail_repeatedly(&tracker, 3, "edit", "miss");
        let emitted = tracker.poll_reflection();
        assert_eq!(emitted.len(), 1, "streak of 6 re-arms");
        assert!(content_of(&emitted).contains("6 tool calls in a row"));
    }

    #[test]
    fn poll_is_idempotent_within_a_streak() {
        let tracker = FailureTracker::new(2);
        fail_repeatedly(&tracker, 2, "edit", "miss");

        assert_eq!(tracker.poll_reflection().len(), 1);
        assert!(
            tracker.poll_reflection().is_empty(),
            "second poll with no new failures stays silent"
        );
    }

    #[test]
    fn excerpt_is_condensed_to_one_bounded_line() {
        let tracker = FailureTracker::new(2);
        let mut noisy = String::from("line one\n line two\t");
        noisy.push_str(&"x".repeat(400));
        tracker.record_result(true, "bash", &noisy);
        tracker.record_result(true, "bash", "second");

        let body = content_of(&tracker.poll_reflection());
        assert!(!body.contains('\t'), "tabs collapsed");
        assert!(body.contains('…'));
        assert!(
            !body.contains(&"x".repeat(200)),
            "excerpt capped well under the raw length"
        );
    }

    #[test]
    fn only_last_five_failures_quoted() {
        let tracker = FailureTracker::new(3);
        (0..7)
            .map(|i| format!("err{i}"))
            .for_each(|excerpt| tracker.record_result(true, "edit", &excerpt));

        let body = content_of(&tracker.poll_reflection());
        assert!(!body.contains("err0"), "oldest dropped beyond MAX_QUOTED");
        assert!(!body.contains("err1"));
        assert!(body.contains("err2"));
        assert!(body.contains("err6"));
    }
}