koda_core/loop_guard.rs
1//! Loop detection for the inference loop.
2//!
3//! Modeled after Gemini CLI's approach: simple consecutive-identical-call
4//! detection + feedback injection instead of hard stops. No windowed
5//! fingerprinting, no name saturation heuristics, no tool-only suppression.
6//!
7//! ## Design philosophy
8//!
9//! Claude Code and Codex have **zero** loop detection — they trust the model.
10//! Gemini CLI has the only thoughtful approach: detect consecutive identical
11//! tool calls (same name + args), then inject a "take a step back" feedback
12//! message to nudge the model out of the loop. Hard-stop only on the 2nd
13//! detection (model ignored the feedback).
14//!
15//! ## What we DON'T do (and why)
16//!
17//! - **No windowed fingerprint tracking** — nobody else does this.
18//! - **No tool-name saturation** — editing 12 files in a refactoring is normal.
19//! - **No tool-only response suppression** — efficient models work silently.
20//! - **No per-turn tool call cap** — frontier models emit 30+ parallel calls.
21//! - **No deduplication** — if a model emits 66 identical calls, the user
22//! should see that and switch models, not have us silently paper over it.
23//!
24//! ## What we DO
25//!
26//! 1. **Consecutive identical calls** — same `(tool, args)` called
27//! `CONSECUTIVE_REPEAT_THRESHOLD` times in a row → inject feedback.
28//! 2. **Hard iteration cap (top-level only)** — absolute ceiling on
29//! the main inference loop. User can extend interactively.
30//! Sub-agent loops are **uncapped** as of #1110; they trust the
31//! model and rely on consecutive-identical detection, provider
32//! stop reasons, cancellation, and context bounds (P3 in DESIGN.md).
33
34use crate::providers::ToolCall;
35use std::collections::VecDeque;
36
/// Default ceiling on the number of iterations the top-level inference
/// loop may run before the hard-cap prompt is reached.
///
/// This is only the starting budget: [`LoopContinuation`] describes the
/// extensions a user can grant once the cap is hit.
pub const MAX_ITERATIONS_DEFAULT: u32 = 200;
39
40// `MAX_SUB_AGENT_ITERATIONS` deleted in #1110: per `DESIGN.md` P3 ("Build for
41// the world six months from now"), sub-agents trust the model and rely on
42// `LoopDetector`, provider stop reasons, cancellation, and context bounds
43// instead of a hardcoded iteration count. Codex and Zed both ship without
44// any per-sub-agent iteration cap.
45
/// Length of a run of identical tool calls (same name, same arguments) at
/// which the detector reports a loop.
///
/// The run must be uninterrupted: any tool call with a different
/// fingerprint restarts the count at one. An ordinary
/// "read → edit → test" cycle therefore never reaches the threshold,
/// because every step differs from the one before it.
///
/// The value 5 mirrors Gemini CLI's `TOOL_CALL_LOOP_THRESHOLD`.
const CONSECUTIVE_REPEAT_THRESHOLD: usize = 5;
54
/// Capacity of the recent-tool-names buffer, i.e. the maximum number of
/// names the hard-cap prompt can show.
const DISPLAY_RECENT: usize = 5;
57
58// ── Loop detection ────────────────────────────────────────────────
59
/// Verdict returned by the loop detector after a batch of tool calls.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LoopAction {
    /// Nothing suspicious — the caller should carry on as usual.
    Ok,
    /// A loop was seen for the first time. The caller is expected to inject
    /// a feedback message to nudge the model; the payload is a
    /// human-readable description of what was detected.
    InjectFeedback(String),
    /// A loop was seen again after feedback had already been given, so the
    /// caller should stop. The payload describes what was detected.
    HardStop(String),
}
71
/// Tracks runs of consecutive identical tool calls.
///
/// A "loop" is a run of tool calls that all share one fingerprint (tool
/// name plus arguments). The first time a run reaches the threshold the
/// caller is told to inject feedback; if a run reaches the threshold again
/// after that feedback, the caller is told to hard-stop.
///
/// `Debug` is derived so the detector state can be logged or embedded in
/// other `Debug` types while diagnosing loop behaviour.
#[derive(Debug)]
pub struct LoopDetector {
    /// Fingerprint of the most recently recorded tool call, or `None` when
    /// nothing has been recorded since construction or the last reset.
    last_fingerprint: Option<String>,
    /// Length of the current run of calls matching `last_fingerprint`.
    consecutive_count: usize,
    /// Number of loop detections already acknowledged in this session.
    detection_count: u32,
    /// Most recent tool names, oldest first, kept for display in the
    /// hard-cap prompt.
    recent: VecDeque<String>,
}
88
89impl Default for LoopDetector {
90 fn default() -> Self {
91 Self::new()
92 }
93}
94
95impl LoopDetector {
96 /// Create a new loop detector with empty history.
97 pub fn new() -> Self {
98 Self {
99 last_fingerprint: None,
100 consecutive_count: 0,
101 detection_count: 0,
102 recent: VecDeque::new(),
103 }
104 }
105
106 /// Record a batch of tool calls and check for loops.
107 ///
108 /// Returns a [`LoopAction`] indicating what the caller should do.
109 pub fn record(&mut self, tool_calls: &[ToolCall]) -> LoopAction {
110 for tc in tool_calls {
111 let fp = fingerprint(&tc.function_name, &tc.arguments);
112
113 // Update consecutive counter
114 if self.last_fingerprint.as_ref() == Some(&fp) {
115 self.consecutive_count += 1;
116 } else {
117 self.last_fingerprint = Some(fp);
118 self.consecutive_count = 1;
119 }
120
121 // Update display ring buffer
122 self.recent.push_back(tc.function_name.clone());
123 if self.recent.len() > DISPLAY_RECENT {
124 self.recent.pop_front();
125 }
126 }
127
128 self.check()
129 }
130
131 /// Clear the detection state after feedback injection so the model
132 /// gets a fresh chance. Increments `detection_count` so the next
133 /// trigger will be a hard stop.
134 pub fn clear_after_feedback(&mut self) {
135 self.detection_count += 1;
136 self.last_fingerprint = None;
137 self.consecutive_count = 0;
138 }
139
140 /// Recent tool names (most recent last), for display in the hard-cap prompt.
141 pub fn recent_names(&self) -> Vec<String> {
142 self.recent.iter().cloned().collect()
143 }
144
145 fn check(&self) -> LoopAction {
146 if self.consecutive_count < CONSECUTIVE_REPEAT_THRESHOLD {
147 return LoopAction::Ok;
148 }
149
150 let fp = self.last_fingerprint.as_deref().unwrap_or("unknown");
151 let tool_name = fp.split(':').next().unwrap_or(fp);
152 let detail = format!(
153 "'{tool_name}' called {n} times consecutively with identical arguments",
154 n = self.consecutive_count,
155 );
156
157 if self.detection_count == 0 {
158 // First detection — inject feedback
159 LoopAction::InjectFeedback(detail)
160 } else {
161 // Already injected feedback before — hard stop
162 LoopAction::HardStop(detail)
163 }
164 }
165}
166
/// Stable fingerprint of a tool call: the tool name, a `:` separator, and
/// at most the first 200 bytes of the argument string.
///
/// The limit is measured in bytes, not characters. When byte 200 falls in
/// the middle of a multi-byte UTF-8 character the cut is moved back to the
/// start of that character, so the prefix is always valid UTF-8 and the
/// function never panics on non-ASCII arguments.
///
/// Two calls whose arguments differ only beyond the retained prefix
/// produce the same fingerprint.
fn fingerprint(name: &str, args: &str) -> String {
    const MAX_ARG_BYTES: usize = 200;

    let mut end = args.len().min(MAX_ARG_BYTES);
    // Slicing a `str` off a char boundary panics; back up until the cut is
    // on one. Index 0 is always a boundary, so this terminates.
    while !args.is_char_boundary(end) {
        end -= 1;
    }

    let prefix = &args[..end];
    format!("{name}:{prefix}")
}
172
173// ── Hard-cap prompt ───────────────────────────────────────────────
174
175/// Options for continuing after hitting the hard cap.
176#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
177#[serde(rename_all = "snake_case")]
178pub enum LoopContinuation {
179 /// Stop the inference loop.
180 Stop,
181 /// Continue for 50 more iterations.
182 Continue50,
183 /// Continue for 200 more iterations.
184 Continue200,
185}
186
187impl LoopContinuation {
188 /// Number of additional iterations granted.
189 pub fn extra_iterations(self) -> u32 {
190 match self {
191 Self::Stop => 0,
192 Self::Continue50 => 50,
193 Self::Continue200 => 200,
194 }
195 }
196}
197
198// ── Tests ─────────────────────────────────────────────────────────
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `ToolCall` with the given name and raw argument string.
    fn call(name: &str, args: &str) -> ToolCall {
        ToolCall {
            id: "x".into(),
            function_name: name.into(),
            arguments: args.into(),
            thought_signature: None,
        }
    }

    #[test]
    fn no_loop_on_unique_calls() {
        let mut d = LoopDetector::new();
        assert_eq!(
            d.record(&[call("Edit", "{\"path\":\"a.rs\"}")]),
            LoopAction::Ok
        );
        assert_eq!(
            d.record(&[call("Edit", "{\"path\":\"b.rs\"}")]),
            LoopAction::Ok
        );
        assert_eq!(
            d.record(&[call("Bash", "{\"cmd\":\"ls\"}")]),
            LoopAction::Ok
        );
    }

    #[test]
    fn detects_consecutive_identical_calls() {
        let mut d = LoopDetector::new();
        let tc = call("Edit", "{\"path\":\"src/main.rs\"}");
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        // The call that reaches the threshold triggers feedback.
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));
    }

    #[test]
    fn identical_calls_in_one_batch_trigger() {
        // A single batch made of threshold-many identical calls is a loop.
        let mut d = LoopDetector::new();
        let batch: Vec<ToolCall> = (0..CONSECUTIVE_REPEAT_THRESHOLD)
            .map(|_| call("Read", "{\"path\":\"same.rs\"}"))
            .collect();
        assert!(matches!(d.record(&batch), LoopAction::InjectFeedback(_)));
    }

    #[test]
    fn different_tool_resets_consecutive_count() {
        let mut d = LoopDetector::new();
        let tc = call("Edit", "{\"path\":\"src/main.rs\"}");
        // Almost at threshold
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 2 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        // Different tool resets the count
        assert_eq!(
            d.record(&[call("Bash", "{\"cmd\":\"test\"}")]),
            LoopAction::Ok
        );
        // Back to same tool — starts from 1 again
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));
    }

    #[test]
    fn read_edit_test_cycle_never_triggers() {
        // The most common coding workflow should NEVER trigger.
        let mut d = LoopDetector::new();
        let test_cmd = "{\"command\":\"cargo test\"}";
        let read_args = "{\"path\":\"src/lib.rs\"}";

        for cycle in 0..20 {
            assert_eq!(
                d.record(&[call("Read", read_args)]),
                LoopAction::Ok,
                "read should not trigger at cycle {cycle}"
            );
            let edit_args = format!("{{\"path\":\"src/lib.rs\",\"old\":\"v{cycle}\"}}");
            assert_eq!(
                d.record(&[call("Edit", &edit_args)]),
                LoopAction::Ok,
                "edit should not trigger at cycle {cycle}"
            );
            assert_eq!(
                d.record(&[call("Bash", test_cmd)]),
                LoopAction::Ok,
                "test should not trigger at cycle {cycle}"
            );
        }
    }

    #[test]
    fn feedback_then_hard_stop() {
        let mut d = LoopDetector::new();
        let tc = call("Read", "{\"path\":\"stuck.rs\"}");

        // First detection → feedback, reported by the call that reaches
        // the threshold.
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));

        // The caller injects feedback and resets the run. This is the only
        // place `detection_count` is advanced.
        d.clear_after_feedback();

        // The model loops again → the run has to rebuild from scratch, and
        // reaching the threshold a second time is a hard stop.
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::HardStop(_)
        ));
    }

    #[test]
    fn parallel_calls_same_tool_not_a_loop() {
        // 10 parallel Read calls with DIFFERENT args in one batch — not a loop
        let mut d = LoopDetector::new();
        let batch: Vec<ToolCall> = (0..10)
            .map(|i| call("Read", &format!("{{\"path\":\"file{i}.rs\"}}")))
            .collect();
        assert_eq!(d.record(&batch), LoopAction::Ok);
    }

    #[test]
    fn same_tool_different_args_not_consecutive() {
        // Same tool name but different args each time — not consecutive
        let mut d = LoopDetector::new();
        for i in 0..20 {
            let args = format!("{{\"command\":\"ls -variant-{i}\"}}");
            assert_eq!(
                d.record(&[call("Bash", &args)]),
                LoopAction::Ok,
                "different args should not trigger at call {i}"
            );
        }
    }

    #[test]
    fn recent_names_tracks_last_five() {
        let mut d = LoopDetector::new();
        for i in 0..8 {
            let name = format!("Tool{i}");
            d.record(&[call(&name, "{}")]);
        }
        let names = d.recent_names();
        assert_eq!(names.len(), 5);
        assert_eq!(names[0], "Tool3");
        assert_eq!(names[4], "Tool7");
    }
}