Skip to main content

koda_core/
loop_guard.rs

1//! Loop detection for the inference loop.
2//!
3//! Modeled after Gemini CLI's approach: simple consecutive-identical-call
4//! detection + feedback injection instead of hard stops. No windowed
5//! fingerprinting, no name saturation heuristics, no tool-only suppression.
6//!
7//! ## Design philosophy
8//!
9//! Claude Code and Codex have **zero** loop detection — they trust the model.
10//! Gemini CLI has the only thoughtful approach: detect consecutive identical
11//! tool calls (same name + args), then inject a "take a step back" feedback
12//! message to nudge the model out of the loop. Hard-stop only on the 2nd
13//! detection (model ignored the feedback).
14//!
15//! ## What we DON'T do (and why)
16//!
17//! - **No windowed fingerprint tracking** — nobody else does this.
18//! - **No tool-name saturation** — editing 12 files in a refactoring is normal.
19//! - **No tool-only response suppression** — efficient models work silently.
20//! - **No per-turn tool call cap** — frontier models emit 30+ parallel calls.
21//! - **No deduplication** — if a model emits 66 identical calls, the user
22//!   should see that and switch models, not have us silently paper over it.
23//!
24//! ## What we DO
25//!
26//! 1. **Consecutive identical calls** — same `(tool, args)` called
27//!    `CONSECUTIVE_REPEAT_THRESHOLD` times in a row → inject feedback.
28//! 2. **Hard iteration cap** — absolute ceiling on loop iterations.
29//!    User can extend interactively.
30
31use crate::providers::ToolCall;
32use std::collections::VecDeque;
33
/// Iteration ceiling used by default for the main inference loop.
pub const MAX_ITERATIONS_DEFAULT: u32 = 200;

/// Iteration ceiling for sub-agent loops.
pub const MAX_SUB_AGENT_ITERATIONS: usize = 20;

/// Number of **back-to-back** identical tool calls (same name + args) that
/// counts as a loop. "Back-to-back" means the same fingerprint is seen this
/// many times without any other tool call interleaved.
///
/// The value 5 mirrors Gemini CLI's `TOOL_CALL_LOOP_THRESHOLD`.
/// An ordinary "read → edit → test" cycle stays below it because every
/// step of that cycle is a different tool call.
const CONSECUTIVE_REPEAT_THRESHOLD: usize = 5;

/// Number of most recent tool names kept for display in the hard-cap prompt.
const DISPLAY_RECENT: usize = 5;
51
52// ── Loop detection ────────────────────────────────────────────────
53
/// Outcome of a loop check, telling the caller how to proceed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LoopAction {
    /// Nothing suspicious — keep going.
    Ok,
    /// A loop was detected for the first time. The payload is a
    /// human-readable description to include in the feedback message
    /// that nudges the model.
    InjectFeedback(String),
    /// A loop was detected again after feedback had already been
    /// injected — the caller should stop. The payload describes the loop.
    HardStop(String),
}
65
/// Tracks consecutive identical tool calls.
///
/// Detection is simple: if the last N tool calls all have the same
/// fingerprint (tool name + args), that's a loop. On first detection,
/// the caller injects a feedback message. On second detection (model
/// ignored the feedback), the caller hard-stops.
///
/// Implements [`Debug`] so the detector state can be logged and so the
/// type can be embedded in other `#[derive(Debug)]` structs.
#[derive(Debug)]
pub struct LoopDetector {
    /// The fingerprint of the last tool call, or `None` when no call has
    /// been recorded since construction or the last reset.
    last_fingerprint: Option<String>,
    /// How many consecutive times we've seen `last_fingerprint`.
    consecutive_count: usize,
    /// How many times we've detected a loop in this session.
    detection_count: u32,
    /// Ring buffer of recent tool names (for display in hard-cap prompt).
    recent: VecDeque<String>,
}
82
83impl Default for LoopDetector {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
89impl LoopDetector {
90    /// Create a new loop detector with empty history.
91    pub fn new() -> Self {
92        Self {
93            last_fingerprint: None,
94            consecutive_count: 0,
95            detection_count: 0,
96            recent: VecDeque::new(),
97        }
98    }
99
100    /// Record a batch of tool calls and check for loops.
101    ///
102    /// Returns a [`LoopAction`] indicating what the caller should do.
103    pub fn record(&mut self, tool_calls: &[ToolCall]) -> LoopAction {
104        for tc in tool_calls {
105            let fp = fingerprint(&tc.function_name, &tc.arguments);
106
107            // Update consecutive counter
108            if self.last_fingerprint.as_ref() == Some(&fp) {
109                self.consecutive_count += 1;
110            } else {
111                self.last_fingerprint = Some(fp);
112                self.consecutive_count = 1;
113            }
114
115            // Update display ring buffer
116            self.recent.push_back(tc.function_name.clone());
117            if self.recent.len() > DISPLAY_RECENT {
118                self.recent.pop_front();
119            }
120        }
121
122        self.check()
123    }
124
125    /// Clear the detection state after feedback injection so the model
126    /// gets a fresh chance. Increments `detection_count` so the next
127    /// trigger will be a hard stop.
128    pub fn clear_after_feedback(&mut self) {
129        self.detection_count += 1;
130        self.last_fingerprint = None;
131        self.consecutive_count = 0;
132    }
133
134    /// Recent tool names (most recent last), for display in the hard-cap prompt.
135    pub fn recent_names(&self) -> Vec<String> {
136        self.recent.iter().cloned().collect()
137    }
138
139    fn check(&self) -> LoopAction {
140        if self.consecutive_count < CONSECUTIVE_REPEAT_THRESHOLD {
141            return LoopAction::Ok;
142        }
143
144        let fp = self.last_fingerprint.as_deref().unwrap_or("unknown");
145        let tool_name = fp.split(':').next().unwrap_or(fp);
146        let detail = format!(
147            "'{tool_name}' called {n} times consecutively with identical arguments",
148            n = self.consecutive_count,
149        );
150
151        if self.detection_count == 0 {
152            // First detection — inject feedback
153            LoopAction::InjectFeedback(detail)
154        } else {
155            // Already injected feedback before — hard stop
156            LoopAction::HardStop(detail)
157        }
158    }
159}
160
/// Stable fingerprint: tool name + a bounded prefix of the args.
///
/// The prefix is at most 200 **bytes** of `args`. If byte 200 falls inside
/// a multi-byte UTF-8 character, the prefix is shortened to the previous
/// character boundary so that slicing never panics on non-ASCII arguments.
fn fingerprint(name: &str, args: &str) -> String {
    const MAX_ARG_BYTES: usize = 200;

    let mut end = args.len().min(MAX_ARG_BYTES);
    // Index 0 is always a char boundary, so this loop terminates.
    while !args.is_char_boundary(end) {
        end -= 1;
    }
    format!("{name}:{}", &args[..end])
}
166
167// ── Hard-cap prompt ───────────────────────────────────────────────
168
169/// Options for continuing after hitting the hard cap.
170#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
171#[serde(rename_all = "snake_case")]
172pub enum LoopContinuation {
173    /// Stop the inference loop.
174    Stop,
175    /// Continue for 50 more iterations.
176    Continue50,
177    /// Continue for 200 more iterations.
178    Continue200,
179}
180
181impl LoopContinuation {
182    /// Number of additional iterations granted.
183    pub fn extra_iterations(self) -> u32 {
184        match self {
185            Self::Stop => 0,
186            Self::Continue50 => 50,
187            Self::Continue200 => 200,
188        }
189    }
190}
191
192// ── Tests ─────────────────────────────────────────────────────────
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `ToolCall` with a fixed id and the given name and raw args.
    fn call(name: &str, args: &str) -> ToolCall {
        ToolCall {
            id: "x".into(),
            function_name: name.into(),
            arguments: args.into(),
            thought_signature: None,
        }
    }

    #[test]
    fn no_loop_on_unique_calls() {
        let mut d = LoopDetector::new();
        assert_eq!(
            d.record(&[call("Edit", "{\"path\":\"a.rs\"}")]),
            LoopAction::Ok
        );
        assert_eq!(
            d.record(&[call("Edit", "{\"path\":\"b.rs\"}")]),
            LoopAction::Ok
        );
        assert_eq!(
            d.record(&[call("Bash", "{\"cmd\":\"ls\"}")]),
            LoopAction::Ok
        );
    }

    #[test]
    fn detects_consecutive_identical_calls() {
        let mut d = LoopDetector::new();
        let tc = call("Edit", "{\"path\":\"src/main.rs\"}");
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        // Should trigger feedback on threshold
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));
    }

    #[test]
    fn different_tool_resets_consecutive_count() {
        let mut d = LoopDetector::new();
        let tc = call("Edit", "{\"path\":\"src/main.rs\"}");
        // Almost at threshold
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 2 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        // Different tool resets the count
        assert_eq!(
            d.record(&[call("Bash", "{\"cmd\":\"test\"}")]),
            LoopAction::Ok
        );
        // Back to same tool — starts from 1 again
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));
    }

    #[test]
    fn read_edit_test_cycle_never_triggers() {
        // The most common coding workflow should NEVER trigger.
        let mut d = LoopDetector::new();
        let test_cmd = "{\"command\":\"cargo test\"}";
        let read_args = "{\"path\":\"src/lib.rs\"}";

        for cycle in 0..20 {
            assert_eq!(
                d.record(&[call("Read", read_args)]),
                LoopAction::Ok,
                "read should not trigger at cycle {cycle}"
            );
            let edit_args = format!("{{\"path\":\"src/lib.rs\",\"old\":\"v{cycle}\"}}");
            assert_eq!(
                d.record(&[call("Edit", &edit_args)]),
                LoopAction::Ok,
                "edit should not trigger at cycle {cycle}"
            );
            assert_eq!(
                d.record(&[call("Bash", test_cmd)]),
                LoopAction::Ok,
                "test should not trigger at cycle {cycle}"
            );
        }
    }

    #[test]
    fn feedback_then_hard_stop() {
        let mut d = LoopDetector::new();
        let tc = call("Read", "{\"path\":\"stuck.rs\"}");

        // First detection → feedback, exactly on the threshold-th call.
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));

        // The caller injects the feedback and clears the run state; this is
        // the only step that bumps the detection counter.
        d.clear_after_feedback();
        assert_eq!(d.detection_count, 1);

        // The model loops again: the fresh run starts from zero, and the
        // second detection is a hard stop.
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::HardStop(_)
        ));
    }

    #[test]
    fn parallel_calls_same_tool_not_a_loop() {
        // 10 parallel Read calls with DIFFERENT args in one batch — not a loop
        let mut d = LoopDetector::new();
        let batch: Vec<ToolCall> = (0..10)
            .map(|i| call("Read", &format!("{{\"path\":\"file{i}.rs\"}}")))
            .collect();
        assert_eq!(d.record(&batch), LoopAction::Ok);
    }

    #[test]
    fn same_tool_different_args_not_consecutive() {
        // Same tool name but different args each time — not consecutive
        let mut d = LoopDetector::new();
        for i in 0..20 {
            let args = format!("{{\"command\":\"ls -variant-{i}\"}}");
            assert_eq!(
                d.record(&[call("Bash", &args)]),
                LoopAction::Ok,
                "different args should not trigger at call {i}"
            );
        }
    }

    #[test]
    fn recent_names_tracks_last_five() {
        let mut d = LoopDetector::new();
        for i in 0..8 {
            let name = format!("Tool{i}");
            d.record(&[call(&name, "{}")]);
        }
        let names = d.recent_names();
        assert_eq!(names.len(), 5);
        assert_eq!(names[0], "Tool3");
        assert_eq!(names[4], "Tool7");
    }
}
345}