koda_core/
loop_guard.rs

1//! Loop detection for the inference loop.
2//!
3//! Modeled after Gemini CLI's approach: simple consecutive-identical-call
4//! detection + feedback injection instead of hard stops. No windowed
5//! fingerprinting, no name saturation heuristics, no tool-only suppression.
6//!
7//! ## Design philosophy
8//!
9//! Claude Code and Codex have **zero** loop detection — they trust the model.
10//! Gemini CLI has the only thoughtful approach: detect consecutive identical
11//! tool calls (same name + args), then inject a "take a step back" feedback
12//! message to nudge the model out of the loop. Hard-stop only on the 2nd
13//! detection (model ignored the feedback).
14//!
15//! ## What we DON'T do (and why)
16//!
17//! - **No windowed fingerprint tracking** — nobody else does this.
18//! - **No tool-name saturation** — editing 12 files in a refactoring is normal.
19//! - **No tool-only response suppression** — efficient models work silently.
20//! - **No per-turn tool call cap** — frontier models emit 30+ parallel calls.
21//! - **No deduplication** — if a model emits 66 identical calls, the user
22//!   should see that and switch models, not have us silently paper over it.
23//!
24//! ## What we DO
25//!
26//! 1. **Consecutive identical calls** — same `(tool, args)` called
27//!    `CONSECUTIVE_REPEAT_THRESHOLD` times in a row → inject feedback.
28//! 2. **Hard iteration cap (top-level only)** — absolute ceiling on
29//!    the main inference loop. User can extend interactively.
30//!    Sub-agent loops are **uncapped** as of #1110; they trust the
31//!    model and rely on consecutive-identical detection, provider
32//!    stop reasons, cancellation, and context bounds (P3 in DESIGN.md).
33
34use crate::providers::ToolCall;
35use std::collections::VecDeque;
36
/// Default ceiling on the number of iterations of the main (top-level)
/// inference loop.
pub const MAX_ITERATIONS_DEFAULT: u32 = 200;
39
40// `MAX_SUB_AGENT_ITERATIONS` deleted in #1110: per `DESIGN.md` P3 ("Build for
41// the world six months from now"), sub-agents trust the model and rely on
42// `LoopDetector`, provider stop reasons, cancellation, and context bounds
43// instead of a hardcoded iteration count. Codex and Zed both ship without
44// any per-sub-agent iteration cap.
45
/// Length of an unbroken run of identical tool calls (same name + args)
/// that counts as a loop. The run is broken as soon as any call with a
/// different fingerprint is recorded.
///
/// The value 5 mirrors Gemini CLI's `TOOL_CALL_LOOP_THRESHOLD`.
/// An ordinary "read → edit → test" cycle stays below it because every
/// step of the cycle is a different tool call.
const CONSECUTIVE_REPEAT_THRESHOLD: usize = 5;
54
/// Number of most-recent tool names retained for display in the
/// hard-cap prompt.
const DISPLAY_RECENT: usize = 5;
57
58// ── Loop detection ────────────────────────────────────────────────
59
/// Verdict of a loop check, telling the caller how to proceed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LoopAction {
    /// Nothing suspicious was found — carry on as usual.
    Ok,
    /// A loop was detected for the first time. The payload is a
    /// human-readable description meant to be used in the feedback
    /// message that nudges the model.
    InjectFeedback(String),
    /// A loop was detected again after feedback had already been
    /// injected; the model ignored it, so the caller should stop.
    /// The payload describes the loop.
    HardStop(String),
}
71
/// Tracks runs of consecutive identical tool calls.
///
/// A loop is a run of N tool calls that all share one fingerprint
/// (tool name + args). The first time a loop is reported the caller is
/// expected to inject a feedback message; if a loop is reported again
/// (the model ignored the feedback) the caller is expected to hard-stop.
///
/// `Debug` is derived so the detector state can be logged and so the
/// type can be embedded in other `Debug` types.
#[derive(Debug)]
pub struct LoopDetector {
    /// Fingerprint of the most recently recorded tool call, if any.
    last_fingerprint: Option<String>,
    /// Length of the current unbroken run of `last_fingerprint`.
    consecutive_count: usize,
    /// Number of loop detections acknowledged so far in this session.
    detection_count: u32,
    /// Ring buffer of recent tool names (for display in the hard-cap prompt).
    recent: VecDeque<String>,
}
88
89impl Default for LoopDetector {
90    fn default() -> Self {
91        Self::new()
92    }
93}
94
95impl LoopDetector {
96    /// Create a new loop detector with empty history.
97    pub fn new() -> Self {
98        Self {
99            last_fingerprint: None,
100            consecutive_count: 0,
101            detection_count: 0,
102            recent: VecDeque::new(),
103        }
104    }
105
106    /// Record a batch of tool calls and check for loops.
107    ///
108    /// Returns a [`LoopAction`] indicating what the caller should do.
109    pub fn record(&mut self, tool_calls: &[ToolCall]) -> LoopAction {
110        for tc in tool_calls {
111            let fp = fingerprint(&tc.function_name, &tc.arguments);
112
113            // Update consecutive counter
114            if self.last_fingerprint.as_ref() == Some(&fp) {
115                self.consecutive_count += 1;
116            } else {
117                self.last_fingerprint = Some(fp);
118                self.consecutive_count = 1;
119            }
120
121            // Update display ring buffer
122            self.recent.push_back(tc.function_name.clone());
123            if self.recent.len() > DISPLAY_RECENT {
124                self.recent.pop_front();
125            }
126        }
127
128        self.check()
129    }
130
131    /// Clear the detection state after feedback injection so the model
132    /// gets a fresh chance. Increments `detection_count` so the next
133    /// trigger will be a hard stop.
134    pub fn clear_after_feedback(&mut self) {
135        self.detection_count += 1;
136        self.last_fingerprint = None;
137        self.consecutive_count = 0;
138    }
139
140    /// Recent tool names (most recent last), for display in the hard-cap prompt.
141    pub fn recent_names(&self) -> Vec<String> {
142        self.recent.iter().cloned().collect()
143    }
144
145    fn check(&self) -> LoopAction {
146        if self.consecutive_count < CONSECUTIVE_REPEAT_THRESHOLD {
147            return LoopAction::Ok;
148        }
149
150        let fp = self.last_fingerprint.as_deref().unwrap_or("unknown");
151        let tool_name = fp.split(':').next().unwrap_or(fp);
152        let detail = format!(
153            "'{tool_name}' called {n} times consecutively with identical arguments",
154            n = self.consecutive_count,
155        );
156
157        if self.detection_count == 0 {
158            // First detection — inject feedback
159            LoopAction::InjectFeedback(detail)
160        } else {
161            // Already injected feedback before — hard stop
162            LoopAction::HardStop(detail)
163        }
164    }
165}
166
/// Stable fingerprint: tool name plus a bounded prefix of the raw args.
///
/// The prefix is at most 200 **bytes** of `args`. When byte 200 falls in
/// the middle of a multi-byte UTF-8 character, the cut is moved back to
/// the start of that character, so slicing can never panic on non-ASCII
/// arguments. For inputs where byte 200 is already a character boundary
/// (including all ASCII input) the result is unchanged.
///
/// Two calls whose arguments differ only beyond the prefix produce the
/// same fingerprint.
fn fingerprint(name: &str, args: &str) -> String {
    const MAX_ARG_BYTES: usize = 200;

    let mut end = args.len().min(MAX_ARG_BYTES);
    // Index 0 is always a char boundary, so this loop terminates.
    while !args.is_char_boundary(end) {
        end -= 1;
    }

    format!("{name}:{}", &args[..end])
}
172
173// ── Hard-cap prompt ───────────────────────────────────────────────
174
175/// Options for continuing after hitting the hard cap.
176#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
177#[serde(rename_all = "snake_case")]
178pub enum LoopContinuation {
179    /// Stop the inference loop.
180    Stop,
181    /// Continue for 50 more iterations.
182    Continue50,
183    /// Continue for 200 more iterations.
184    Continue200,
185}
186
187impl LoopContinuation {
188    /// Number of additional iterations granted.
189    pub fn extra_iterations(self) -> u32 {
190        match self {
191            Self::Stop => 0,
192            Self::Continue50 => 50,
193            Self::Continue200 => 200,
194        }
195    }
196}
197
198// ── Tests ─────────────────────────────────────────────────────────
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `ToolCall` with the given name and raw JSON args.
    fn call(name: &str, args: &str) -> ToolCall {
        ToolCall {
            id: "x".into(),
            function_name: name.into(),
            arguments: args.into(),
            thought_signature: None,
        }
    }

    #[test]
    fn no_loop_on_unique_calls() {
        let mut d = LoopDetector::new();
        assert_eq!(
            d.record(&[call("Edit", "{\"path\":\"a.rs\"}")]),
            LoopAction::Ok
        );
        assert_eq!(
            d.record(&[call("Edit", "{\"path\":\"b.rs\"}")]),
            LoopAction::Ok
        );
        assert_eq!(
            d.record(&[call("Bash", "{\"cmd\":\"ls\"}")]),
            LoopAction::Ok
        );
    }

    #[test]
    fn detects_consecutive_identical_calls() {
        let mut d = LoopDetector::new();
        let tc = call("Edit", "{\"path\":\"src/main.rs\"}");
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        // Should trigger feedback on threshold
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));
    }

    #[test]
    fn different_tool_resets_consecutive_count() {
        let mut d = LoopDetector::new();
        let tc = call("Edit", "{\"path\":\"src/main.rs\"}");
        // Almost at threshold
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 2 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        // Different tool resets the count
        assert_eq!(
            d.record(&[call("Bash", "{\"cmd\":\"test\"}")]),
            LoopAction::Ok
        );
        // Back to same tool — starts from 1 again
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));
    }

    #[test]
    fn read_edit_test_cycle_never_triggers() {
        // The most common coding workflow should NEVER trigger.
        let mut d = LoopDetector::new();
        let test_cmd = "{\"command\":\"cargo test\"}";
        let read_args = "{\"path\":\"src/lib.rs\"}";

        for cycle in 0..20 {
            assert_eq!(
                d.record(&[call("Read", read_args)]),
                LoopAction::Ok,
                "read should not trigger at cycle {cycle}"
            );
            let edit_args = format!("{{\"path\":\"src/lib.rs\",\"old\":\"v{cycle}\"}}");
            assert_eq!(
                d.record(&[call("Edit", &edit_args)]),
                LoopAction::Ok,
                "edit should not trigger at cycle {cycle}"
            );
            assert_eq!(
                d.record(&[call("Bash", test_cmd)]),
                LoopAction::Ok,
                "test should not trigger at cycle {cycle}"
            );
        }
    }

    #[test]
    fn feedback_then_hard_stop() {
        let mut d = LoopDetector::new();
        let tc = call("Read", "{\"path\":\"stuck.rs\"}");

        // First detection → feedback, reported by `record` itself.
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::InjectFeedback(_)
        ));

        // The caller acknowledges the feedback. `clear_after_feedback`
        // alone must arm the hard stop — no manual poking at
        // `detection_count`, which would mask a broken increment.
        d.clear_after_feedback();

        // The run was reset, so the model gets a fresh chance…
        for _ in 0..CONSECUTIVE_REPEAT_THRESHOLD - 1 {
            assert_eq!(d.record(std::slice::from_ref(&tc)), LoopAction::Ok);
        }
        // …but the second detection → hard stop.
        assert!(matches!(
            d.record(std::slice::from_ref(&tc)),
            LoopAction::HardStop(_)
        ));
    }

    #[test]
    fn parallel_calls_same_tool_not_a_loop() {
        // 10 parallel Read calls with DIFFERENT args in one batch — not a loop
        let mut d = LoopDetector::new();
        let batch: Vec<ToolCall> = (0..10)
            .map(|i| call("Read", &format!("{{\"path\":\"file{i}.rs\"}}")))
            .collect();
        assert_eq!(d.record(&batch), LoopAction::Ok);
    }

    #[test]
    fn same_tool_different_args_not_consecutive() {
        // Same tool name but different args each time — not consecutive
        let mut d = LoopDetector::new();
        for i in 0..20 {
            let args = format!("{{\"command\":\"ls -variant-{i}\"}}");
            assert_eq!(
                d.record(&[call("Bash", &args)]),
                LoopAction::Ok,
                "different args should not trigger at call {i}"
            );
        }
    }

    #[test]
    fn recent_names_tracks_last_five() {
        let mut d = LoopDetector::new();
        for i in 0..8 {
            let name = format!("Tool{i}");
            d.record(&[call(&name, "{}")]);
        }
        let names = d.recent_names();
        assert_eq!(names.len(), 5);
        assert_eq!(names[0], "Tool3");
        assert_eq!(names[4], "Tool7");
    }
}
koda_core/loop_guard.rs

koda_core/
loop_guard.rs