Skip to main content

zagens_core/engine/
loop_guard.rs

//! Pure-data guardrails for repeated tool-call loops (P2 PR4 → `zagens-core`).
2
3use std::collections::HashMap;
4use std::collections::hash_map::DefaultHasher;
5use std::fmt::Write as _;
6use std::hash::{Hash, Hasher};
7
8use serde_json::Value;
9
/// Number of attempts of one exact call (same tool, same canonical arguments)
/// at which `LoopGuard::record_attempt` starts answering `Block`. The count
/// includes the attempt being recorded, so the third identical attempt is the
/// first one refused.
const IDENTICAL_CALL_BLOCK_THRESHOLD: u32 = 3;
/// Consecutive-failure count of a single tool at which
/// `LoopGuard::record_outcome` emits a one-off `Warn` (exact match only).
const FAILURE_WARN_THRESHOLD: u32 = 3;
/// Consecutive-failure count of a single tool from which
/// `LoopGuard::record_outcome` answers `Halt` (this count and every one above).
const FAILURE_HALT_THRESHOLD: u32 = 8;
13
/// Verdict returned by `LoopGuard::record_attempt` before a tool call runs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttemptDecision {
    /// The call has not been repeated often enough to be refused.
    Proceed,
    /// The exact same call has been attempted too many times; the payload is
    /// the explanatory message produced by the guard.
    Block(String),
}
19
/// Verdict returned by `LoopGuard::record_outcome` after a tool call finished.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OutcomeDecision {
    /// Nothing to report: the call succeeded, or the failure streak is not at
    /// a threshold.
    Continue,
    /// The tool's failure streak just reached the warning threshold; the
    /// payload is the warning message.
    Warn(String),
    /// The tool's failure streak reached the halt threshold; the payload is
    /// the message telling the caller to stop retrying that tool.
    Halt(String),
}
26
/// In-memory counters used to detect repeated identical tool calls and
/// streaks of failures of the same tool.
///
/// The guard performs no I/O; it only counts what it is told through its
/// `record_*` methods. `LoopGuard::default()` starts with both maps empty.
#[derive(Debug, Default)]
pub struct LoopGuard {
    /// Attempts per exact call, keyed by `(tool name, hash of the canonical
    /// arguments)`.
    call_counts: HashMap<(String, u64), u32>,
    /// Consecutive failures per tool name.
    failure_counts: HashMap<String, u32>,
}
32
33impl LoopGuard {
34    pub fn record_attempt(&mut self, tool: &str, args: &Value) -> AttemptDecision {
35        let key = (tool.to_string(), hash_args(args));
36        let count = self.call_counts.entry(key).or_insert(0);
37        *count = count.saturating_add(1);
38        if *count >= IDENTICAL_CALL_BLOCK_THRESHOLD {
39            return AttemptDecision::Block(format!(
40                "Blocked: this exact call (`{tool}` with these arguments) has already run {count} times this turn. Stop retrying it unchanged. Either change the arguments or pick a different tool."
41            ));
42        }
43        AttemptDecision::Proceed
44    }
45
46    pub fn record_outcome(&mut self, tool: &str, ok: bool) -> OutcomeDecision {
47        let failures = self.failure_counts.entry(tool.to_string()).or_insert(0);
48        if ok {
49            *failures = 0;
50            return OutcomeDecision::Continue;
51        }
52
53        *failures = failures.saturating_add(1);
54        if *failures >= FAILURE_HALT_THRESHOLD {
55            return OutcomeDecision::Halt(format!(
56                "Stop retrying `{tool}` - it has failed {failures} consecutive times. Choose a different approach."
57            ));
58        }
59        if *failures == FAILURE_WARN_THRESHOLD {
60            return OutcomeDecision::Warn(format!(
61                "Tool `{tool}` has failed {failures} consecutive times this turn."
62            ));
63        }
64        OutcomeDecision::Continue
65    }
66
67    /// Clear consecutive-failure counters so a granted continuation (e.g. a
68    /// long-horizon "change approach" nudge issued after a [`OutcomeDecision::Halt`])
69    /// doesn't immediately re-halt on the same tool. Identical-call counts are
70    /// left intact, so blindly repeating the *exact* same call is still blocked.
71    pub fn reset_failures(&mut self) {
72        self.failure_counts.clear();
73    }
74
75    /// Clear identical-call counts after the workspace changed (a state-mutating
76    /// tool succeeded). Re-running the *exact same* verify/read call after an
77    /// intervening edit is legitimate work — not a redundant loop — so it must
78    /// not stay blocked. Without this, an iterative `edit → re-run same test`
79    /// loop trips the 3× block and the model is forced into meaningless
80    /// arg-reordering to dodge the guard (defeating its purpose). Hammering the
81    /// same call with **no** intervening change still blocks, because nothing
82    /// calls this between those identical attempts.
83    pub fn note_state_changed(&mut self) {
84        self.call_counts.clear();
85    }
86
87    /// Whether a tool's success means the workspace materially changed, so the
88    /// identical-call counter should be cleared (see [`Self::note_state_changed`]).
89    #[must_use]
90    pub fn is_state_mutating_tool(tool: &str) -> bool {
91        crate::engine::tool_effects::tool_writes_state(tool)
92    }
93}
94
95fn hash_args(args: &Value) -> u64 {
96    let mut canonical = String::new();
97    write_canonical_json(args, &mut canonical);
98    let mut hasher = DefaultHasher::new();
99    canonical.hash(&mut hasher);
100    hasher.finish()
101}
102
103fn write_canonical_json(value: &Value, out: &mut String) {
104    match value {
105        Value::Null => out.push_str("null"),
106        Value::Bool(value) => out.push_str(if *value { "true" } else { "false" }),
107        Value::Number(value) => {
108            let _ = write!(out, "{value}");
109        }
110        Value::String(value) => {
111            out.push_str(&serde_json::to_string(value).expect("serializing string cannot fail"));
112        }
113        Value::Array(values) => {
114            out.push('[');
115            for (idx, item) in values.iter().enumerate() {
116                if idx > 0 {
117                    out.push(',');
118                }
119                write_canonical_json(item, out);
120            }
121            out.push(']');
122        }
123        Value::Object(values) => {
124            out.push('{');
125            let mut entries = values.iter().collect::<Vec<_>>();
126            entries.sort_by(|a, b| a.0.cmp(b.0));
127            for (idx, (key, item)) in entries.into_iter().enumerate() {
128                if idx > 0 {
129                    out.push(',');
130                }
131                out.push_str(&serde_json::to_string(key).expect("serializing key cannot fail"));
132                out.push(':');
133                write_canonical_json(item, out);
134            }
135            out.push('}');
136        }
137    }
138}
139
/// Unit tests for the identical-call and consecutive-failure guards.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn third_identical_tool_call_is_blocked() {
        let mut guard = LoopGuard::default();
        let args = json!({"path": "src/main.rs"});

        assert_eq!(
            guard.record_attempt("read_file", &args),
            AttemptDecision::Proceed
        );
        assert_eq!(
            guard.record_attempt("read_file", &args),
            AttemptDecision::Proceed
        );

        let AttemptDecision::Block(message) = guard.record_attempt("read_file", &args) else {
            panic!("third identical call should be blocked");
        };
        assert!(message.contains("read_file"));
        assert!(message.contains("already run 3 times"));
    }

    #[test]
    fn paginated_reads_are_not_false_positives() {
        let mut guard = LoopGuard::default();

        for offset in [0, 100, 200] {
            assert_eq!(
                guard.record_attempt(
                    "read_file",
                    &json!({"path": "src/main.rs", "offset": offset})
                ),
                AttemptDecision::Proceed
            );
        }
    }

    #[test]
    fn tool_failure_counter_warns_at_three_and_halts_at_eight() {
        let mut guard = LoopGuard::default();

        assert_eq!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Continue
        );
        assert_eq!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Continue
        );
        assert!(matches!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Warn(message) if message.contains("failed 3 consecutive times")
        ));

        // Failures four through seven sit between the two thresholds.
        for _ in 4..8 {
            assert_eq!(
                guard.record_outcome("grep_files", false),
                OutcomeDecision::Continue
            );
        }
        assert!(matches!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Halt(message) if message.contains("failed 8 consecutive times")
        ));
    }

    #[test]
    fn successful_tool_call_resets_failure_counter() {
        let mut guard = LoopGuard::default();

        assert_eq!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Continue
        );
        assert_eq!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Continue
        );
        assert_eq!(
            guard.record_outcome("grep_files", true),
            OutcomeDecision::Continue
        );
        // Without the reset this would be the third failure and would warn.
        assert_eq!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Continue
        );
    }

    #[test]
    fn failure_counters_are_tracked_per_tool() {
        let mut guard = LoopGuard::default();

        for _ in 0..2 {
            assert_eq!(
                guard.record_outcome("grep_files", false),
                OutcomeDecision::Continue
            );
        }
        // A different tool failing must not advance the first tool's streak …
        assert_eq!(
            guard.record_outcome("read_file", false),
            OutcomeDecision::Continue
        );
        // … so the warning fires on the first tool's own third failure.
        assert!(matches!(
            guard.record_outcome("grep_files", false),
            OutcomeDecision::Warn(_)
        ));
    }

    #[test]
    fn reset_failures_clears_halt_so_a_continuation_does_not_immediately_rehalt() {
        let mut guard = LoopGuard::default();
        // Drive to the halt threshold (8 consecutive failures); intermediate
        // decisions include a Warn at 3, which we don't assert here.
        for _ in 0..7 {
            let _ = guard.record_outcome("apply_patch", false);
        }
        // Eighth consecutive failure halts.
        assert!(matches!(
            guard.record_outcome("apply_patch", false),
            OutcomeDecision::Halt(_)
        ));
        // A granted "change approach" continuation resets the counters …
        guard.reset_failures();
        // … so the next failure starts the count over instead of re-halting.
        assert_eq!(
            guard.record_outcome("apply_patch", false),
            OutcomeDecision::Continue
        );
    }

    #[test]
    fn reset_failures_leaves_identical_call_blocking_intact() {
        let mut guard = LoopGuard::default();
        let args = json!({"path": "src/main.rs"});
        assert_eq!(
            guard.record_attempt("read_file", &args),
            AttemptDecision::Proceed
        );
        assert_eq!(
            guard.record_attempt("read_file", &args),
            AttemptDecision::Proceed
        );
        guard.reset_failures();
        // Identical-call counter is independent of the failure counter, so the
        // third unchanged call is still blocked after a failure reset.
        assert!(matches!(
            guard.record_attempt("read_file", &args),
            AttemptDecision::Block(_)
        ));
    }

    #[test]
    fn note_state_changed_unblocks_identical_call_after_an_edit() {
        let mut guard = LoopGuard::default();
        let cmd = json!({"command": "go test ./config/..."});
        assert_eq!(
            guard.record_attempt("exec_shell", &cmd),
            AttemptDecision::Proceed
        );
        assert_eq!(
            guard.record_attempt("exec_shell", &cmd),
            AttemptDecision::Proceed
        );
        // An intervening successful edit changed the workspace → prior identical
        // verify calls are no longer redundant, so re-running is allowed again.
        guard.note_state_changed();
        assert_eq!(
            guard.record_attempt("exec_shell", &cmd),
            AttemptDecision::Proceed
        );
        assert_eq!(
            guard.record_attempt("exec_shell", &cmd),
            AttemptDecision::Proceed
        );
        // …but without any further change, hammering it still trips the block.
        assert!(matches!(
            guard.record_attempt("exec_shell", &cmd),
            AttemptDecision::Block(_)
        ));
    }

    #[test]
    fn unified_writes_state_predicate_covers_file_and_shell_mutators() {
        assert!(LoopGuard::is_state_mutating_tool("write_file"));
        assert!(LoopGuard::is_state_mutating_tool("edit_file"));
        assert!(LoopGuard::is_state_mutating_tool("apply_patch"));
        assert!(LoopGuard::is_state_mutating_tool("create_dirs"));
        // M1 union: shell tools that may mutate workspace reset identical-call counts.
        assert!(LoopGuard::is_state_mutating_tool("exec_shell"));
        assert!(LoopGuard::is_state_mutating_tool("exec_shell_wait"));
        assert!(!LoopGuard::is_state_mutating_tool("read_file"));
        assert!(!LoopGuard::is_state_mutating_tool("grep_files"));
        assert!(!LoopGuard::is_state_mutating_tool("exec_shell_cancel"));
    }

    #[test]
    fn argument_hash_is_independent_of_object_key_order() {
        let mut guard = LoopGuard::default();

        assert_eq!(
            guard.record_attempt("read_file", &json!({"path": "a", "offset": 0})),
            AttemptDecision::Proceed
        );
        assert_eq!(
            guard.record_attempt("read_file", &json!({"offset": 0, "path": "a"})),
            AttemptDecision::Proceed
        );
        // Both orderings above counted as the same call, so this is the third.
        assert!(matches!(
            guard.record_attempt("read_file", &json!({"path": "a", "offset": 0})),
            AttemptDecision::Block(_)
        ));
    }
}