rsclaw 2026.4.20

AI Agent Engine Compatible with OpenClaw
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
//! Tool call loop detection (AGENTS.md §20).
//!
//! Uses a sliding window over recent tool calls.
//! Per-tool thresholds allow different limits for different tools.
//!
//! Distinguishes between WARNING (model notified, execution continues)
//! and CRITICAL (execution blocked), matching OpenClaw behavior.
//!
//! Hashes tool name + params (like OpenClaw's hashToolCall) so
//! different arguments are treated as different calls.

use std::collections::{HashMap, VecDeque};

/// Default sliding-window size.
const DEFAULT_WINDOW: usize = 25;
/// Default warning threshold — generic loops trigger warning at this count.
const DEFAULT_WARNING_THRESHOLD: usize = 5;
/// Default critical threshold — loops at this count are blocked.
const DEFAULT_CRITICAL_THRESHOLD: usize = 10;

/// Per-tool `(warning, critical)` threshold overrides that ship with the
/// detector.
///
/// The table is currently empty; every tool falls back to the detector-wide
/// thresholds unless the caller supplies its own overrides.
fn builtin_overrides() -> HashMap<String, (usize, usize)> {
    HashMap::default()
}

/// Builds the loop-detection key for a tool call (counterpart of OpenClaw's
/// `hashToolCall`).
///
/// The key has the form `<tool_name>:<hash>`, where `<hash>` is the decimal
/// FNV-1a hash of the key-sorted JSON rendering of `params`. A fast
/// non-cryptographic hash is used deliberately: the key only has to tell
/// calls apart within a session.
pub fn hash_tool_call(tool_name: &str, params: &serde_json::Value) -> String {
    let digest = simple_hash(&stable_stringify(params));
    format!("{tool_name}:{digest}")
}

/// Stable JSON stringify with sorted keys (matches OpenClaw's stableStringify).
fn stable_stringify(value: &serde_json::Value) -> String {
    match value {
        serde_json::Value::Null => "null".to_string(),
        serde_json::Value::Bool(b) => b.to_string(),
        serde_json::Value::Number(n) => n.to_string(),
        serde_json::Value::String(s) => format!("\"{}\"", escape_json_string(s)),
        serde_json::Value::Array(arr) => {
            format!("[{}]", arr.iter().map(stable_stringify).collect::<Vec<_>>().join(","))
        }
        serde_json::Value::Object(obj) => {
            let keys: Vec<_> = obj.keys().collect();
            let sorted_keys = sort_keys(&keys);
            let entries: Vec<String> = sorted_keys
                .iter()
                .map(|k| {
                    let v = obj.get(*k).unwrap_or(&serde_json::Value::Null);
                    format!("\"{}\":{}", escape_json_string(k), stable_stringify(v))
                })
                .collect();
            format!("{{{}}}", entries.join(","))
        }
    }
}

/// Escapes backslash, double quote, newline, carriage return and tab so the
/// text can be embedded between double quotes.
///
/// Other control characters are passed through unchanged; the output is used
/// as hash input, not as strictly valid JSON.
fn escape_json_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Returns the given key references in ascending (byte-wise `String`) order.
///
/// The input slice is left untouched; a new vector of the same references is
/// returned.
fn sort_keys<'a>(keys: &[&'a String]) -> Vec<&'a String> {
    let mut ordered: Vec<&'a String> = keys.iter().copied().collect();
    ordered.sort();
    ordered
}

/// 64-bit FNV-1a hash over the UTF-8 bytes of `s`.
///
/// Fast and non-cryptographic; used only to fingerprint tool calls, results
/// and error messages in memory.
fn simple_hash(s: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;

    // XOR the byte in first, then multiply (the "1a" ordering). The multiply
    // wraps on purpose: FNV arithmetic is modulo 2^64.
    s.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}

/// Result of a loop detection check.
///
/// Produced by `LoopDetector::check_with_params` / `LoopDetector::check`.
/// Only `Critical` is meant to stop execution; `Warning` carries a message
/// for the model while the call is still allowed to proceed.
#[derive(Debug, Clone)]
pub enum LoopCheckResult {
    /// No loop detected — proceed normally.
    Ok,
    /// Generic repeat loop at warning level — model is notified, execution
    /// continues.
    Warning {
        /// Name of the tool whose call triggered the warning.
        tool_name: String,
        /// Number of matching calls counted in the current window.
        count: usize,
        /// Human-readable warning text intended for the model.
        message: String,
    },
    /// Critical loop detected — execution is blocked.
    Critical {
        /// Name of the tool whose call triggered the block.
        tool_name: String,
        /// Repeat count, or the failure-streak length when the block was
        /// caused by one of the failure-streak checks.
        count: usize,
        /// Human-readable explanation of why execution was blocked.
        message: String,
    },
}

impl LoopCheckResult {
    /// Whether this outcome blocks execution (i.e. it is `Critical`).
    pub fn is_critical(&self) -> bool {
        match self {
            Self::Critical { .. } => true,
            Self::Ok | Self::Warning { .. } => false,
        }
    }

    /// The warning text when this is a `Warning`; `None` for every other
    /// variant (including `Critical`).
    pub fn warning_message(&self) -> Option<String> {
        if let Self::Warning { message, .. } = self {
            Some(message.clone())
        } else {
            None
        }
    }

    /// Adapts the outcome to a `Result<Option<String>>` for use with `?`.
    ///
    /// - `Ok(None)` → no loop detected, proceed
    /// - `Ok(Some(msg))` → warning, proceed and surface `msg`
    /// - `Err(...)` → critical loop, block; the error carries the message
    pub fn to_result(&self) -> anyhow::Result<Option<String>> {
        match self {
            Self::Critical { message, .. } => Err(anyhow::anyhow!("{}", message)),
            Self::Warning { message, .. } => Ok(Some(message.clone())),
            Self::Ok => Ok(None),
        }
    }
}

/// A record of a tool call in history.
///
/// One record is appended per `LoopDetector::check_with_params` call; the
/// result hash is filled in afterwards by `LoopDetector::record_result`.
#[derive(Debug, Clone)]
pub struct ToolCallRecord {
    /// Name of the tool that was called.
    pub tool_name: String,
    /// Loop-detection key for the call, as produced by `hash_tool_call`
    /// (`<tool_name>:<hash of params>`).
    pub args_hash: String,
    /// Hash of the result (for no-progress detection).
    /// `None` until a result has been recorded for this call.
    pub result_hash: Option<String>,
}

/// Max consecutive failures with IDENTICAL error for the same tool before blocking.
/// Gives LLM enough room for iterative debugging (5 "fix and retry" cycles) while
/// still catching genuine dead-ends.
const MAX_SAME_ERROR_STREAK: usize = 5;

/// Fallback: max consecutive failures of ANY kind for the same tool before blocking.
/// Covers superficially-varying errors that still mean "stuck". A bit more lenient
/// than same-error since errors do genuinely differ during normal debugging.
const MAX_ANY_FAILURE_STREAK: usize = 8;

/// Canonicalizes an error message so superficial differences do not change
/// its hash.
///
/// Two transformations are applied:
/// - every run of three or more ASCII digits is replaced by a single `N`
///   (long line numbers, timestamps, addresses); runs of one or two digits
///   are kept verbatim, so "exit 1" and "exit 2" remain distinct;
/// - all whitespace runs are collapsed to one space and leading/trailing
///   whitespace is removed.
fn normalize_error(s: &str) -> String {
    /// Writes the pending digit run to `dest` (masked when long enough) and
    /// empties it.
    fn emit_digits(digits: &mut String, dest: &mut String) {
        if digits.len() < 3 {
            dest.push_str(digits);
        } else {
            dest.push('N');
        }
        digits.clear();
    }

    let mut masked = String::with_capacity(s.len());
    let mut digits = String::new();
    for ch in s.chars() {
        if ch.is_ascii_digit() {
            digits.push(ch);
            continue;
        }
        emit_digits(&mut digits, &mut masked);
        masked.push(ch);
    }
    // The message may end in the middle of a digit run.
    emit_digits(&mut digits, &mut masked);

    let words: Vec<&str> = masked.split_whitespace().collect();
    words.join(" ")
}

/// Sliding-window detector for repeated or persistently failing tool calls.
///
/// Three signals are tracked: repeated identical calls inside the window,
/// a per-tool streak of identical (normalized) errors, and a per-tool streak
/// of failures of any kind.
#[derive(Debug, Clone)]
pub struct LoopDetector {
    /// Maximum number of records kept in `history`.
    window: usize,
    /// Detector-wide repeat count at which a warning is issued.
    warning_threshold: usize,
    /// Detector-wide repeat count at which execution is blocked.
    critical_threshold: usize,
    /// Per-tool threshold overrides, keyed by tool name.
    overrides: HashMap<String, (usize, usize)>, // (warning, critical) per-tool
    /// History of tool call records with args_hash and result_hash.
    history: VecDeque<ToolCallRecord>,
    /// Per-tool streak of identical errors. Keyed by tool_name.
    /// Value: (error_hash, count). Reset when a different error OR success appears.
    error_streak: HashMap<String, (String, usize)>,
    /// Per-tool streak of ANY failures (regardless of error). Catches superficially
    /// varying errors that still mean the same thing. Reset on success.
    any_failure_streak: HashMap<String, usize>,
}

/// Decides whether a tool result value represents a failure.
///
/// A result counts as failed when any of the following holds:
/// - `exit_code` is an integer other than 0 (exec-style results);
/// - `error` is a non-empty string;
/// - `success` is the boolean `false`;
/// - `ok` is the boolean `false`.
///
/// Anything else, including values that are not objects, is treated as
/// success.
fn is_result_failure(result: &serde_json::Value) -> bool {
    let nonzero_exit = matches!(
        result.get("exit_code").and_then(|v| v.as_i64()),
        Some(code) if code != 0
    );
    let has_error_text = matches!(
        result.get("error").and_then(|v| v.as_str()),
        Some(text) if !text.is_empty()
    );
    let explicitly_false =
        |key: &str| result.get(key).and_then(|v| v.as_bool()) == Some(false);

    nonzero_exit || has_error_text || explicitly_false("success") || explicitly_false("ok")
}

impl LoopDetector {
    pub fn new(window: usize, default_threshold: usize) -> Self {
        // When constructed with a single threshold (backwards compat), use it as the
        // warning threshold; critical is set one step above.
        Self::with_dual_thresholds(
            window,
            default_threshold,
            default_threshold.saturating_add(1),
        )
    }

    /// Create with explicit dual thresholds (warning + critical).
    pub fn with_dual_thresholds(
        window: usize,
        warning_threshold: usize,
        critical_threshold: usize,
    ) -> Self {
        Self {
            window,
            warning_threshold,
            critical_threshold,
            overrides: builtin_overrides(),
            history: VecDeque::new(),
            error_streak: HashMap::new(),
            any_failure_streak: HashMap::new(),
        }
    }

    pub fn with_overrides(
        window: usize,
        warning_threshold: usize,
        critical_threshold: usize,
        extra_overrides: HashMap<String, (usize, usize)>,
    ) -> Self {
        let mut overrides = builtin_overrides();
        overrides.extend(extra_overrides);
        Self {
            window,
            warning_threshold,
            critical_threshold,
            overrides,
            history: VecDeque::new(),
            error_streak: HashMap::new(),
            any_failure_streak: HashMap::new(),
        }
    }

    /// Create a LoopDetector compatible with runtime.rs caller that passes
    /// a single threshold value. We treat that value as the WARNING threshold
    /// and set critical = warning + 10 (matching OpenClaw's
    /// DEFAULT_CRITICAL_THRESHOLD = WARNING_THRESHOLD + 10 pattern).
    pub fn from_single_threshold(window: usize, threshold: usize) -> Self {
        let critical = threshold.saturating_add(10).max(threshold + 1);
        Self::with_dual_thresholds(window, threshold, critical)
    }

    fn thresholds_for(&self, tool_name: &str) -> (usize, usize) {
        self.overrides
            .get(tool_name)
            .copied()
            .unwrap_or((self.warning_threshold, self.critical_threshold))
    }

    /// Record a tool call with full params hash (OpenClaw-compatible).
    ///
    /// Returns `LoopCheckResult`:
    /// - `Ok` → proceed normally
    /// - `Warning` → model is notified, execution continues (generic repeat)
    /// - `Critical` → execution blocked (excessive repeats or circuit breaker)
    ///
    /// Progress detection: same args + different results = making progress.
    /// Only count as "loop" when same args AND same results (no progress).
    pub fn check_with_params(&mut self, tool_name: &str, params: &serde_json::Value) -> LoopCheckResult {
        let args_hash = hash_tool_call(tool_name, params);

        // Add to history (result_hash will be set later via record_result)
        self.history.push_back(ToolCallRecord {
            tool_name: tool_name.to_owned(),
            args_hash: args_hash.clone(),
            result_hash: None,
        });
        if self.history.len() > self.window {
            self.history.pop_front();
        }

        // Progress-aware loop detection:
        // Count only calls where same args AND same result (no progress).
        // Different results = making progress, don't count as loop.
        let same_args_records: Vec<_> = self
            .history
            .iter()
            .filter(|r| r.args_hash == args_hash)
            .collect();

        // Check if there's progress: different result_hash values among same args calls.
        let result_hashes: Vec<_> = same_args_records
            .iter()
            .filter_map(|r| r.result_hash.as_ref())
            .collect();

        let has_progress = result_hashes.len() >= 2 && {
            // If we have at least 2 different result_hash values, there's progress.
            let first = result_hashes.first();
            result_hashes.iter().any(|h| h != first.unwrap())
        };

        // Count for loop detection:
        // - If progress detected (different results), only count calls with no result_hash yet
        //   (these are pending calls that haven't finished, might be making progress)
        // - If no progress detected (same results or all pending), count all same args calls
        let count = if has_progress {
            // Making progress: only count pending calls (result_hash = None)
            same_args_records
                .iter()
                .filter(|r| r.result_hash.is_none())
                .count()
        } else {
            // No progress detected: count all same args calls
            same_args_records.len()
        };

        // Second axis: same tool repeatedly producing the same (normalized) error.
        // Catches "LLM retrying syntactically-different-but-equally-broken variants".
        if let Some((err_hash, streak)) = self.error_streak.get(tool_name) {
            if *streak >= MAX_SAME_ERROR_STREAK {
                return LoopCheckResult::Critical {
                    tool_name: tool_name.to_owned(),
                    count: *streak,
                    message: format!(
                        "CRITICAL: tool `{tool_name}` returned the same (normalized) error {streak} times in a row \
                         (error hash {err_hash}). Different arguments, same failure — the approach \
                         is wrong. Stop and report the problem to the user.",
                    ),
                };
            }
        }
        // Third axis: any-failure streak fallback — catches errors that differ
        // in surface form but are still repeated failures on the same tool.
        if let Some(streak) = self.any_failure_streak.get(tool_name) {
            if *streak >= MAX_ANY_FAILURE_STREAK {
                return LoopCheckResult::Critical {
                    tool_name: tool_name.to_owned(),
                    count: *streak,
                    message: format!(
                        "CRITICAL: tool `{tool_name}` failed {streak} times consecutively with no success. \
                         The approach is stuck. Stop and report the problem to the user.",
                    ),
                };
            }
        }

        let (warning_threshold, critical_threshold) = self.thresholds_for(tool_name);

        // Critical threshold — blocks execution
        if count >= critical_threshold {
            return LoopCheckResult::Critical {
                tool_name: tool_name.to_owned(),
                count,
                message: format!(
                    "CRITICAL: tool `{tool_name}` called {count} times in the last {} calls with identical arguments and results. \
                     No progress detected. Session execution blocked to prevent runaway loops.",
                    self.history.len(),
                ),
            };
        }

        // Warning threshold — model is notified but execution continues
        if count >= warning_threshold {
            return LoopCheckResult::Warning {
                tool_name: tool_name.to_owned(),
                count,
                message: format!(
                    "WARNING: You have called `{tool_name}` {count} times in the last {} \
                     calls with identical arguments and results. If this is not making progress, \
                     stop retrying and report the task as failed.",
                    self.history.len(),
                ),
            };
        }

        LoopCheckResult::Ok
    }

    /// Record a tool call and check for loops (legacy API - only uses tool_name).
    ///
    /// This is a backwards-compat wrapper that constructs an empty params value.
    /// Prefer `check_with_params` for proper argument hashing.
    pub fn check(&mut self, tool_name: &str) -> LoopCheckResult {
        self.check_with_params(tool_name, &serde_json::Value::Object(serde_json::Map::new()))
    }

    /// Record the result hash for the most recent tool call.
    /// Used for no-progress detection (same call, same result = stuck).
    /// Also maintains the per-tool error_streak for the second-axis loop check.
    pub fn record_result(&mut self, result: &serde_json::Value) {
        // Capture the tool name before the mutable borrow below.
        let tool_name = self.history.back().map(|r| r.tool_name.clone());

        if let Some(last) = self.history.back_mut() {
            let result_str = stable_stringify(result);
            last.result_hash = Some(format!("{}", simple_hash(&result_str)));
        }

        let Some(name) = tool_name else { return };

        let failure = is_result_failure(result);
        if failure {
            // Normalize the error signature — strip line:col, numeric suffixes.
            let raw_sig = result
                .get("error")
                .and_then(|v| v.as_str())
                .map(String::from)
                .or_else(|| result.get("stderr").and_then(|v| v.as_str()).map(String::from))
                .unwrap_or_else(|| stable_stringify(result));
            let err_sig = normalize_error(&raw_sig);
            let err_hash = format!("{}", simple_hash(&err_sig));

            self.error_streak
                .entry(name.clone())
                .and_modify(|(h, c)| {
                    if *h == err_hash {
                        *c += 1;
                    } else {
                        *h = err_hash.clone();
                        *c = 1;
                    }
                })
                .or_insert((err_hash, 1));
            // Increment any-failure streak too.
            *self.any_failure_streak.entry(name.clone()).or_insert(0) += 1;

        } else {
            // Success clears both streaks for this tool.
            self.error_streak.remove(&name);
            self.any_failure_streak.remove(&name);
        }
    }

    /// Reset the history (e.g. after a tool successfully produces new output).
    pub fn reset(&mut self) {
        self.history.clear();
        self.error_streak.clear();
        self.any_failure_streak.clear();
    }
}

impl Default for LoopDetector {
    fn default() -> Self {
        Self::with_dual_thresholds(
            DEFAULT_WINDOW,
            DEFAULT_WARNING_THRESHOLD,
            DEFAULT_CRITICAL_THRESHOLD,
        )
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

// Unit tests for the loop detector. Tests that use `check` exercise the
// legacy API, where every call to a given tool hashes to the same key.
#[cfg(test)]
mod tests {
    use super::*;

    // Small predicates so the assertions below read naturally.
    fn is_ok(r: &LoopCheckResult) -> bool {
        matches!(r, LoopCheckResult::Ok)
    }
    fn is_warning(r: &LoopCheckResult) -> bool {
        matches!(r, LoopCheckResult::Warning { .. })
    }
    fn is_critical(r: &LoopCheckResult) -> bool {
        matches!(r, LoopCheckResult::Critical { .. })
    }

    #[test]
    fn no_loop_for_varied_tools() {
        let mut d = LoopDetector::default();
        assert!(is_ok(&d.check("read")));
        assert!(is_ok(&d.check("write")));
        assert!(is_ok(&d.check("exec")));
        assert!(is_ok(&d.check("read")));
    }

    #[test]
    fn detects_warning_before_critical() {
        // With dual thresholds: warn=3, crit=5
        // count >= warning_threshold triggers Warning, count >= critical triggers Critical
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        assert!(is_ok(&d.check("read")));     // count=1
        assert!(is_ok(&d.check("read")));     // count=2
        assert!(is_warning(&d.check("read"))); // count=3 >= warn(3)
        assert!(is_warning(&d.check("read"))); // count=4
        assert!(is_critical(&d.check("read"))); // count=5 >= crit(5)
    }

    #[test]
    fn single_threshold_constructor_sets_critical_above() {
        // LoopDetector::new with threshold=3 sets warning=3, critical=4
        let mut d = LoopDetector::new(10, 3);
        assert!(is_ok(&d.check("exec")));      // count=1
        assert!(is_ok(&d.check("exec")));      // count=2
        assert!(is_warning(&d.check("exec"))); // count=3 >= warn(3)
        assert!(is_critical(&d.check("exec"))); // count=4 >= crit(4)
    }

    #[test]
    fn default_has_warning_at_5_critical_at_10() {
        let mut d = LoopDetector::default();
        // Calls 1-4 are below the warning threshold; the 5th call reaches it.
        for i in 0..4 {
            assert!(is_ok(&d.check("exec")), "call {} should be ok", i + 1);
        }
        assert!(is_warning(&d.check("exec")), "5th call should be warning");
        // Calls 6-9 stay at warning level; the 10th call reaches critical.
        for i in 5..9 {
            assert!(
                is_warning(&d.check("exec")),
                "call {} should be warning",
                i + 1
            );
        }
        assert!(
            is_critical(&d.check("exec")),
            "10th call should be critical"
        );
    }

    #[test]
    fn custom_override_takes_priority() {
        let mut overrides = HashMap::new();
        overrides.insert("my_tool".into(), (2, 3)); // warn=2, crit=3
        // Detector-wide thresholds (10, 20) are far higher than the override.
        let mut d = LoopDetector::with_overrides(10, 10, 20, overrides);
        assert!(is_ok(&d.check("my_tool")));
        assert!(is_warning(&d.check("my_tool"))); // 2nd = warn
        assert!(is_critical(&d.check("my_tool"))); // 3rd = crit
    }

    #[test]
    fn window_slides_correctly() {
        let mut d = LoopDetector::with_dual_thresholds(4, 3, 5);
        assert!(is_ok(&d.check("a")));
        assert!(is_ok(&d.check("b")));
        assert!(is_ok(&d.check("a")));
        assert!(is_ok(&d.check("b")));
        // window was [a,b,a,b]; this call evicts the oldest "a", giving
        // [b,a,b,a]. "a" appears 2 times, not 3.
        assert!(is_ok(&d.check("a")));
        // window=[a,b,a,a] -> a appears 3 times -> warning.
        assert!(is_warning(&d.check("a")));
    }

    #[test]
    fn reset_clears_loop_state() {
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        assert!(is_ok(&d.check("read")));
        assert!(is_ok(&d.check("read")));
        assert!(is_warning(&d.check("read")));
        d.reset();
        assert!(is_ok(&d.check("read")), "after reset, should be ok");
    }

    #[test]
    fn warning_message_contains_info() {
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        // Three priming calls; the 4th call below is inspected (count=4).
        for _ in 0..3 {
            d.check("exec");
        }
        let result = d.check("exec");
        if let LoopCheckResult::Warning {
            tool_name,
            count,
            message,
        } = result
        {
            assert_eq!(tool_name, "exec");
            assert_eq!(count, 4);
            assert!(message.contains("exec"));
        } else {
            panic!("expected Warning, got {:?}", result);
        }
    }

    #[test]
    fn different_params_count_as_different_calls() {
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        let params_a = serde_json::json!({"command": "ls"});
        let params_b = serde_json::json!({"command": "pwd"});

        // These should be counted separately since params differ
        assert!(is_ok(&d.check_with_params("exec", &params_a)));
        assert!(is_ok(&d.check_with_params("exec", &params_b)));
        assert!(is_ok(&d.check_with_params("exec", &params_a)));
        // After this call each params value has been seen 2 times - no warning
        assert!(is_ok(&d.check_with_params("exec", &params_b)));
    }

    #[test]
    fn same_params_trigger_warning() {
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        let params = serde_json::json!({"command": "ls -la"});

        assert!(is_ok(&d.check_with_params("exec", &params)));       // count=1
        assert!(is_ok(&d.check_with_params("exec", &params)));       // count=2
        assert!(is_warning(&d.check_with_params("exec", &params)));  // count=3 >= warn(3)
        assert!(is_warning(&d.check_with_params("exec", &params)));  // count=4
        assert!(is_critical(&d.check_with_params("exec", &params))); // count=5 >= crit(5)
    }

    #[test]
    fn hash_tool_call_includes_params() {
        let params_a = serde_json::json!({"command": "ls"});
        let params_b = serde_json::json!({"command": "pwd"});
        let hash_a = hash_tool_call("exec", &params_a);
        let hash_b = hash_tool_call("exec", &params_b);
        // Different params should produce different hashes
        assert_ne!(hash_a, hash_b);
        // Same params should produce same hash
        let hash_a2 = hash_tool_call("exec", &params_a);
        assert_eq!(hash_a, hash_a2);
    }

    #[test]
    fn stable_stringify_sorts_keys() {
        let obj1 = serde_json::json!({"b": 2, "a": 1});
        let obj2 = serde_json::json!({"a": 1, "b": 2});
        // Different key order should produce same hash
        let hash1 = simple_hash(&stable_stringify(&obj1));
        let hash2 = simple_hash(&stable_stringify(&obj2));
        assert_eq!(hash1, hash2);
    }

    // ---------------------------------------------------------------------------
    // Progress detection tests
    // ---------------------------------------------------------------------------

    #[test]
    fn different_results_means_progress() {
        // Same params but different results = making progress, should NOT trigger loop.
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        let params = serde_json::json!({"command": "ls"});

        // Call 1: check, then record result
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "file1.txt"}));

        // Call 2: same params, different result = progress
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "file1.txt file2.txt"}));

        // Call 3: still progressing
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "file1.txt file2.txt file3.txt"}));

        // Call 4: even after many calls with same params, different results mean progress
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "file1.txt file2.txt file3.txt file4.txt"}));

        // Should still be OK - no loop detected because results are changing
        assert!(is_ok(&d.check_with_params("exec", &params)));
    }

    #[test]
    fn same_results_means_no_progress() {
        // Same params AND same results = no progress, should trigger loop.
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        let params = serde_json::json!({"command": "ls"});

        // Call 1-2: same params, same result = stuck
        assert!(is_ok(&d.check_with_params("exec", &params)));       // count=1
        d.record_result(&serde_json::json!({"stdout": "same_output"}));

        assert!(is_ok(&d.check_with_params("exec", &params)));       // count=2
        d.record_result(&serde_json::json!({"stdout": "same_output"}));

        // Call 3: count=3 >= warn(3) = warning
        assert!(is_warning(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "same_output"}));

        // Call 4: still warning
        assert!(is_warning(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "same_output"}));

        // Call 5: count=5 >= crit(5) = critical
        assert!(is_critical(&d.check_with_params("exec", &params)));
    }

    // NOTE(review): the test name below misspells "progress" ("progres");
    // rename in a follow-up if nothing references the test by name.
    #[test]
    fn mixed_results_progres_detection() {
        // Some same results, some different = still considered progress.
        // Use warn=4, crit=6 so the three calls made before progress is
        // detected (count reaches 3) stay below the warning threshold.
        let mut d = LoopDetector::with_dual_thresholds(10, 4, 6);
        let params = serde_json::json!({"command": "ls"});

        // Call 1: initial
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "a"}));

        // Call 2: same result as call 1
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "a"}));

        // Call 3: different result = progress detected
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "b"}));

        // Call 4: after progress detected, should not count as loop
        // (only pending calls with result_hash=None are counted)
        assert!(is_ok(&d.check_with_params("exec", &params)));
        d.record_result(&serde_json::json!({"stdout": "c"}));

        // Many more calls with different results - no loop
        for i in 0..20 {
            assert!(is_ok(&d.check_with_params("exec", &params)));
            d.record_result(&serde_json::json!({"stdout": format!("result_{}", i)}));
        }
    }

    #[test]
    fn no_result_hash_yet_counts_as_potential_loop() {
        // When result_hash is None (call hasn't finished), count it as potential loop.
        let mut d = LoopDetector::with_dual_thresholds(10, 3, 5);
        let params = serde_json::json!({"command": "ls"});

        // Call without recording result
        assert!(is_ok(&d.check_with_params("exec", &params)));
        // Don't call record_result

        // Another call (previous still has result_hash=None)
        assert!(is_ok(&d.check_with_params("exec", &params)));       // count=2
        assert!(is_warning(&d.check_with_params("exec", &params)));  // count=3 >= warn(3)
        assert!(is_warning(&d.check_with_params("exec", &params)));  // count=4
        assert!(is_critical(&d.check_with_params("exec", &params))); // count=5 >= crit(5)
    }
}