// collet 0.1.1 - Docs.rs

use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use std::time::{Duration, Instant};

/// Shared handle for adjusting the iteration limit from outside the agent loop.
///
/// Every clone wraps the same `Arc<AtomicU32>`, so a change made through one handle is
/// visible through all of them. The coordinator holds a clone; the guard re-reads the
/// atomic value at the start of each `AgentGuard::check` call.
/// This allows web UI users to "extend" a running agent without stopping it.
#[derive(Clone, Debug)]
pub struct IterationBudget(Arc<AtomicU32>);

impl IterationBudget {
    /// Create a budget whose initial ceiling is `max` iterations.
    pub fn new(max: u32) -> Self {
        Self(Arc::new(AtomicU32::new(max)))
    }

    /// Read the current ceiling.
    pub fn get(&self) -> u32 {
        self.0.load(Ordering::Relaxed)
    }

    /// Increase the ceiling by `extra` iterations, saturating at `u32::MAX`.
    ///
    /// A plain `fetch_add` wraps around on overflow, which would turn a large extension
    /// into a *lower* ceiling. The read-modify-write below clamps instead.
    pub fn extend(&self, extra: u32) {
        // The closure always returns `Some`, so `fetch_update` cannot report failure;
        // the returned previous value is not needed.
        let _ = self
            .0
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| {
                Some(current.saturating_add(extra))
            });
    }
}

/// Stuck-prevention guard: timeouts, iteration limits, circuit breakers.
///
/// Holds the limits and per-task counters that `check()` evaluates to decide whether
/// the agent loop may continue. The counters, the task clock and the one-shot warning
/// flags are cleared by `reset()`; the configured limits are left untouched.
pub struct AgentGuard {
    /// Maximum iterations per user message (dynamically adjustable).
    /// Overwritten from `budget` at the start of every `check()` when a budget is attached.
    max_iterations: u32,
    /// Shared budget handle — when present, `max_iterations` is refreshed from this
    /// atomic before each check. External callers can bump the limit at any time.
    budget: Option<IterationBudget>,
    /// Current iteration count (advanced by `tick()`).
    iteration: u32,
    /// Consecutive tool failures (incremented by `record_failure()`, zeroed by `record_success()`).
    consecutive_failures: u32,
    /// Threshold to trip the circuit breaker (compared with `>=` against `consecutive_failures`).
    failure_threshold: u32,
    /// Per-tool timeout. Only stored and returned by `tool_timeout()`; `check()` does not use it.
    tool_timeout: Duration,
    /// When the current task started (set on construction and on `reset()`).
    task_start: Instant,
    /// Hard timeout for entire task.
    task_timeout: Duration,
    /// Whether the "approaching timeout" warning has fired.
    approaching_notified: bool,
    /// Whether the "approaching iteration limit" warning has fired.
    approaching_iteration_notified: bool,
    /// How many times circuit breaker was paused (recovery injected) instead of hard-stopped.
    circuit_breaker_recoveries: u32,
}

/// Why an agent loop ended (attached to `AgentEvent::Done`).
#[derive(Debug, Clone)]
pub enum StopReason {
    /// The task ran past its configured time limit.
    TaskTimeout { elapsed_secs: u64 },
    /// The maximum iteration count was reached.
    IterationLimit { count: u32 },
    /// Too many tool calls failed in a row.
    CircuitBreaker { failures: u32 },
    /// The user cancelled the run.
    Cancelled,
}

impl std::fmt::Display for StopReason {
    /// Write a short, human-readable description of the stop reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // No interpolation needed, so write the literal directly.
            Self::Cancelled => f.write_str("Cancelled by user"),
            Self::CircuitBreaker { failures } => {
                write!(f, "Circuit breaker: {failures} consecutive tool failures")
            }
            Self::IterationLimit { count } => write!(f, "Iteration limit reached ({count})"),
            Self::TaskTimeout { elapsed_secs } => write!(f, "Task timeout after {elapsed_secs}s"),
        }
    }
}

impl StopReason {
    /// Whether this stop reason is eligible for auto-continuation.
    ///
    /// Only the two budget-style stops (time and iteration limits) qualify.
    pub fn is_continuable(&self) -> bool {
        // Exhaustive on purpose: a new variant must make an explicit choice here.
        match self {
            Self::TaskTimeout { .. } | Self::IterationLimit { .. } => true,
            Self::CircuitBreaker { .. } | Self::Cancelled => false,
        }
    }
}

/// Outcome of `AgentGuard::check`.
///
/// `Proceed` lets the loop continue; the `Approaching*` and `CircuitBreakerRecovery`
/// variants are warnings/pauses; `IterationLimit`, `CircuitBreaker` and `TaskTimeout`
/// are stop verdicts.
#[derive(Debug, Clone)]
pub enum GuardVerdict {
    /// Proceed with the next iteration.
    Proceed,
    /// Warning: task timeout approaching — inject wrap-up prompt.
    /// `remaining` is the time left before the hard task timeout.
    ApproachingTimeout { remaining: Duration },
    /// Warning: approaching iteration limit — inject wrap-up prompt.
    /// `remaining` is the number of iterations left before the ceiling.
    ApproachingIterationLimit { remaining: u32 },
    /// Circuit breaker first trip — inject diagnostic/recovery prompt instead of hard stop.
    CircuitBreakerRecovery { failures: u32 },
    /// Stop: iteration limit reached.
    IterationLimit { count: u32 },
    /// Stop: circuit breaker tripped (after recovery attempts exhausted).
    CircuitBreaker { failures: u32 },
    /// Stop: task timed out. `elapsed` is the time since the task started.
    TaskTimeout { elapsed: Duration },
}

/// Grace period before task timeout where a wrap-up prompt is injected.
/// In seconds: `AgentGuard::check` emits `ApproachingTimeout` once when the remaining
/// task time is at or below this value.
const APPROACHING_TIMEOUT_SECS: u64 = 60;
/// Fraction of max_iterations remaining that triggers the approaching-limit warning.
/// `check` multiplies the ceiling by this ratio and truncates to a whole number, so for
/// ceilings below 5 the threshold is 0 and the warning never fires.
const APPROACHING_ITERATION_RATIO: f32 = 0.2;
/// Number of circuit breaker recovery pauses before hard stop.
/// Counted per task; `AgentGuard::reset` clears the counter.
const MAX_CIRCUIT_BREAKER_RECOVERIES: u32 = 1;

impl AgentGuard {
    pub fn new(
        max_iterations: u32,
        failure_threshold: u32,
        tool_timeout_secs: u64,
        task_timeout_secs: u64,
    ) -> Self {
        Self {
            max_iterations,
            budget: None,
            iteration: 0,
            consecutive_failures: 0,
            failure_threshold,
            tool_timeout: Duration::from_secs(tool_timeout_secs),
            task_start: Instant::now(),
            task_timeout: Duration::from_secs(task_timeout_secs),
            approaching_notified: false,
            approaching_iteration_notified: false,
            circuit_breaker_recoveries: 0,
        }
    }

    /// Attach a shared budget handle for external iteration-limit adjustment.
    pub fn with_budget(mut self, budget: IterationBudget) -> Self {
        self.budget = Some(budget);
        self
    }

    /// Get the budget handle (if attached) for sharing with external controllers.
    pub fn budget(&self) -> Option<&IterationBudget> {
        self.budget.as_ref()
    }

    /// Check if the agent should continue.
    pub fn check(&mut self) -> GuardVerdict {
        // Refresh from shared budget if present (external extend support).
        if let Some(ref budget) = self.budget {
            self.max_iterations = budget.get();
        }

        if self.iteration >= self.max_iterations {
            return GuardVerdict::IterationLimit {
                count: self.iteration,
            };
        }

        if self.consecutive_failures >= self.failure_threshold {
            if self.circuit_breaker_recoveries < MAX_CIRCUIT_BREAKER_RECOVERIES {
                // First trip: inject recovery prompt, reset failures, allow continuation.
                self.circuit_breaker_recoveries += 1;
                self.consecutive_failures = 0;
                return GuardVerdict::CircuitBreakerRecovery {
                    failures: self.failure_threshold,
                };
            }
            return GuardVerdict::CircuitBreaker {
                failures: self.consecutive_failures,
            };
        }

        let elapsed = self.task_start.elapsed();
        if elapsed > self.task_timeout {
            return GuardVerdict::TaskTimeout { elapsed };
        }

        // Warn once when approaching timeout (remaining ≤ 60s).
        let remaining_time = self.task_timeout.saturating_sub(elapsed);
        if remaining_time <= Duration::from_secs(APPROACHING_TIMEOUT_SECS)
            && !self.approaching_notified
        {
            self.approaching_notified = true;
            return GuardVerdict::ApproachingTimeout {
                remaining: remaining_time,
            };
        }

        // Warn once when approaching iteration limit (last 20% of iterations).
        let approaching_threshold =
            ((self.max_iterations as f32) * APPROACHING_ITERATION_RATIO) as u32;
        let remaining_iters = self.max_iterations.saturating_sub(self.iteration);
        if remaining_iters > 0
            && remaining_iters <= approaching_threshold
            && !self.approaching_iteration_notified
        {
            self.approaching_iteration_notified = true;
            return GuardVerdict::ApproachingIterationLimit {
                remaining: remaining_iters,
            };
        }

        GuardVerdict::Proceed
    }

    /// Record a new iteration.
    pub fn tick(&mut self) {
        self.iteration += 1;
    }

    /// Record a tool success.
    pub fn record_success(&mut self) {
        self.consecutive_failures = 0;
    }

    /// Record a tool failure.
    pub fn record_failure(&mut self) {
        self.consecutive_failures += 1;
    }

    /// Reset for a new user message (or continuation round).
    pub fn reset(&mut self) {
        self.iteration = 0;
        self.consecutive_failures = 0;
        self.task_start = Instant::now();
        self.approaching_notified = false;
        self.approaching_iteration_notified = false;
        self.circuit_breaker_recoveries = 0;
    }

    /// Get the tool timeout duration.
    pub fn tool_timeout(&self) -> Duration {
        self.tool_timeout
    }

    /// Current iteration.
    pub fn iteration(&self) -> u32 {
        self.iteration
    }

    /// Configured iteration ceiling.
    pub fn max_iterations(&self) -> u32 {
        self.max_iterations
    }

    /// Time elapsed since task start.
    pub fn elapsed(&self) -> Duration {
        self.task_start.elapsed()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_guard_proceed() {
        let mut guard = AgentGuard::new(25, 3, 120, 600);
        assert!(matches!(guard.check(), GuardVerdict::Proceed));
    }

    #[test]
    fn test_guard_iteration_limit() {
        let mut guard = AgentGuard::new(3, 3, 120, 600);
        guard.tick();
        guard.tick();
        guard.tick();
        assert!(matches!(
            guard.check(),
            GuardVerdict::IterationLimit { count: 3 }
        ));
    }

    #[test]
    fn test_guard_circuit_breaker() {
        let mut guard = AgentGuard::new(25, 3, 120, 600);
        guard.record_failure();
        guard.record_failure();
        guard.record_failure();
        // First trip → recovery (not hard stop)
        assert!(matches!(
            guard.check(),
            GuardVerdict::CircuitBreakerRecovery { .. }
        ));
        // failures were reset by recovery; add them again
        guard.record_failure();
        guard.record_failure();
        guard.record_failure();
        // Second trip → hard stop (recovery exhausted)
        assert!(matches!(guard.check(), GuardVerdict::CircuitBreaker { .. }));
    }

    #[test]
    fn test_guard_success_resets_failures() {
        let mut guard = AgentGuard::new(25, 3, 120, 600);
        guard.record_failure();
        guard.record_failure();
        guard.record_success();
        assert!(matches!(guard.check(), GuardVerdict::Proceed));
    }

    #[test]
    fn test_guard_reset() {
        let mut guard = AgentGuard::new(3, 3, 120, 600);
        guard.tick();
        guard.tick();
        guard.tick();
        assert!(matches!(guard.check(), GuardVerdict::IterationLimit { .. }));
        guard.reset();
        assert!(matches!(guard.check(), GuardVerdict::Proceed));
        assert_eq!(guard.iteration(), 0);
    }

    #[test]
    fn test_guard_tool_timeout() {
        let guard = AgentGuard::new(25, 3, 60, 600);
        assert_eq!(guard.tool_timeout(), Duration::from_secs(60));
    }

    #[test]
    fn test_guard_approaching_timeout() {
        // Use a very short task timeout to trigger approaching warning
        let mut guard = AgentGuard::new(25, 3, 120, 2);
        // Initially should proceed (task_timeout=2s, approaching=60s, but 60>2 so it fires immediately)
        // With 2s timeout: remaining starts at ~2s which is < 60s, so first check triggers approaching
        let verdict = guard.check();
        assert!(
            matches!(verdict, GuardVerdict::ApproachingTimeout { .. }),
            "Expected ApproachingTimeout, got {:?}",
            verdict
        );
        // Second check should proceed (already notified) until actual timeout
        let verdict2 = guard.check();
        assert!(
            matches!(verdict2, GuardVerdict::Proceed),
            "Expected Proceed after notification, got {:?}",
            verdict2
        );
    }

    #[test]
    fn test_guard_configurable_task_timeout() {
        let guard = AgentGuard::new(25, 3, 120, 1200);
        assert_eq!(guard.task_timeout, Duration::from_secs(1200));
    }

    #[test]
    fn test_stop_reason_continuable() {
        assert!(StopReason::TaskTimeout { elapsed_secs: 600 }.is_continuable());
        assert!(StopReason::IterationLimit { count: 50 }.is_continuable());
        assert!(!StopReason::CircuitBreaker { failures: 3 }.is_continuable());
        assert!(!StopReason::Cancelled.is_continuable());
    }
}