Skip to main content

phi_core/context/
execution.rs

1use serde::{Deserialize, Serialize};
2
3// ---------------------------------------------------------------------------
4// Execution limits
5// ---------------------------------------------------------------------------
6
/*
ExecutionLimits — a safety net against runaway agent loops.

Without limits, a poorly-designed tool or a confused LLM could loop forever,
burning tokens and money. These limits provide defense-in-depth:

  max_turns        — catches infinite tool-call loops
  max_total_tokens — catches token budget overruns (cost control)
  max_duration     — catches wall-clock hangs (e.g., a bash tool that blocks)
  max_cost         — optional cap on cumulative dollar cost (`None` = no cap)

The agent loop checks these BEFORE each turn (in ExecutionTracker::check_limits).
When a limit is hit, it injects a "[Agent stopped: ...]" user message into the
conversation so the LLM (and user) can see what happened, then returns.

RUST QUIRK: `std::time::Duration`

Duration is Rust's type for a span of time (not a point in time — that's Instant/SystemTime).
Constructors:
  Duration::from_secs(600)   → 10 minutes
  Duration::from_millis(100) → 100ms
  Duration::from_nanos(1)    → 1 nanosecond

Internally, Duration is stored as (seconds: u64, nanoseconds: u32) — no floating point,
no overflow risk for reasonable values.

The full path `std::time::Duration` is used instead of a `use` import because it appears
in only a handful of places in this file — no need to pollute the module namespace.
*/
35/// Execution limits for the agent loop — guards against infinite loops and budget overruns.
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct ExecutionLimits {
38    /// Maximum number of LLM turns (one turn = one LLM call + its tool results)
39    pub max_turns: usize,
40    /// Maximum total tokens consumed across all turns (input + output)
41    pub max_total_tokens: usize,
42    /// Maximum wall-clock duration. Uses std::time::Duration (not f64 seconds) for precision.
43    pub max_duration: std::time::Duration,
44    /// Maximum cumulative dollar cost for the run. `None` means no cost cap.
45    /// Requires `AgentLoopConfig.cost_config` to be set — without pricing rates the
46    /// accumulated cost is always 0.0 and this limit has no effect.
47    #[serde(default)]
48    pub max_cost: Option<f64>,
49}
50
51impl Default for ExecutionLimits {
52    fn default() -> Self {
53        Self {
54            max_turns: 50,
55            max_total_tokens: 1_000_000,
56            max_duration: std::time::Duration::from_secs(600),
57            max_cost: None,
58        }
59    }
60}
61
62/// Tracks execution state against limits
63pub struct ExecutionTracker {
64    pub limits: ExecutionLimits,
65    pub turns: usize,
66    pub tokens_used: usize,
67    /// Accumulated dollar cost across all turns. Updated via `record_cost()`.
68    /// Only non-zero when `AgentLoopConfig.cost_config` is set.
69    pub cost_accumulated: f64,
70    pub started_at: std::time::Instant,
71}
72
73// ---------------------------------------------------------------------------
74// Current tool execution state — 0.10.0
75// ---------------------------------------------------------------------------
76
/// Describes the tool that is executing inside the agent loop right now.
///
/// `execute_single_tool` fills this in just before it calls
/// `AgentTool::execute()` and clears it when that call returns, whether it
/// succeeded, failed, or timed out. Outside readers reach it through the shared
/// `Arc<Mutex<...>>` installed on
/// [`AgentLoopConfig::current_tool`](crate::agent_loop::AgentLoopConfig::current_tool),
/// usually by way of the
/// [`BasicAgent::current_tool_timeout`](crate::agents::BasicAgent::current_tool_timeout)
/// delegate. The motivating use is producing a "pause-time estimate" upper
/// bound when a host pauses a session that is still in flight.
///
/// **Single-tool model.** Only the most recently started tool is recorded.
/// With `ToolExecutionStrategy::Parallel` or `Batched`, concurrent tools race
/// on this one slot: the last writer wins, and the cleared state may briefly
/// reflect the completion of a sibling tool. That is a deliberate trade-off
/// for the v0 pause-time-estimate use case, where one conservative upper bound
/// is enough. Callers that need per-call detail should subscribe to
/// `AgentEvent::ToolExecutionStart` / `ToolExecutionEnd` directly.
#[derive(Clone, Debug)]
pub struct CurrentToolExecution {
    /// Name of the tool — the same string the LLM emitted as `Content::ToolCall.name`.
    pub name: String,
    /// Timeout in effect for the in-flight call, resolved as per-tool override,
    /// then config-level value, then `None`. This mirrors the resolution order
    /// used by `execute_single_tool`.
    pub timeout: Option<std::time::Duration>,
}
103
104impl ExecutionTracker {
105    pub fn new(limits: ExecutionLimits) -> Self {
106        Self {
107            limits,
108            turns: 0,
109            tokens_used: 0,
110            cost_accumulated: 0.0,
111            started_at: std::time::Instant::now(),
112        }
113    }
114
115    pub fn record_turn(&mut self, tokens: usize) {
116        self.turns += 1;
117        self.tokens_used += tokens;
118    }
119
120    /// Accumulate incremental cost for the current turn.
121    pub fn record_cost(&mut self, cost: f64) {
122        self.cost_accumulated += cost;
123    }
124
125    /// Check if any limit has been exceeded. Returns the reason if so.
126    /*
127    RUST QUIRK: `Option<String>` as "either an error reason, or nothing"
128
129    `check_limits()` returns:
130      Some("Max turns reached (50/50)")  ← a limit was hit
131      None                                ← all limits OK
132
133    This is the Rust way to return "optional data" — no exceptions, no sentinel values (-1, ""),
134    no separate boolean + string pair. The caller pattern-matches to handle both cases.
135
136    RUST QUIRK: `Instant::elapsed()` for wall-clock timing
137
138    `std::time::Instant` records a moment in time (monotonic clock, not wall clock).
139    Monotonic means it never goes backwards — safe to use for durations.
140    `started_at.elapsed()` returns a `Duration` = current time - started_at.
141
142    The `>=` comparison between two Durations works because Duration implements PartialOrd.
143
144    RUST QUIRK: `{:.0}` format specifier — zero decimal places for f64
145
146    `format!("Max duration reached ({:.0}s/{:.0}s)", elapsed.as_secs_f64(), ...)`
147    `{:.0}` means "format as float with 0 decimal places" → "42" not "42.000000"
148    Other examples: {:.2} = 2 decimal places, {:>10.3} = right-aligned, 10 wide, 3 decimal places
149    */
150    pub fn check_limits(&self) -> Option<String> {
151        if self.turns >= self.limits.max_turns {
152            return Some(format!(
153                "Max turns reached ({}/{})",
154                self.turns, self.limits.max_turns
155            ));
156        }
157        if self.tokens_used >= self.limits.max_total_tokens {
158            return Some(format!(
159                "Max tokens reached ({}/{})",
160                self.tokens_used, self.limits.max_total_tokens
161            ));
162        }
163        let elapsed = self.started_at.elapsed(); // Duration since ExecutionTracker::new()
164        if elapsed >= self.limits.max_duration {
165            return Some(format!(
166                "Max duration reached ({:.0}s/{:.0}s)", // {:.0} = 0 decimal places
167                elapsed.as_secs_f64(),
168                self.limits.max_duration.as_secs_f64()
169            ));
170        }
171        if let Some(max) = self.limits.max_cost {
172            if self.cost_accumulated >= max {
173                return Some(format!(
174                    "Max cost reached (${:.4}/${:.4})",
175                    self.cost_accumulated, max
176                ));
177            }
178        }
179        None // All limits OK — return None (no reason to stop)
180    }
181}