phi_core/context/execution.rs
1use serde::{Deserialize, Serialize};
2
3// ---------------------------------------------------------------------------
4// Execution limits
5// ---------------------------------------------------------------------------
6
/*
ExecutionLimits — a safety net against runaway agent loops.

Without limits, a poorly-designed tool or a confused LLM could loop forever,
burning tokens and money. These four limits provide defense-in-depth:

  max_turns        — catches infinite tool-call loops
  max_total_tokens — catches token budget overruns (cost control)
  max_duration     — catches wall-clock hangs (e.g., a bash tool that blocks)
  max_cost         — optional cap on cumulative dollar cost (`None` = no cap)

The agent loop checks these BEFORE each turn (in ExecutionTracker::check_limits).
When a limit is hit, it injects a "[Agent stopped: ...]" user message into the
conversation so the LLM (and user) can see what happened, then returns.

RUST QUIRK: `std::time::Duration`

Duration is Rust's type for a span of time (not a point in time — that's Instant/SystemTime).
Constructors:
  Duration::from_secs(600)   → 10 minutes
  Duration::from_millis(100) → 100ms
  Duration::from_nanos(1)    → 1 nanosecond

Internally, Duration is stored as (seconds: u64, nanoseconds: u32) — no floating point,
no overflow risk for reasonable values.

The full path `std::time::Duration` is used instead of a `use` import because it appears
in only a few places in this file — no need to pollute the module namespace.
*/
35/// Execution limits for the agent loop — guards against infinite loops and budget overruns.
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct ExecutionLimits {
38 /// Maximum number of LLM turns (one turn = one LLM call + its tool results)
39 pub max_turns: usize,
40 /// Maximum total tokens consumed across all turns (input + output)
41 pub max_total_tokens: usize,
42 /// Maximum wall-clock duration. Uses std::time::Duration (not f64 seconds) for precision.
43 pub max_duration: std::time::Duration,
44 /// Maximum cumulative dollar cost for the run. `None` means no cost cap.
45 /// Requires `AgentLoopConfig.cost_config` to be set — without pricing rates the
46 /// accumulated cost is always 0.0 and this limit has no effect.
47 #[serde(default)]
48 pub max_cost: Option<f64>,
49}
50
51impl Default for ExecutionLimits {
52 fn default() -> Self {
53 Self {
54 max_turns: 50,
55 max_total_tokens: 1_000_000,
56 max_duration: std::time::Duration::from_secs(600),
57 max_cost: None,
58 }
59 }
60}
61
62/// Tracks execution state against limits
63pub struct ExecutionTracker {
64 pub limits: ExecutionLimits,
65 pub turns: usize,
66 pub tokens_used: usize,
67 /// Accumulated dollar cost across all turns. Updated via `record_cost()`.
68 /// Only non-zero when `AgentLoopConfig.cost_config` is set.
69 pub cost_accumulated: f64,
70 pub started_at: std::time::Instant,
71}
72
73// ---------------------------------------------------------------------------
74// Current tool execution state — 0.10.0
75// ---------------------------------------------------------------------------
76
/// Describes the tool call that is in flight inside the agent loop right now.
///
/// `execute_single_tool` fills this in just before calling `AgentTool::execute()`
/// and clears it once that call returns, whether it succeeded, failed, or timed
/// out. Outside observers read it through the shared `Arc<Mutex<...>>` installed on
/// [`AgentLoopConfig::current_tool`](crate::agent_loop::AgentLoopConfig::current_tool)
/// — typically through the
/// [`BasicAgent::current_tool_timeout`](crate::agents::BasicAgent::current_tool_timeout)
/// delegate. The motivating use case is a host that pauses an in-flight session
/// and wants to emit an upper-bound "pause-time estimate".
///
/// **Single-tool model.** Only the most-recently-started tool is recorded.
/// With `ToolExecutionStrategy::Parallel` or `Batched`, concurrent tools race on
/// this slot: the last writer wins, and the cleared state may briefly reflect a
/// sibling tool finishing. That is a deliberate trade-off for the v0
/// pause-time-estimate use case, where one conservative upper bound is enough.
/// Callers that need per-call granularity should subscribe to
/// `AgentEvent::ToolExecutionStart` / `ToolExecutionEnd` directly.
#[derive(Debug, Clone)]
pub struct CurrentToolExecution {
    /// Name of the tool — the same string the LLM emitted as `Content::ToolCall.name`.
    pub name: String,
    /// Timeout in effect for the in-flight call, or `None` when no timeout applies.
    /// Resolved as per-tool override → config-level → `None`, mirroring the
    /// resolution order in `execute_single_tool`.
    pub timeout: Option<std::time::Duration>,
}
103
104impl ExecutionTracker {
105 pub fn new(limits: ExecutionLimits) -> Self {
106 Self {
107 limits,
108 turns: 0,
109 tokens_used: 0,
110 cost_accumulated: 0.0,
111 started_at: std::time::Instant::now(),
112 }
113 }
114
115 pub fn record_turn(&mut self, tokens: usize) {
116 self.turns += 1;
117 self.tokens_used += tokens;
118 }
119
120 /// Accumulate incremental cost for the current turn.
121 pub fn record_cost(&mut self, cost: f64) {
122 self.cost_accumulated += cost;
123 }
124
125 /// Check if any limit has been exceeded. Returns the reason if so.
126 /*
127 RUST QUIRK: `Option<String>` as "either an error reason, or nothing"
128
129 `check_limits()` returns:
130 Some("Max turns reached (50/50)") ← a limit was hit
131 None ← all limits OK
132
133 This is the Rust way to return "optional data" — no exceptions, no sentinel values (-1, ""),
134 no separate boolean + string pair. The caller pattern-matches to handle both cases.
135
136 RUST QUIRK: `Instant::elapsed()` for wall-clock timing
137
138 `std::time::Instant` records a moment in time (monotonic clock, not wall clock).
139 Monotonic means it never goes backwards — safe to use for durations.
140 `started_at.elapsed()` returns a `Duration` = current time - started_at.
141
142 The `>=` comparison between two Durations works because Duration implements PartialOrd.
143
144 RUST QUIRK: `{:.0}` format specifier — zero decimal places for f64
145
146 `format!("Max duration reached ({:.0}s/{:.0}s)", elapsed.as_secs_f64(), ...)`
147 `{:.0}` means "format as float with 0 decimal places" → "42" not "42.000000"
148 Other examples: {:.2} = 2 decimal places, {:>10.3} = right-aligned, 10 wide, 3 decimal places
149 */
150 pub fn check_limits(&self) -> Option<String> {
151 if self.turns >= self.limits.max_turns {
152 return Some(format!(
153 "Max turns reached ({}/{})",
154 self.turns, self.limits.max_turns
155 ));
156 }
157 if self.tokens_used >= self.limits.max_total_tokens {
158 return Some(format!(
159 "Max tokens reached ({}/{})",
160 self.tokens_used, self.limits.max_total_tokens
161 ));
162 }
163 let elapsed = self.started_at.elapsed(); // Duration since ExecutionTracker::new()
164 if elapsed >= self.limits.max_duration {
165 return Some(format!(
166 "Max duration reached ({:.0}s/{:.0}s)", // {:.0} = 0 decimal places
167 elapsed.as_secs_f64(),
168 self.limits.max_duration.as_secs_f64()
169 ));
170 }
171 if let Some(max) = self.limits.max_cost {
172 if self.cost_accumulated >= max {
173 return Some(format!(
174 "Max cost reached (${:.4}/${:.4})",
175 self.cost_accumulated, max
176 ));
177 }
178 }
179 None // All limits OK — return None (no reason to stop)
180 }
181}