1use anyhow::{Context, Result, anyhow};
4use hashbrown::HashMap;
5use once_cell::sync::Lazy;
6use serde_json::{Value, json};
7use std::borrow::Cow;
8use std::cell::RefCell;
9use std::future::Future;
10use std::sync::Mutex;
11use std::time::{Duration, Instant, SystemTime};
12use tokio::task::Id as TokioTaskId;
13use tracing::{trace, warn};
14use vtcode_commons::ErrorCategory;
15
16use crate::config::constants::tools;
17use crate::core::agent::harness_kernel::PreparedToolCall;
18use crate::core::memory_pool::SizeRecommendation;
19use crate::mcp::McpToolExecutor;
20use crate::tool_policy::ToolExecutionDecision;
21use crate::tools::error_messages::agent_execution;
22use crate::tools::invocation::ToolInvocationId;
23use crate::tools::mcp::{legacy_mcp_tool_name, parse_canonical_mcp_tool_name};
24use crate::tools::safety_gateway::{
25 SafetyContext, SafetyDecision, SafetyError as GatewaySafetyError,
26};
27use crate::ui::search::fuzzy_match;
28
29use super::assembly::public_tool_name_candidates;
30use super::execution_kernel;
31use super::normalize_tool_output;
32use super::{
33 ExecSettlementMode, ExecutionPolicySnapshot, ToolErrorType, ToolExecutionError,
34 ToolExecutionOutcome, ToolExecutionRecord, ToolExecutionRequest, ToolHandler, ToolRegistry,
35};
36use vtcode_config::constants::execution::{LOOP_THROTTLE_MAX_MS, LOOP_THROTTLE_REGISTRY_BASE_MS};
37
/// Maximum number of active tool frames one reentrancy stack may hold;
/// `ToolReentrancyGuard::enter` rejects a call once the stack is this deep.
const REENTRANCY_STACK_DEPTH_LIMIT: usize = 64;
/// Maximum number of frames with the same tool name on one reentrancy stack;
/// `ToolReentrancyGuard::enter` rejects a call once this many are already active.
const REENTRANCY_PER_TOOL_LIMIT: usize = 1;
43
/// Stacks of active tool names keyed by Tokio task id, shared behind a mutex.
/// Accessed through `lock_reentrancy_stacks`.
static TOOL_REENTRANCY_STACKS: Lazy<Mutex<HashMap<TokioTaskId, Vec<String>>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));
thread_local! {
    /// Stack of active tool names for the current thread; `ToolReentrancyGuard`
    /// uses it when `tokio::task::try_id()` returns `None`.
    static THREAD_REENTRANCY_STACK: RefCell<Vec<String>> = const { RefCell::new(Vec::new()) };
}
49
50fn lock_reentrancy_stacks() -> std::sync::MutexGuard<'static, HashMap<TokioTaskId, Vec<String>>> {
51 TOOL_REENTRANCY_STACKS
52 .lock()
53 .unwrap_or_else(std::sync::PoisonError::into_inner)
54}
55
/// Details of a tool call rejected by `ToolReentrancyGuard::enter`.
#[derive(Debug)]
struct ReentrancyViolation {
    /// Length of the reentrancy stack at the time of rejection.
    stack_depth: usize,
    /// Number of frames already on the stack whose name equals the requested tool.
    tool_reentry_count: usize,
    /// Active frames joined with `" -> "`, or `"<empty>"` when the stack had none.
    stack_trace: String,
}
62
/// Identifies which reentrancy stack a guard pushed onto, so `Drop` pops the same one.
enum ReentrancyContext {
    /// Frame lives in `TOOL_REENTRANCY_STACKS` under this Tokio task id.
    Task(TokioTaskId),
    /// Frame lives in the thread-local `THREAD_REENTRANCY_STACK`.
    Thread,
}
67
/// Guard that keeps one frame on a reentrancy stack for as long as it is alive.
struct ToolReentrancyGuard {
    /// Stack the frame was pushed onto; `Drop` takes it out, leaving `None`,
    /// and does nothing when it is already `None`.
    context: Option<ReentrancyContext>,
}
71
72impl ToolReentrancyGuard {
73 fn enter(tool_name: &str) -> std::result::Result<Self, ReentrancyViolation> {
74 if let Some(task_id) = tokio::task::try_id() {
75 let mut stacks = lock_reentrancy_stacks();
76 let stack = stacks.entry(task_id).or_default();
77 let stack_depth = stack.len();
78 let tool_reentry_count = stack
79 .iter()
80 .filter(|active_tool| active_tool.as_str() == tool_name)
81 .count();
82
83 if stack_depth >= REENTRANCY_STACK_DEPTH_LIMIT
84 || tool_reentry_count >= REENTRANCY_PER_TOOL_LIMIT
85 {
86 let stack_trace = if stack.is_empty() {
87 "<empty>".to_string()
88 } else {
89 stack.join(" -> ")
90 };
91 return Err(ReentrancyViolation {
92 stack_depth,
93 tool_reentry_count,
94 stack_trace,
95 });
96 }
97
98 stack.push(tool_name.to_string());
99 return Ok(Self {
100 context: Some(ReentrancyContext::Task(task_id)),
101 });
102 }
103
104 let violation = THREAD_REENTRANCY_STACK.with(|stack_cell| {
105 let mut stack = stack_cell.borrow_mut();
106 let stack_depth = stack.len();
107 let tool_reentry_count = stack
108 .iter()
109 .filter(|active_tool| active_tool.as_str() == tool_name)
110 .count();
111
112 if stack_depth >= REENTRANCY_STACK_DEPTH_LIMIT
113 || tool_reentry_count >= REENTRANCY_PER_TOOL_LIMIT
114 {
115 let stack_trace = if stack.is_empty() {
116 "<empty>".to_string()
117 } else {
118 stack.join(" -> ")
119 };
120 Some(ReentrancyViolation {
121 stack_depth,
122 tool_reentry_count,
123 stack_trace,
124 })
125 } else {
126 stack.push(tool_name.to_string());
127 None
128 }
129 });
130
131 if let Some(violation) = violation {
132 return Err(violation);
133 }
134
135 Ok(Self {
136 context: Some(ReentrancyContext::Thread),
137 })
138 }
139}
140
141impl Drop for ToolReentrancyGuard {
142 fn drop(&mut self) {
143 let Some(context) = self.context.take() else {
144 return;
145 };
146
147 match context {
148 ReentrancyContext::Task(task_id) => {
149 let mut stacks = lock_reentrancy_stacks();
150 let should_remove = if let Some(stack) = stacks.get_mut(&task_id) {
151 let _ = stack.pop();
152 stack.is_empty()
153 } else {
154 false
155 };
156 if should_remove {
157 stacks.remove(&task_id);
158 }
159 }
160 ReentrancyContext::Thread => {
161 THREAD_REENTRANCY_STACK.with(|stack_cell| {
162 let _ = stack_cell.borrow_mut().pop();
163 });
164 }
165 }
166 }
167}
168
169impl ToolRegistry {
170 fn annotate_timeout_error_payload(
171 payload: &mut Value,
172 timeout_category: &str,
173 timeout_ms: u64,
174 circuit_breaker: bool,
175 ) {
176 if let Some(obj) = payload
177 .get_mut("error")
178 .and_then(|value| value.as_object_mut())
179 {
180 obj.insert(
181 "timeout_category".into(),
182 Value::String(timeout_category.to_string()),
183 );
184 obj.insert("timeout_ms".into(), Value::from(timeout_ms));
185 obj.insert("circuit_breaker".into(), Value::Bool(circuit_breaker));
186 }
187 }
188
189 fn safety_denial_error(
190 &self,
191 tool_name: &str,
192 reason: &str,
193 violation: Option<GatewaySafetyError>,
194 retry_after: Option<Duration>,
195 ) -> ToolExecutionError {
196 let mut error = ToolExecutionError::policy_violation(
197 tool_name.to_string(),
198 format!("Safety gateway denied execution: {reason}"),
199 );
200
201 match violation {
202 Some(GatewaySafetyError::RateLimitExceeded { .. }) => {
203 error.error_type = ToolErrorType::NetworkError;
204 error.category = ErrorCategory::RateLimit;
205 error.retryable = true;
206 error.is_recoverable = true;
207 }
208 Some(GatewaySafetyError::TurnLimitReached { .. })
209 | Some(GatewaySafetyError::SessionLimitReached { .. }) => {
210 error.error_type = ToolErrorType::ExecutionError;
211 error.category = ErrorCategory::ResourceExhausted;
212 error.retryable = false;
213 error.is_recoverable = false;
214 }
215 Some(GatewaySafetyError::PlanModeViolation(_)) => {
216 error.error_type = ToolErrorType::PolicyViolation;
217 error.category = ErrorCategory::PlanModeViolation;
218 error.retryable = false;
219 error.is_recoverable = true;
220 }
221 Some(GatewaySafetyError::CommandPolicyDenied(_))
222 | Some(GatewaySafetyError::DotfileProtectionViolation(_))
223 | None => {}
224 }
225
226 if let Some(delay) = retry_after {
227 error.retry_after_ms = Some(delay.as_millis() as u64);
228 }
229 error.circuit_breaker_impact = error.category.should_trip_circuit_breaker();
230 error.recovery_suggestions = error.category.recovery_suggestions();
231 error
232 }
233
234 pub fn safety_gateway(&self) -> std::sync::Arc<crate::tools::safety_gateway::SafetyGateway> {
235 std::sync::Arc::clone(&self.safety_gateway)
236 }
237
238 async fn check_safety_for_request(
239 &self,
240 tool_name: &str,
241 args: &Value,
242 invocation_id: Option<String>,
243 ) -> Option<ToolExecutionError> {
244 let context = SafetyContext::new(self.harness_context_snapshot().session_id);
245 let invocation_id = invocation_id
246 .and_then(|id| ToolInvocationId::parse(&id).ok())
247 .unwrap_or_default();
248 let safety_result = self
249 .safety_gateway
250 .check_and_record_with_id(&context, tool_name, args, Some(invocation_id))
251 .await;
252
253 match safety_result.decision {
254 SafetyDecision::Allow | SafetyDecision::NeedsApproval(_) => None,
255 SafetyDecision::Deny(reason) => Some(
256 self.safety_denial_error(
257 tool_name,
258 &reason,
259 safety_result.violation,
260 safety_result.retry_after,
261 )
262 .with_surface("tool_registry"),
263 ),
264 }
265 }
266
    /// Returns the future that executes `request` through
    /// `execute_tool_request_internal`.
    ///
    /// This is a plain `fn` returning `impl Future`, so no work happens until
    /// the returned future is awaited; the future borrows `self`.
    pub fn execute_public_tool_request(
        &self,
        request: ToolExecutionRequest,
    ) -> impl Future<Output = ToolExecutionOutcome> + '_ {
        self.execute_tool_request_internal(request)
    }
275
276 pub async fn execute_prepared_public_tool_request(
277 &self,
278 prepared: &PreparedToolCall,
279 policy: ExecutionPolicySnapshot,
280 ) -> ToolExecutionOutcome {
281 let request = ToolExecutionRequest::new(
282 prepared.canonical_name.clone(),
283 prepared.effective_args.clone(),
284 )
285 .with_policy(
286 policy
287 .with_prevalidated(prepared.already_preflighted)
288 .with_safety_prevalidated(false),
289 );
290 self.execute_tool_request_internal(request).await
291 }
292
293 async fn execute_tool_request_internal(
294 &self,
295 request: ToolExecutionRequest,
296 ) -> ToolExecutionOutcome {
297 let policy = request.policy.clone();
298 let mut retry_policy = crate::retry::RetryPolicy::from_retries(
299 policy.max_retries as u32,
300 policy.retry_base_delay,
301 policy.retry_max_delay,
302 policy.retry_multiplier,
303 );
304 retry_policy.jitter = policy.retry_jitter.clamp(0.0, 1.0);
305
306 let max_attempts = retry_policy.max_attempts.max(1);
307 let mut attempt_index: u32 = 0;
308 let mut last_error: Option<ToolExecutionError> = None;
309
310 while attempt_index < max_attempts {
311 if !policy.safety_prevalidated
312 && let Some(safety_error) = self
313 .check_safety_for_request(
314 request.tool_name.as_str(),
315 &request.args,
316 policy.invocation_id.clone(),
317 )
318 .await
319 {
320 let decorated = safety_error
321 .with_tool_call_context(request.tool_name.as_str(), &request.args)
322 .with_attempt(attempt_index + 1)
323 .with_surface("tool_registry");
324 if let Some(terminal) = Self::classify_and_step(
325 decorated,
326 &retry_policy,
327 request.tool_name.as_str(),
328 &mut attempt_index,
329 max_attempts,
330 &mut last_error,
331 )
332 .await
333 {
334 return ToolExecutionOutcome::failure(
335 request.tool_name.clone(),
336 attempt_index + 1,
337 terminal,
338 );
339 }
340 continue;
341 }
342
343 let result = self
344 .execute_public_tool_ref_internal_with_mode(
345 request.tool_name.as_str(),
346 &request.args,
347 policy.prevalidated,
348 policy.exec_settlement_mode,
349 )
350 .await;
351
352 match result {
353 Ok(output) => {
354 if let Some(structured_error) = ToolExecutionError::from_tool_output(&output) {
355 let decorated = structured_error
356 .with_tool_call_context(request.tool_name.as_str(), &request.args)
357 .with_attempt(attempt_index + 1)
358 .with_surface("tool_registry");
359 if let Some(terminal) = Self::classify_and_step(
360 decorated,
361 &retry_policy,
362 request.tool_name.as_str(),
363 &mut attempt_index,
364 max_attempts,
365 &mut last_error,
366 )
367 .await
368 {
369 return ToolExecutionOutcome::failure(
370 request.tool_name.clone(),
371 attempt_index + 1,
372 terminal,
373 );
374 }
375 continue;
376 }
377
378 return ToolExecutionOutcome::success(
379 request.tool_name.clone(),
380 attempt_index + 1,
381 output,
382 );
383 }
384 Err(error) => {
385 let mut base = ToolExecutionError::from_anyhow(
386 request.tool_name.clone(),
387 &error,
388 attempt_index,
389 false,
390 false,
391 Some("tool_registry"),
392 );
393 let lower_message = base.message.to_ascii_lowercase();
394 let lower_original = base
395 .original_error
396 .as_deref()
397 .unwrap_or_default()
398 .to_ascii_lowercase();
399 if lower_message.contains("circuit breaker")
400 || lower_original.contains("circuit breaker")
401 {
402 base.category = ErrorCategory::CircuitOpen;
403 base.retryable = true;
404 base.is_recoverable = true;
405 if base.retry_delay_ms.is_none() {
406 base.retry_delay_ms = Some(policy.retry_base_delay.as_millis() as u64);
407 }
408 }
409
410 if let Some(terminal) = Self::classify_and_step(
411 base,
412 &retry_policy,
413 request.tool_name.as_str(),
414 &mut attempt_index,
415 max_attempts,
416 &mut last_error,
417 )
418 .await
419 {
420 return ToolExecutionOutcome::failure(
421 request.tool_name.clone(),
422 attempt_index + 1,
423 terminal,
424 );
425 }
426 continue;
427 }
428 }
429 }
430
431 ToolExecutionOutcome::failure(
432 request.tool_name.clone(),
433 max_attempts,
434 last_error.unwrap_or_else(|| {
435 ToolExecutionError::new(
436 request.tool_name.clone(),
437 ToolErrorType::ExecutionError,
438 format!(
439 "Tool '{}' failed after {} attempts with no structured error",
440 request.tool_name, max_attempts
441 ),
442 )
443 .with_surface("tool_registry")
444 }),
445 )
446 }
447
448 async fn classify_and_step(
455 decorated: ToolExecutionError,
456 retry_policy: &crate::retry::RetryPolicy,
457 tool_name: &str,
458 attempt_index: &mut u32,
459 max_attempts: u32,
460 last_error: &mut Option<ToolExecutionError>,
461 ) -> Option<ToolExecutionError> {
462 let structured =
463 retry_policy.apply_to_tool_execution_error(decorated, *attempt_index, Some(tool_name));
464 let retry_delay = structured
465 .retry_after()
466 .or_else(|| structured.retry_delay());
467 if structured.retryable
468 && *attempt_index + 1 < max_attempts
469 && let Some(delay) = retry_delay
470 {
471 *last_error = Some(structured);
472 tokio::time::sleep(delay).await;
473 *attempt_index = attempt_index.saturating_add(1);
474 return None;
475 }
476 Some(structured)
477 }
478
479 async fn should_skip_loop_detection_for_active_exec_continuation(
480 &self,
481 tool_name: &str,
482 args: &Value,
483 ) -> bool {
484 if tool_name != tools::UNIFIED_EXEC {
485 return false;
486 }
487
488 if !crate::tools::tool_intent::unified_exec_action_in(args, &["poll", "continue"]) {
489 return false;
490 }
491 if crate::tools::tool_intent::unified_exec_action_is(args, "continue")
492 && crate::tools::command_args::interactive_input_text(args).is_some()
493 {
494 return false;
495 }
496
497 let Some(session_id) = crate::tools::command_args::session_id_text(args) else {
498 return false;
499 };
500
501 matches!(self.exec_session_completed(session_id).await, Ok(None))
502 }
503
504 async fn public_tool_catalog_for_error(
505 &self,
506 requested_name: &str,
507 ) -> (Vec<String>, Vec<String>) {
508 let mut tool_names = self.available_tools().await;
509 tool_names.sort_unstable();
510 tool_names.dedup();
511
512 let requested_candidates = public_tool_name_candidates(requested_name);
513 let mut similar_tools = Vec::new();
514
515 if let Ok(resolved) = self.resolve_public_tool_name_sync(requested_name)
516 && tool_names.iter().any(|tool| tool == &resolved)
517 {
518 similar_tools.push(resolved);
519 }
520
521 for tool in &tool_names {
522 if similar_tools.len() >= 3 {
523 break;
524 }
525
526 if similar_tools.iter().any(|candidate| candidate == tool) {
527 continue;
528 }
529
530 if requested_candidates
531 .iter()
532 .any(|candidate| fuzzy_match(candidate, tool))
533 {
534 similar_tools.push(tool.clone());
535 }
536 }
537
538 (tool_names, similar_tools)
539 }
540
    /// Validates a tool call before execution by delegating to
    /// `execution_kernel::preflight_validate_call` with this registry.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the execution kernel's preflight returns.
    pub fn preflight_validate_call(
        &self,
        name: &str,
        args: &Value,
    ) -> Result<super::ToolPreflightOutcome> {
        execution_kernel::preflight_validate_call(self, name, args)
    }
548
549 pub fn admit_public_tool_call(&self, name: &str, args: &Value) -> Result<PreparedToolCall> {
550 let preflight = self.preflight_validate_call(name, args)?;
551 Ok(PreparedToolCall::new(
552 preflight.normalized_tool_name,
553 preflight.readonly_classification,
554 preflight.parallel_safe_after_preflight,
555 preflight.effective_args,
556 ))
557 }
558
    /// Executes a tool by name with owned arguments.
    ///
    /// Convenience wrapper that borrows `args` and forwards to `execute_tool_ref`.
    pub async fn execute_tool(&self, name: &str, args: Value) -> Result<Value> {
        self.execute_tool_ref(name, &args).await
    }
562
    /// Executes a tool addressed by its public name with borrowed arguments.
    ///
    /// Forwards to `execute_public_tool_ref_internal` with `prevalidated = false`.
    pub async fn execute_public_tool_ref(&self, name: &str, args: &Value) -> Result<Value> {
        self.execute_public_tool_ref_internal(name, args, false)
            .await
    }
568
    /// Executes a tool by name with borrowed arguments.
    ///
    /// Forwards to `execute_tool_ref_internal` with `prevalidated = false` and
    /// `ExecSettlementMode::Manual`.
    pub async fn execute_tool_ref(&self, name: &str, args: &Value) -> Result<Value> {
        self.execute_tool_ref_internal(name, args, false, ExecSettlementMode::Manual)
            .await
    }
575
    /// Executes a tool by name for a call the caller has already validated.
    ///
    /// Forwards to `execute_tool_ref_internal` with `prevalidated = true` and
    /// `ExecSettlementMode::Manual`.
    pub async fn execute_tool_ref_prevalidated(&self, name: &str, args: &Value) -> Result<Value> {
        self.execute_tool_ref_internal(name, args, true, ExecSettlementMode::Manual)
            .await
    }
584
    /// Executes a publicly named tool for a call the caller has already validated.
    ///
    /// Forwards to `execute_public_tool_ref_prevalidated_with_mode` using
    /// `ExecSettlementMode::Manual`.
    pub async fn execute_public_tool_ref_prevalidated(
        &self,
        name: &str,
        args: &Value,
    ) -> Result<Value> {
        self.execute_public_tool_ref_prevalidated_with_mode(name, args, ExecSettlementMode::Manual)
            .await
    }
594
    /// Executes a publicly named, already-validated tool call with an explicit
    /// settlement mode.
    ///
    /// Forwards to `execute_public_tool_ref_internal_with_mode` with
    /// `prevalidated = true`.
    #[doc(hidden)]
    pub async fn execute_public_tool_ref_prevalidated_with_mode(
        &self,
        name: &str,
        args: &Value,
        exec_settlement_mode: ExecSettlementMode,
    ) -> Result<Value> {
        self.execute_public_tool_ref_internal_with_mode(name, args, true, exec_settlement_mode)
            .await
    }
605
    /// Executes a prepared tool call with an explicit settlement mode.
    ///
    /// Uses the prepared call's canonical name and effective arguments, and
    /// passes `prepared.already_preflighted` as the `prevalidated` flag.
    pub async fn execute_prepared_public_tool_ref_with_mode(
        &self,
        prepared: &PreparedToolCall,
        exec_settlement_mode: ExecSettlementMode,
    ) -> Result<Value> {
        self.execute_public_tool_ref_internal_with_mode(
            prepared.canonical_name.as_str(),
            &prepared.effective_args,
            prepared.already_preflighted,
            exec_settlement_mode,
        )
        .await
    }
619
    /// Executes a publicly named tool with the default settlement mode.
    ///
    /// Forwards to `execute_public_tool_ref_internal_with_mode` using
    /// `ExecSettlementMode::Manual`, passing `prevalidated` through unchanged.
    async fn execute_public_tool_ref_internal(
        &self,
        name: &str,
        args: &Value,
        prevalidated: bool,
    ) -> Result<Value> {
        self.execute_public_tool_ref_internal_with_mode(
            name,
            args,
            prevalidated,
            ExecSettlementMode::Manual,
        )
        .await
    }
634
635 async fn execute_public_tool_ref_internal_with_mode(
636 &self,
637 name: &str,
638 args: &Value,
639 prevalidated: bool,
640 exec_settlement_mode: ExecSettlementMode,
641 ) -> Result<Value> {
642 let routed_name = self
643 .resolve_public_tool(name)
644 .map(|resolution| resolution.registration_name().to_string())
645 .map_err(|error| anyhow!(error.to_string()))?;
646 let effective_args = execution_kernel::remap_public_unified_file_alias_args(
647 name,
648 routed_name.as_str(),
649 args,
650 );
651 self.execute_tool_ref_internal(
652 routed_name.as_str(),
653 effective_args.as_ref().unwrap_or(args),
654 prevalidated,
655 exec_settlement_mode,
656 )
657 .await
658 }
659
660 async fn execute_tool_ref_internal(
661 &self,
662 name: &str,
663 args: &Value,
664 prevalidated: bool,
665 exec_settlement_mode: ExecSettlementMode,
666 ) -> Result<Value> {
667 let _pool_guard = if self.optimization_config.memory_pool.enabled {
669 Some(self.memory_pool.get_string())
670 } else {
671 None
672 };
673
674 if self.optimization_config.memory_pool.enabled {
676 let recommendation = self
677 .memory_pool
678 .auto_tune(&self.optimization_config.memory_pool);
679
680 if !matches!(
682 (
683 recommendation.string_size_recommendation,
684 recommendation.value_size_recommendation,
685 recommendation.vec_size_recommendation
686 ),
687 (
688 SizeRecommendation::Maintain,
689 SizeRecommendation::Maintain,
690 SizeRecommendation::Maintain
691 )
692 ) {
693 tracing::debug!(
694 "Memory pool tuning recommendation: string={:?}, value={:?}, vec={:?}, allocations_avoided={}",
695 recommendation.string_size_recommendation,
696 recommendation.value_size_recommendation,
697 recommendation.vec_size_recommendation,
698 recommendation.total_allocations_avoided
699 );
700 }
701 }
702
703 let cached_tool = if self
705 .optimization_config
706 .tool_registry
707 .use_optimized_registry
708 {
709 let cache = self.hot_tool_cache.read();
710 cache.peek(name).cloned()
711 } else {
712 None
713 };
714
715 let (tool_name, tool_name_owned, display_name) =
718 if let Some(registration) = self.inventory.registration_for(name) {
719 let canonical = registration.name().to_string();
720 let display = if canonical == name {
721 canonical.clone()
722 } else {
723 format!("{} (alias for {})", name, canonical)
724 };
725 (canonical.clone(), canonical.clone(), display)
726 } else {
727 let tool_name_owned = name.to_string();
729 let display_name = tool_name_owned.clone();
730 (tool_name_owned.clone(), tool_name_owned, display_name)
731 };
732
733 if let Some(tool_arc) = cached_tool.as_ref()
735 && self
736 .optimization_config
737 .tool_registry
738 .use_optimized_registry
739 && tool_name != name
740 {
741 self.hot_tool_cache
743 .write()
744 .put(tool_name.clone(), tool_arc.clone());
745 }
746
747 let parameter_schema = self
748 .inventory
749 .registration_for(&tool_name)
750 .and_then(|registration| registration.parameter_schema().cloned());
751 let normalized_args =
752 execution_kernel::normalize_tool_args(&tool_name, args, parameter_schema.as_ref())?;
753 let args = normalized_args.as_ref();
754 let requested_name = name.to_string();
755
756 let args_for_recording = args.clone();
758 let context_snapshot = self.harness_context_snapshot();
760 let record_failure = |tool_name: String,
761 is_mcp_tool: bool,
762 mcp_provider: Option<String>,
763 args: Value,
764 error_msg: String,
765 timeout_category: Option<String>,
766 base_timeout_ms: Option<u64>,
767 adaptive_timeout_ms: Option<u64>,
768 effective_timeout_ms: Option<u64>,
769 circuit_breaker: bool| {
770 self.execution_history
771 .add_record(ToolExecutionRecord::failure(
772 tool_name,
773 requested_name.clone(),
774 is_mcp_tool,
775 mcp_provider,
776 args,
777 error_msg,
778 context_snapshot.clone(),
779 timeout_category,
780 base_timeout_ms,
781 adaptive_timeout_ms,
782 effective_timeout_ms,
783 circuit_breaker,
784 ));
785 };
786
787 let _reentrancy_guard = match ToolReentrancyGuard::enter(&tool_name) {
788 Ok(guard) => guard,
789 Err(violation) => {
790 let reentry_count = violation.tool_reentry_count + 1;
791 let error_message = format!(
792 "REENTRANCY GUARD: Tool '{}' was blocked to prevent recursive execution.\n\n\
793 ACTION REQUIRED: DO NOT retry this same tool call without changing control flow.\n\
794 Current stack depth: {}. Re-entry count for this tool in the current task: {}.\n\
795 Stack trace: {}",
796 display_name, violation.stack_depth, reentry_count, violation.stack_trace
797 );
798 let error = ToolExecutionError::new(
799 tool_name_owned.clone(),
800 ToolErrorType::PolicyViolation,
801 error_message.clone(),
802 );
803 let mut payload = error.to_json_value();
804 if let Some(obj) = payload.as_object_mut() {
805 obj.insert("reentrant_call_blocked".into(), json!(true));
806 obj.insert("stack_depth".into(), json!(violation.stack_depth));
807 obj.insert("reentry_count".into(), json!(reentry_count));
808 obj.insert("tool".into(), json!(display_name));
809 obj.insert("stack_trace".into(), json!(violation.stack_trace));
810 }
811 record_failure(
812 tool_name_owned.clone(),
813 false,
814 None,
815 args_for_recording.clone(),
816 error_message,
817 None,
818 None,
819 None,
820 None,
821 false,
822 );
823 return Ok(payload);
824 }
825 };
826
827 let readonly_classification = if prevalidated {
828 #[cfg(debug_assertions)]
829 {
830 if let Err(err) =
831 execution_kernel::preflight_validate_resolved_call(self, &tool_name, args)
832 && !agent_execution::is_plan_mode_denial(&err.to_string())
833 {
834 debug_assert!(
835 false,
836 "prevalidated execution received invalid call for '{}': {}",
837 tool_name, err
838 );
839 }
840 }
841 !crate::tools::tool_intent::classify_tool_intent(&tool_name, args).mutating
842 } else {
843 match execution_kernel::preflight_validate_resolved_call(self, &tool_name, args) {
844 Ok(outcome) => outcome.readonly_classification,
845 Err(err) => {
846 let err_msg = err.to_string();
847 record_failure(
848 tool_name_owned.clone(),
849 false,
850 None,
851 args_for_recording.clone(),
852 err_msg,
853 None,
854 None,
855 None,
856 None,
857 false,
858 );
859 return Err(err);
860 }
861 }
862 };
863
864 if readonly_classification {
865 trace!(tool = %tool_name, "Validation classified tool as read-only");
866 }
867
868 if self.is_plan_mode() && !self.is_plan_mode_allowed(&tool_name, args) {
871 let error_msg = agent_execution::plan_mode_denial_message(&display_name);
872 record_failure(
873 tool_name_owned.clone(),
874 false,
875 None,
876 args_for_recording.clone(),
877 error_msg.clone(),
878 None,
879 None,
880 None,
881 None,
882 false,
883 );
884 return Err(anyhow!(error_msg).context(agent_execution::PLAN_MODE_DENIED_CONTEXT));
885 }
886
887 let shared_circuit_breaker = self.shared_circuit_breaker();
888 if let Some(breaker) = shared_circuit_breaker.as_ref()
889 && !breaker.allow_request_for_tool(&tool_name)
890 {
891 let diagnostics = breaker.get_diagnostics(&tool_name);
892 let retry_after = diagnostics
893 .remaining_backoff
894 .map(|backoff| format!(" retry_after={}s.", backoff.as_secs()))
895 .unwrap_or_default();
896 let error_msg = format!(
897 "Tool '{}' is temporarily disabled due to high failure rate (Circuit Breaker OPEN).{}",
898 display_name, retry_after
899 );
900 self.execution_history.add_record(
901 ToolExecutionRecord::failure(
902 tool_name_owned.clone(),
903 requested_name.clone(),
904 false,
905 None,
906 args_for_recording.clone(),
907 error_msg.clone(),
908 context_snapshot.clone(),
909 None,
910 None,
911 None,
912 None,
913 true,
914 )
915 .with_circuit_breaker_state(format!("{:?}", diagnostics.status))
916 .with_retry_after(diagnostics.remaining_backoff),
917 );
918 return Err(anyhow!(error_msg).context("tool denied by circuit breaker"));
919 }
920
921 let timeout_category = self.timeout_category_for(&tool_name).await;
922
923 if let Some(backoff) = self.should_circuit_break(timeout_category) {
924 warn!(
925 tool = %tool_name,
926 category = %timeout_category.label(),
927 delay_ms = %backoff.as_millis(),
928 "Circuit breaker active for tool category; backing off before execution"
929 );
930 tokio::time::sleep(backoff).await;
931 }
932
933 let execution_span = tracing::debug_span!(
934 "tool_execution",
935 tool = %tool_name,
936 requested = %name,
937 session_id = %context_snapshot.session_id,
938 task_id = %context_snapshot.task_id.as_deref().unwrap_or("")
939 );
940 let _span_guard = execution_span.enter();
941
942 trace!(
943 tool = %tool_name,
944 session_id = %context_snapshot.session_id,
945 task_id = %context_snapshot.task_id.as_deref().unwrap_or(""),
946 "Executing tool with harness context"
947 );
948
949 if tool_name != name {
950 trace!(
951 requested = %name,
952 canonical = %tool_name,
953 "Resolved tool alias to canonical name"
954 );
955 }
956
957 let base_timeout_ms = self
958 .timeout_policy
959 .read()
960 .unwrap_or_else(|poisoned| {
961 warn!("timeout policy lock poisoned while reading execution timeout; recovering");
962 poisoned.into_inner()
963 })
964 .ceiling_for(timeout_category)
965 .map(|d| d.as_millis() as u64);
966 let adaptive_timeout_ms = self
967 .resiliency
968 .lock()
969 .adaptive_timeout_ceiling
970 .get(&timeout_category)
971 .filter(|d| d.as_millis() > 0)
972 .map(|d| d.as_millis() as u64);
973 let timeout_category_label = Some(timeout_category.label().to_string());
974
975 if let Some(rate_limit) = self.execution_history.rate_limit_per_minute() {
976 let calls_last_minute = self
977 .execution_history
978 .calls_in_window(Duration::from_secs(60));
979 if calls_last_minute >= rate_limit {
980 warn!(
981 tool = %tool_name_owned,
982 requested = %requested_name,
983 calls_last_minute,
984 rate_limit,
985 "Execution history rate-limit threshold exceeded (observability-only)"
986 );
987 }
988 }
989
990 let skip_loop_detection = self
991 .should_skip_loop_detection_for_active_exec_continuation(&tool_name, args)
992 .await;
993 if skip_loop_detection {
994 trace!(
995 tool = %tool_name,
996 "Skipping identical-call loop detection for active exec continuation"
997 );
998 }
999
1000 let loop_limit = if skip_loop_detection {
1002 0
1003 } else {
1004 self.execution_history.loop_limit_for(&tool_name, args)
1005 };
1006 let (is_loop, repeat_count, _) = if skip_loop_detection {
1007 (false, 0, String::new())
1008 } else {
1009 self.execution_history.detect_loop(&tool_name, args)
1010 };
1011 if is_loop && repeat_count > 1 {
1012 let delay_ms =
1013 (LOOP_THROTTLE_REGISTRY_BASE_MS * repeat_count as u64).min(LOOP_THROTTLE_MAX_MS);
1014 if delay_ms > 0 {
1015 tokio::time::sleep(Duration::from_millis(delay_ms)).await;
1016 }
1017 }
1018 if loop_limit > 0 && is_loop {
1019 warn!(
1020 tool = %tool_name,
1021 repeats = repeat_count,
1022 "Loop detected: agent calling same tool with identical parameters {} times",
1023 repeat_count
1024 );
1025 if repeat_count >= loop_limit {
1026 if readonly_classification {
1027 let reuse_max_age = Duration::from_secs(120);
1028 let reused = self
1029 .execution_history
1030 .find_recent_spooled_result(&tool_name, args, reuse_max_age)
1031 .or_else(|| {
1032 self.execution_history.find_recent_successful_result(
1033 &tool_name,
1034 args,
1035 reuse_max_age,
1036 )
1037 });
1038 if let Some(mut reused_value) = reused {
1039 if let Some(obj) = reused_value.as_object_mut() {
1040 obj.insert("reused_recent_result".into(), json!(true));
1041 obj.insert("loop_detected".into(), json!(true));
1042 obj.insert("repeat_count".into(), json!(repeat_count));
1043 obj.insert("limit".into(), json!(loop_limit));
1044 obj.insert("tool".into(), json!(display_name));
1045 let reused_spooled =
1046 obj.get("spool_path").and_then(|v| v.as_str()).is_some();
1047 let note = if reused_spooled {
1048 "Loop detected; reusing a recent spooled output for this identical read-only call. Continue from the spool file instead of re-running the tool."
1049 } else {
1050 "Loop detected; reusing a recent successful output for this identical read-only call. Change approach before calling the same tool again."
1051 };
1052 obj.insert("loop_detected_note".into(), json!(note));
1053 }
1054 return Ok(reused_value);
1055 }
1056 }
1057
1058 let delay_ms = (LOOP_THROTTLE_REGISTRY_BASE_MS * repeat_count as u64)
1059 .min(LOOP_THROTTLE_MAX_MS);
1060 if delay_ms > 0 {
1061 tokio::time::sleep(Duration::from_millis(delay_ms)).await;
1062 }
1063
1064 let error = ToolExecutionError::new(
1065 tool_name_owned.clone(),
1066 ToolErrorType::PolicyViolation,
1067 agent_execution::loop_detection_block_message(
1068 &display_name,
1069 repeat_count as u64,
1070 None,
1071 ),
1072 );
1073 let mut payload = error.to_json_value();
1074 if let Some(obj) = payload.as_object_mut() {
1075 obj.insert("loop_detected".into(), json!(true));
1076 obj.insert("repeat_count".into(), json!(repeat_count));
1077 obj.insert("limit".into(), json!(loop_limit));
1078 obj.insert("tool".into(), json!(display_name));
1079 }
1080
1081 record_failure(
1082 tool_name_owned,
1083 false,
1084 None,
1085 args_for_recording,
1086 "Tool call blocked due to repeated identical invocations".to_string(),
1087 timeout_category_label.clone(),
1088 base_timeout_ms,
1089 adaptive_timeout_ms,
1090 None,
1091 false,
1092 );
1093
1094 return Ok(payload);
1095 }
1096 }
1097
1098 let full_auto_denied = {
1099 let gateway = self.policy_gateway.lock().await;
1100 gateway.has_full_auto_allowlist() && !gateway.is_allowed_in_full_auto(&tool_name)
1101 };
1102 if full_auto_denied {
1103 let _error = ToolExecutionError::new(
1104 tool_name_owned.clone(),
1105 ToolErrorType::PolicyViolation,
1106 format!(
1107 "Tool '{}' is not permitted while full-auto mode is active",
1108 display_name
1109 ),
1110 );
1111
1112 record_failure(
1113 tool_name_owned.clone(),
1114 false,
1115 None,
1116 args_for_recording.clone(),
1117 "Tool execution denied by policy".to_string(),
1118 timeout_category_label.clone(),
1119 base_timeout_ms,
1120 adaptive_timeout_ms,
1121 None,
1122 false,
1123 );
1124
1125 return Err(anyhow!(
1126 "Tool '{}' is not permitted while full-auto mode is active",
1127 display_name
1128 )
1129 .context("tool denied by full-auto allowlist"));
1130 }
1131
1132 let skip_policy_prompt = self
1133 .policy_gateway
1134 .lock()
1135 .await
1136 .take_preapproved(&tool_name);
1137
1138 let decision = if skip_policy_prompt {
1139 ToolExecutionDecision::Allowed
1140 } else {
1141 self.policy_gateway
1144 .lock()
1145 .await
1146 .should_execute_tool(&tool_name)
1147 .await?
1148 };
1149
1150 if !decision.is_allowed() {
1151 let error_msg = match decision {
1152 ToolExecutionDecision::DeniedWithFeedback(feedback) => {
1153 format!("Tool '{}' denied by user: {}", display_name, feedback)
1154 }
1155 _ => format!("Tool '{}' execution denied by policy", display_name),
1156 };
1157
1158 let _error = ToolExecutionError::new(
1159 tool_name_owned.clone(),
1160 ToolErrorType::PolicyViolation,
1161 error_msg.clone(),
1162 );
1163
1164 record_failure(
1165 tool_name_owned.clone(),
1166 false,
1167 None,
1168 args_for_recording.clone(),
1169 error_msg.clone(),
1170 timeout_category_label.clone(),
1171 base_timeout_ms,
1172 adaptive_timeout_ms,
1173 None,
1174 false,
1175 );
1176
1177 return Err(anyhow!("{}", error_msg).context("tool denied by policy"));
1178 }
1179
1180 let args = match self
1181 .policy_gateway
1182 .lock()
1183 .await
1184 .apply_policy_constraints(&tool_name, args)
1185 {
1186 Ok(processed_args) => processed_args,
1187 Err(err) => {
1188 let error = ToolExecutionError::with_original_error(
1189 tool_name_owned.clone(),
1190 ToolErrorType::InvalidParameters,
1191 "Failed to apply policy constraints".to_string(),
1192 err.to_string(),
1193 );
1194
1195 record_failure(
1196 tool_name_owned,
1197 false,
1198 None,
1199 args_for_recording,
1200 format!("Failed to apply policy constraints: {}", err),
1201 timeout_category_label.clone(),
1202 base_timeout_ms,
1203 adaptive_timeout_ms,
1204 None,
1205 false,
1206 );
1207
1208 return Ok(error.to_json_value());
1209 }
1210 };
1211
1212 let mut needs_pty = false;
1214 let mut tool_exists = false;
1215 let mut is_mcp_tool = false;
1216 let mut mcp_provider: Option<String> = None;
1217 let mut mcp_tool_name: Option<String> = None;
1218 let mut mcp_lookup_error: Option<anyhow::Error> = None;
1219
1220 if let Some(registration) = self.inventory.registration_for(&tool_name) {
1222 needs_pty = registration.uses_pty();
1223 tool_exists = true;
1224 }
1225 if let Some((provider, remote_tool)) = parse_canonical_mcp_tool_name(&tool_name) {
1227 needs_pty = true;
1228 tool_exists = true;
1229 is_mcp_tool = true;
1230 mcp_provider = Some(provider.to_string());
1231 mcp_tool_name = Some(remote_tool.to_string());
1232 }
1233
1234 let mcp_client_opt = { self.mcp_client.read().ok().and_then(|g| g.clone()) };
1235 if !is_mcp_tool && let Some(mcp_client) = mcp_client_opt {
1236 let mut resolved_mcp_name = legacy_mcp_tool_name(name)
1237 .map(str::to_string)
1238 .unwrap_or_else(|| tool_name_owned.clone());
1239
1240 if let Some(alias_target) = self.resolve_mcp_tool_alias(&resolved_mcp_name).await
1241 && alias_target != resolved_mcp_name
1242 {
1243 trace!(
1244 requested = %resolved_mcp_name,
1245 resolved = %alias_target,
1246 "Resolved MCP tool alias"
1247 );
1248 resolved_mcp_name = alias_target;
1249 }
1250
1251 match mcp_client.has_mcp_tool(&resolved_mcp_name).await {
1252 Ok(true) => {
1253 needs_pty = true;
1254 tool_exists = true;
1255 is_mcp_tool = true;
1256 mcp_provider = self.find_mcp_provider(&resolved_mcp_name).await;
1257 mcp_tool_name = Some(resolved_mcp_name);
1258 }
1259 Ok(false) => {
1260 }
1263 Err(err) => {
1264 warn!("Error checking MCP tool '{}': {}", resolved_mcp_name, err);
1265 mcp_lookup_error = Some(err);
1266 }
1267 }
1268 }
1269
1270 if !tool_exists {
1272 if let Some(err) = mcp_lookup_error {
1273 let error = ToolExecutionError::with_original_error(
1274 tool_name_owned.clone(),
1275 ToolErrorType::ExecutionError,
1276 format!("Failed to resolve MCP tool '{}': {}", display_name, err),
1277 err.to_string(),
1278 );
1279
1280 record_failure(
1281 tool_name_owned,
1282 is_mcp_tool,
1283 mcp_provider.clone(),
1284 args_for_recording,
1285 format!("Failed to resolve MCP tool '{}': {}", display_name, err),
1286 timeout_category_label.clone(),
1287 base_timeout_ms,
1288 adaptive_timeout_ms,
1289 None,
1290 false,
1291 );
1292
1293 return Ok(error.to_json_value());
1294 }
1295
1296 let (all_tool_names, similar_tools) = self.public_tool_catalog_for_error(name).await;
1297 let suggestion = if !similar_tools.is_empty() {
1298 format!(" Did you mean: {}?", similar_tools.join(", "))
1299 } else {
1300 String::new()
1301 };
1302 let available_tool_list = all_tool_names.join(", ");
1303 let message = format!(
1304 "Unknown tool: {}. Available tools: {}.{}",
1305 display_name, available_tool_list, suggestion
1306 );
1307 let error = ToolExecutionError::new(
1308 tool_name_owned.clone(),
1309 ToolErrorType::ToolNotFound,
1310 message.clone(),
1311 );
1312
1313 record_failure(
1314 tool_name_owned,
1315 is_mcp_tool,
1316 mcp_provider.clone(),
1317 args_for_recording,
1318 message,
1319 timeout_category_label.clone(),
1320 base_timeout_ms,
1321 adaptive_timeout_ms,
1322 None,
1323 false,
1324 );
1325
1326 return Ok(error.to_json_value());
1327 }
1328
1329 if is_mcp_tool && !self.mcp_circuit_breaker.allow_request() {
1331 let diag = self.mcp_circuit_breaker.diagnostics();
1332 let error = ToolExecutionError::new(
1333 tool_name_owned.clone(),
1334 ToolErrorType::ExecutionError,
1335 format!("MCP circuit breaker {:?}; skipping execution", diag.state),
1336 );
1337 let payload = json!({
1338 "error": error.to_json_value(),
1339 "circuit_breaker_state": format!("{:?}", diag.state),
1340 "consecutive_failures": diag.consecutive_failures,
1341 "note": "MCP provider circuit breaker open; execution skipped",
1342 "last_failed_at": diag.last_failure_time
1343 .and_then(|ts| ts.duration_since(SystemTime::UNIX_EPOCH).ok())
1344 .map(|d| d.as_secs()),
1345 "current_timeout_seconds": diag.current_timeout.as_secs(),
1346 "mcp_provider": mcp_provider,
1347 });
1348 warn!(
1349 tool = %tool_name_owned,
1350 payload = %payload,
1351 "Skipping MCP tool execution due to circuit breaker"
1352 );
1353 self.execution_history.add_record(
1354 ToolExecutionRecord::failure(
1355 tool_name_owned,
1356 requested_name.clone(),
1357 is_mcp_tool,
1358 mcp_provider.clone(),
1359 args_for_recording,
1360 format!("MCP circuit breaker {:?}; execution skipped", diag.state),
1361 context_snapshot.clone(),
1362 timeout_category_label.clone(),
1363 base_timeout_ms,
1364 adaptive_timeout_ms,
1365 None,
1366 false,
1367 )
1368 .with_circuit_breaker_state(format!("{:?}", diag.state))
1369 .with_retry_after(diag.retry_after),
1370 );
1371 return Ok(payload);
1372 }
1373
1374 trace!(
1375 tool = %tool_name,
1376 requested = %name,
1377 is_mcp = is_mcp_tool,
1378 uses_pty = needs_pty,
1379 alias = %if tool_name == name { "" } else { name },
1380 mcp_provider = %mcp_provider.as_deref().unwrap_or(""),
1381 "Resolved tool route"
1382 );
1383
1384 let _pty_guard = if needs_pty {
1386 match self.start_pty_session() {
1387 Ok(guard) => Some(guard),
1388 Err(err) => {
1389 let error = ToolExecutionError::with_original_error(
1390 tool_name_owned.clone(),
1391 ToolErrorType::ExecutionError,
1392 "Failed to start PTY session".to_string(),
1393 err.to_string(),
1394 );
1395
1396 record_failure(
1397 tool_name_owned,
1398 is_mcp_tool,
1399 mcp_provider.clone(),
1400 args_for_recording,
1401 "Failed to start PTY session".to_string(),
1402 timeout_category_label.clone(),
1403 base_timeout_ms,
1404 adaptive_timeout_ms,
1405 None,
1406 false,
1407 );
1408
1409 return Ok(error.to_json_value());
1410 }
1411 }
1412 } else {
1413 None
1414 };
1415
1416 let execution_started_at = Instant::now();
1419 let effective_timeout = self.effective_timeout(timeout_category);
1420 let effective_timeout_ms = effective_timeout.map(|d| d.as_millis() as u64);
1421 let exec_future = async {
1422 if is_mcp_tool {
1423 let mcp_name = mcp_tool_name
1424 .as_deref()
1425 .context("MCP tool routing inconsistency: resolved MCP tool name missing")?;
1426 self.execute_mcp_tool(mcp_name, args).await
1427 } else if exec_settlement_mode.settle_noninteractive()
1428 && tool_name == tools::UNIFIED_EXEC
1429 {
1430 if self.optimization_config.memory_pool.enabled {
1431 let _execution_guard = self.memory_pool.get_value();
1432 let _string_guard = self.memory_pool.get_string();
1433 let _vec_guard = self.memory_pool.get_vec();
1434 self.execute_unified_exec_internal(args, exec_settlement_mode)
1435 .await
1436 } else {
1437 self.execute_unified_exec_internal(args, exec_settlement_mode)
1438 .await
1439 }
1440 } else if let Some(registration) = self.inventory.registration_for(&tool_name) {
1441 if registration.is_deprecated() {
1443 if let Some(msg) = registration.deprecation_message() {
1444 warn!("Tool '{}' is deprecated: {}", tool_name, msg);
1445 } else {
1446 warn!(
1447 "Tool '{}' is deprecated and may be removed in a future version",
1448 tool_name
1449 );
1450 }
1451 }
1452
1453 let handler = registration.handler();
1454 match handler {
1455 ToolHandler::RegistryFn(executor) => {
1456 if self.optimization_config.memory_pool.enabled {
1458 let _execution_guard = self.memory_pool.get_value();
1459 let _string_guard = self.memory_pool.get_string();
1460 let _vec_guard = self.memory_pool.get_vec();
1461 executor(self, args).await
1462 } else {
1463 executor(self, args).await
1464 }
1465 }
1466 ToolHandler::TraitObject(tool) => {
1467 if self
1469 .optimization_config
1470 .tool_registry
1471 .use_optimized_registry
1472 {
1473 if let Some(cached_tool) = cached_tool.as_ref() {
1474 cached_tool.execute(args).await
1476 } else {
1477 self.hot_tool_cache
1479 .write()
1480 .put(tool_name.clone(), tool.clone());
1481 tool.execute(args).await
1482 }
1483 } else {
1484 tool.execute(args).await
1485 }
1486 }
1487 }
1488 } else {
1489 let (tool_names, similar_tools) =
1492 self.public_tool_catalog_for_error(&requested_name).await;
1493 let available_tool_list = tool_names.join(", ");
1494
1495 let error_msg = if tool_name != requested_name {
1496 format!(
1498 "Tool '{}' (registered alias for '{}') not found in registry. \
1499 Available tools: {}. \
1500 Note: Tool aliases are defined during tool registration.",
1501 requested_name, tool_name, available_tool_list
1502 )
1503 } else {
1504 let suggestion = if !similar_tools.is_empty() {
1505 format!(" Did you mean: {}?", similar_tools.join(", "))
1506 } else {
1507 String::new()
1508 };
1509
1510 format!(
1511 "Tool '{}' not found in registry. Available tools: {}.{}",
1512 display_name, available_tool_list, suggestion
1513 )
1514 };
1515
1516 let error = ToolExecutionError::new(
1517 tool_name_owned.clone(),
1518 ToolErrorType::ToolNotFound,
1519 error_msg.clone(),
1520 );
1521
1522 record_failure(
1523 tool_name_owned.clone(),
1524 is_mcp_tool,
1525 mcp_provider.clone(),
1526 args_for_recording.clone(),
1527 error_msg,
1528 timeout_category_label.clone(),
1529 base_timeout_ms,
1530 adaptive_timeout_ms,
1531 effective_timeout_ms,
1532 false,
1533 );
1534
1535 Ok(error.to_json_value())
1536 }
1537 };
1538
1539 let result = if let Some(limit) = effective_timeout {
1540 trace!(
1541 tool = %tool_name_owned,
1542 category = %timeout_category.label(),
1543 timeout_ms = %limit.as_millis(),
1544 "Executing tool with effective timeout"
1545 );
1546 match tokio::time::timeout(limit, exec_future).await {
1547 Ok(res) => res,
1548 Err(_) => {
1549 let timeout_ms = limit.as_millis() as u64;
1550 let tripped = self.record_tool_failure(timeout_category);
1551 if tripped {
1552 warn!(
1553 tool = %tool_name_owned,
1554 category = %timeout_category.label(),
1555 "Tool circuit breaker tripped after consecutive timeout failures"
1556 );
1557 }
1558 let retry_after = self.should_circuit_break(timeout_category);
1559
1560 let mut timeout_error = ToolExecutionError::new(
1561 tool_name_owned.clone(),
1562 ToolErrorType::Timeout,
1563 format!(
1564 "Operation '{}' exceeded the {} timeout ceiling ({}s)",
1565 tool_name_owned,
1566 timeout_category.label(),
1567 limit.as_secs()
1568 ),
1569 )
1570 .with_tool_call_context(&tool_name_owned, &args_for_recording)
1571 .with_surface("tool_registry")
1572 .with_debug_metadata("timeout_category", timeout_category.label())
1573 .with_debug_metadata("timeout_ms", timeout_ms.to_string());
1574
1575 if tool_name_owned == tools::UNIFIED_EXEC {
1576 timeout_error.recovery_suggestions = vec![
1577 Cow::Borrowed(
1578 "Use unified_exec with action='poll' to check command progress",
1579 ),
1580 Cow::Borrowed(
1581 "Use unified_exec with action='list' to find active sessions",
1582 ),
1583 Cow::Borrowed(
1584 "Use unified_exec with action='close' if a stale session is still active",
1585 ),
1586 ];
1587 }
1588
1589 if let Some(delay) = retry_after {
1590 timeout_error.retry_after_ms =
1591 Some(delay.as_millis().min(u128::from(u64::MAX)) as u64);
1592 }
1593
1594 let mut timeout_payload = timeout_error.to_json_value();
1595 Self::annotate_timeout_error_payload(
1596 &mut timeout_payload,
1597 timeout_category.label(),
1598 timeout_ms,
1599 tripped,
1600 );
1601
1602 if let Some(breaker) = shared_circuit_breaker.as_ref() {
1603 breaker.record_failure_category_for_tool(
1604 &tool_name_owned,
1605 ErrorCategory::Timeout,
1606 );
1607 }
1608 if is_mcp_tool {
1609 self.mcp_circuit_breaker
1610 .record_failure_category(ErrorCategory::Timeout);
1611 }
1612 record_failure(
1613 tool_name_owned,
1614 is_mcp_tool,
1615 mcp_provider,
1616 args_for_recording,
1617 timeout_error.user_message(),
1618 timeout_category_label.clone(),
1619 base_timeout_ms,
1620 adaptive_timeout_ms,
1621 Some(timeout_ms),
1622 tripped,
1623 );
1624 return Ok(timeout_payload);
1625 }
1626 }
1627 } else {
1628 exec_future.await
1629 };
1630
1631 match result {
1636 Ok(value) => {
1637 if let Some(breaker) = shared_circuit_breaker.as_ref() {
1638 breaker.record_success_for_tool(&tool_name_owned);
1639 }
1640 if is_mcp_tool {
1641 self.mcp_circuit_breaker.record_success();
1642 }
1643 self.reset_tool_failure(timeout_category);
1644 let should_decay = {
1645 let mut state = self.resiliency.lock();
1646 let success_streak = state.adaptive_tuning.success_streak;
1647 if let Some(counter) = state.success_trackers.get_mut(&timeout_category) {
1648 *counter = counter.saturating_add(1);
1649 let counter_val = *counter;
1650 if counter_val >= success_streak {
1651 *counter = 0;
1652 true
1653 } else {
1654 false
1655 }
1656 } else {
1657 false
1658 }
1659 };
1660 if should_decay {
1661 self.decay_adaptive_timeout(timeout_category);
1662 }
1663 self.record_tool_latency(timeout_category, execution_started_at.elapsed());
1664 let processed_value = self
1666 .process_tool_output(&tool_name_owned, value, is_mcp_tool)
1667 .await;
1668 let normalized_value = normalize_tool_output(processed_value);
1669
1670 if !readonly_classification {
1671 self.execution_history.clear();
1672 }
1673
1674 self.execution_history
1675 .add_record(ToolExecutionRecord::success(
1676 tool_name_owned,
1677 requested_name,
1678 is_mcp_tool,
1679 mcp_provider,
1680 args_for_recording,
1681 normalized_value.clone(),
1682 context_snapshot.clone(),
1683 timeout_category_label.clone(),
1684 base_timeout_ms,
1685 adaptive_timeout_ms,
1686 effective_timeout_ms,
1687 false,
1688 ));
1689
1690 Ok(normalized_value)
1691 }
1692 Err(err) => {
1693 let error = ToolExecutionError::from_anyhow(
1694 tool_name_owned.clone(),
1695 &err,
1696 0,
1697 false,
1698 false,
1699 Some("tool_registry"),
1700 )
1701 .with_tool_call_context(&tool_name_owned, &args_for_recording);
1702 let error_category = error.category;
1703 if let Some(breaker) = shared_circuit_breaker.as_ref() {
1704 breaker.record_failure_category_for_tool(&tool_name_owned, error_category);
1705 }
1706 if is_mcp_tool {
1707 self.mcp_circuit_breaker
1708 .record_failure_category(error_category);
1709 }
1710
1711 let tripped = if error_category.should_trip_circuit_breaker() {
1712 let tripped = self.record_tool_failure(timeout_category);
1713 if tripped {
1714 warn!(
1715 tool = %tool_name_owned,
1716 category = %timeout_category.label(),
1717 "Tool circuit breaker tripped after consecutive failures"
1718 );
1719 }
1720 tripped
1721 } else {
1722 false
1723 };
1724
1725 let mut payload = error.to_json_value();
1726 Self::annotate_timeout_error_payload(
1727 &mut payload,
1728 timeout_category.label(),
1729 effective_timeout_ms.unwrap_or(0),
1730 tripped,
1731 );
1732
1733 record_failure(
1734 tool_name_owned,
1735 is_mcp_tool,
1736 mcp_provider,
1737 args_for_recording,
1738 format!("Tool execution failed: {}", err),
1739 timeout_category_label.clone(),
1740 base_timeout_ms,
1741 adaptive_timeout_ms,
1742 effective_timeout_ms,
1743 tripped,
1744 );
1745
1746 Ok(payload)
1747 }
1748 }
1749 }
1750}