use super::budget_blocking::{DuplicateSendFileNoopCtx, ToolBlockKind, ToolBudgetBlockCtx};
use super::execution_io::ToolExecutionIoCtx;
use super::guards::LoopPatternGuardOutcome;
use super::project_dir::{
extract_project_dir_hint_with_aliases, is_file_recheck_tool, is_recognized_project_root,
maybe_inject_project_dir_into_tool_args, project_dir_from_tool_args, scope_allows_project_dir,
tool_call_includes_project_path,
};
use super::result_learning::{ResultLearningEnv, ResultLearningState};
use super::types::{ToolExecutionCtx, ToolExecutionOutcome};
use crate::agent::execution_state::{
extract_target_hints_from_arguments, OutcomeEntry, StepExecutionPlan,
};
use crate::agent::recall_guardrails::is_personal_memory_tool;
use crate::agent::*;
use crate::traits::{ToolCallSemantics, ToolTargetHint, ToolTargetHintKind};
use crate::utils::{truncate_str, truncate_with_note};
/// Length limit handed to `truncate_str` when summarizing a completed tool result.
const TOOL_COMPLETE_SUMMARY_MAX_CHARS: usize = 140;
/// Length limit handed to `truncate_with_note` for the result excerpt embedded in an
/// external-action completion acknowledgement.
const EXTERNAL_ACTION_ACK_MAX_CHARS: usize = 500;
/// Returns the first line of `result_text` that carries an error marker, capped at
/// 200 characters, or `None` when no line contains any marker.
///
/// Marker matching is case-sensitive substring search.
fn extract_error_summary_line(result_text: &str) -> Option<String> {
    // Any one of these substrings flags a line as the error summary.
    const ERROR_MARKERS: [&str; 6] = [
        "API ERROR",
        "Error",
        "error",
        "Forbidden",
        "Unauthorized",
        "BLOCKED",
    ];

    for line in result_text.lines() {
        let flagged = ERROR_MARKERS.iter().any(|marker| line.contains(*marker));
        if flagged {
            // Truncate by characters (not bytes) so multi-byte text is never split.
            return Some(line.chars().take(200).collect());
        }
    }
    None
}
fn raw_internal_scope_violation(
raw_arguments: &str,
session_id: &str,
resolved_goal_id: Option<&str>,
) -> Option<String> {
let parsed = serde_json::from_str::<Value>(raw_arguments).ok()?;
let map = parsed.as_object()?;
if let Some(candidate_session_id) = map.get("_session_id").and_then(|v| v.as_str()) {
if candidate_session_id != session_id {
return Some(format!(
"_session_id mismatch (expected `{}`, got `{}`)",
session_id, candidate_session_id
));
}
}
if let Some(candidate_goal_id) = map.get("_goal_id").and_then(|v| v.as_str()) {
match resolved_goal_id {
Some(expected_goal_id) if candidate_goal_id != expected_goal_id => {
return Some(format!(
"_goal_id mismatch (expected `{}`, got `{}`)",
expected_goal_id, candidate_goal_id
));
}
None => {
return Some(format!(
"_goal_id `{}` provided but no goal scope is active",
candidate_goal_id
));
}
_ => {}
}
}
None
}
/// Describes a tool call whose arguments failed a deterministic contract check.
struct DeterministicToolContractViolation {
    /// Short statement of which requirement the arguments did not meet.
    reason: String,
    /// Guidance text explaining how the call should be retried.
    coaching: String,
}
fn scheduled_goal_runs_missing_goal_id_violation(
raw_arguments: &str,
) -> Option<DeterministicToolContractViolation> {
let parsed = serde_json::from_str::<Value>(raw_arguments).ok()?;
let map = parsed.as_object()?;
let has_goal_id = map
.get("goal_id")
.and_then(|v| v.as_str())
.is_some_and(|v| !v.trim().is_empty());
if has_goal_id {
None
} else {
let action = map
.get("action")
.and_then(|v| v.as_str())
.unwrap_or("<missing>");
Some(DeterministicToolContractViolation {
reason: format!(
"action `{}` requires `goal_id` for `scheduled_goal_runs`",
action
),
coaching: "If the user asked to learn/remember/save facts, use `remember_fact`. \
If they asked about scheduled-goal runs, first get IDs with \
`manage_memories(action='list_scheduled')`, then call `scheduled_goal_runs` with `goal_id`."
.to_string(),
})
}
}
fn manage_memories_missing_goal_id_violation(
raw_arguments: &str,
) -> Option<DeterministicToolContractViolation> {
const ACTIONS_REQUIRING_GOAL_ID: &[&str] = &[
"add_schedule",
"cancel_scheduled",
"pause_scheduled",
"resume_scheduled",
"retry_scheduled",
"diagnose_scheduled",
];
let parsed = serde_json::from_str::<Value>(raw_arguments).ok()?;
let map = parsed.as_object()?;
let action = map.get("action").and_then(|v| v.as_str())?;
if !ACTIONS_REQUIRING_GOAL_ID.contains(&action) {
return None;
}
let has_goal_id = map
.get("goal_id")
.and_then(|v| v.as_str())
.is_some_and(|v| !v.trim().is_empty());
if has_goal_id {
return None;
}
Some(DeterministicToolContractViolation {
reason: format!(
"action `{}` requires `goal_id` for `manage_memories`",
action
),
coaching: "If the user asked about scheduled goals, first get IDs with \
`manage_memories(action='list_scheduled')`, then retry the intended \
`manage_memories` action with a concrete `goal_id`."
.to_string(),
})
}
/// Runs the per-tool deterministic argument check for `tool_name`, if one exists.
///
/// Only `scheduled_goal_runs` and `manage_memories` have checks; every other tool
/// yields `None`.
fn deterministic_tool_contract_violation(
    tool_name: &str,
    raw_arguments: &str,
) -> Option<DeterministicToolContractViolation> {
    if tool_name == "scheduled_goal_runs" {
        return scheduled_goal_runs_missing_goal_id_violation(raw_arguments);
    }
    if tool_name == "manage_memories" {
        return manage_memories_missing_goal_id_violation(raw_arguments);
    }
    None
}
fn tool_is_currently_exposed(tool_defs: &[Value], tool_name: &str) -> bool {
tool_defs.iter().any(|def| {
def.get("function")
.and_then(|function| function.get("name"))
.and_then(|name| name.as_str())
.is_some_and(|exposed_name| exposed_name == tool_name)
})
}
/// Builds the tool-result text shown when `tool_name` is blocked because an untrusted
/// external API guide is in use.
///
/// When `active_skills` is non-empty the skills are listed by name, comma-separated;
/// otherwise a generic description of the reference is used.
fn blocked_for_untrusted_external_reference_message(
    tool_name: &str,
    active_skills: &[String],
) -> String {
    let scope = match active_skills {
        [] => String::from("an untrusted external API guide reference"),
        skills => format!(
            "untrusted external API guide skill(s): {}",
            skills.join(", ")
        ),
    };
    format!(
        "Blocked: `{}` is unavailable while using {}. \
         Use API/auth tools directly, or ask explicitly for local file or repository inspection if you want me to read local files or inspect the local environment.",
        tool_name, scope
    )
}
/// Special case for `run_command`: permits the candidate when it is the existing parent
/// directory of an allowed target that is not itself a directory.
///
/// Any path that fails `validate_path` makes the answer `false`.
fn allow_scaffold_parent_dir_for_target(
    tool_name: &str,
    allowed_target: &ToolTargetHint,
    candidate_target: &ToolTargetHint,
) -> bool {
    // `None` stands for "a path could not be validated or has no parent".
    let candidate_is_scaffold_parent = || -> Option<bool> {
        let scope_path = crate::tools::fs_utils::validate_path(&allowed_target.value).ok()?;
        if scope_path.is_dir() {
            // The allowed target is already a directory, so no parent exemption applies.
            return Some(false);
        }
        let candidate_path =
            crate::tools::fs_utils::validate_path(&candidate_target.value).ok()?;
        let parent = scope_path.parent()?;
        Some(parent.is_dir() && candidate_path == parent)
    };

    // Short-circuit keeps the path checks from running for any other tool.
    tool_name == "run_command" && candidate_is_scaffold_parent().unwrap_or(false)
}
fn target_hint_allowed_for_step(
tool_name: &str,
allowed_target: &ToolTargetHint,
candidate_target: &ToolTargetHint,
) -> bool {
match (&allowed_target.kind, &candidate_target.kind) {
(ToolTargetHintKind::Url, ToolTargetHintKind::Url) => allowed_target
.value
.eq_ignore_ascii_case(&candidate_target.value),
(
ToolTargetHintKind::Path | ToolTargetHintKind::ProjectScope,
ToolTargetHintKind::Path | ToolTargetHintKind::ProjectScope,
) => {
allowed_target.value == candidate_target.value
|| scope_allows_project_dir(&allowed_target.value, &candidate_target.value)
|| allow_scaffold_parent_dir_for_target(tool_name, allowed_target, candidate_target)
|| is_recognized_project_root(&candidate_target.value)
}
_ => false,
}
}
fn json_contains_string_value(value: &Value, expected: &str) -> bool {
match value {
Value::String(candidate) => candidate.eq_ignore_ascii_case(expected),
Value::Array(items) => items
.iter()
.any(|item| json_contains_string_value(item, expected)),
Value::Object(map) => map
.values()
.any(|value| json_contains_string_value(value, expected)),
_ => false,
}
}
/// Reports whether a tool call corresponds to the given linear-intent step.
///
/// The tool names must match ignoring ASCII case. A step with an empty target matches
/// any arguments; otherwise the target must appear either among the target hints
/// extracted from the arguments or as a string value anywhere in the argument JSON
/// (both compared ignoring ASCII case).
fn linear_intent_step_matches_tool_call(
    step: &crate::agent::execution_state::LinearIntentStep,
    tool_name: &str,
    effective_arguments: &str,
) -> bool {
    if !step.tool.eq_ignore_ascii_case(tool_name) {
        return false;
    }
    if step.target.is_empty() {
        return true;
    }

    for hint in extract_target_hints_from_arguments(effective_arguments).iter() {
        if hint.value.eq_ignore_ascii_case(&step.target) {
            return true;
        }
    }

    // Fall back to scanning every string in the argument JSON; unparsable arguments
    // simply do not match.
    match serde_json::from_str::<Value>(effective_arguments) {
        Ok(parsed) => json_contains_string_value(&parsed, &step.target),
        Err(_) => false,
    }
}
/// Checks a tool call's requested targets against the step's target scope lock.
///
/// Returns `None` when the lock is not a hard-fail lock, when it lists no allowed
/// targets, when the arguments yield no target hints, or when every hint is allowed.
/// Otherwise returns a message listing the allowed targets and the offending ones.
fn target_scope_violation_for_tool_call(
    tool_name: &str,
    effective_arguments: &str,
    step_plan: &StepExecutionPlan,
) -> Option<String> {
    let scope = &step_plan.target_scope;
    let lock_is_enforced = scope.hard_fail_outside_scope && !scope.allowed_targets.is_empty();
    if !lock_is_enforced {
        return None;
    }

    let requested = extract_target_hints_from_arguments(effective_arguments);
    if requested.is_empty() {
        return None;
    }

    // Collect every requested target that no allowed target covers.
    let mut out_of_scope: Vec<String> = Vec::new();
    for candidate in requested.iter() {
        let permitted = scope
            .allowed_targets
            .iter()
            .any(|allowed| target_hint_allowed_for_step(tool_name, allowed, candidate));
        if !permitted {
            out_of_scope.push(candidate.value.clone());
        }
    }
    if out_of_scope.is_empty() {
        return None;
    }

    let allowed_list = scope
        .allowed_targets
        .iter()
        .map(|allowed| allowed.value.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    Some(format!(
        "target scope lock violation (allowed target(s): {}, requested target(s): {})",
        allowed_list,
        out_of_scope.join(", ")
    ))
}
/// Reports whether the attempted tool-call count has reached the policy budget.
///
/// A budget of `0` means no budget is set, so it is never considered reached.
fn is_hard_policy_tool_budget_reached(
    total_tool_calls_attempted: usize,
    policy_tool_budget: usize,
) -> bool {
    if policy_tool_budget == 0 {
        return false;
    }
    total_tool_calls_attempted >= policy_tool_budget
}
/// Reports whether a tool result signals that work was moved to the background.
///
/// True when the metadata flag `background_started` is set, or when the result text
/// contains one of the known (case-sensitive) detach phrases.
fn tool_result_indicates_background_detach(
    tool_name: &str,
    result_text: &str,
    metadata: &crate::traits::ToolCallMetadata,
) -> bool {
    // The tool name is accepted but plays no part in the decision.
    let _ = tool_name;

    const DETACH_PHRASES: [&str; 3] = [
        "Moved to background",
        "started in background",
        "spawned in background",
    ];
    metadata.background_started
        || DETACH_PHRASES
            .iter()
            .any(|phrase| result_text.contains(*phrase))
}
/// Builds the user-facing acknowledgement for a tool call that detached into the
/// background.
///
/// The message starts with the first primary line of `result_text`, falling back to a
/// per-tool default sentence when there is none. When the metadata reports that
/// completion notifications are enabled, a sentence saying the final result will be
/// delivered automatically is appended.
fn build_background_detach_ack(
    tool_name: &str,
    result_text: &str,
    metadata: &crate::traits::ToolCallMetadata,
) -> String {
    let default_prefix = match tool_name {
        "terminal" => "The command is running in the background.",
        "cli_agent" => "The CLI agent task is running in the background.",
        "spawn_agent" => "The spawned sub-agent is running in the background.",
        _ => "The task is running in the background.",
    };
    // Lazily allocate the fallback so no String is built when a primary line exists.
    let first_line = crate::traits::first_primary_message_line(result_text, &[])
        .unwrap_or_else(|| default_prefix.to_string());
    if metadata.completion_notifications_enabled {
        format!(
            "{} Completion notifications are enabled, and the final result will be sent automatically when it finishes.",
            first_line
        )
    } else {
        // `first_line` is already owned; return it without cloning.
        first_line
    }
}
/// Reports whether a `run_command` result text contains one of the phrases that mark a
/// policy block pointing at the `terminal` tool. Matching ignores ASCII case.
fn run_command_policy_block_requires_terminal(result_text: &str) -> bool {
    // All phrases are lowercase because they are compared against lowercased text.
    const TERMINAL_REDIRECT_PHRASES: [&str; 5] = [
        "safe command list",
        "use 'terminal' for this command",
        "use `terminal`",
        "shell operators",
        "daemonization primitives are blocked in run_command",
    ];

    let lowered = result_text.to_ascii_lowercase();
    TERMINAL_REDIRECT_PHRASES
        .iter()
        .any(|phrase| lowered.contains(*phrase))
}
/// Wraps `value` in single quotes for a POSIX shell.
///
/// Each embedded single quote is emitted as `'"'"'` (close the quote, add a
/// double-quoted `'`, reopen the quote). An empty input yields `''`.
fn shell_single_quote(value: &str) -> String {
    let escaped = value.replace('\'', "'\"'\"'");
    format!("'{}'", escaped)
}
fn build_terminal_fallback_arguments_from_run_command(raw_arguments: &str) -> Option<String> {
let args = serde_json::from_str::<Value>(raw_arguments).ok()?;
let map = args.as_object()?;
let command = map.get("command").and_then(|v| v.as_str())?.trim();
if command.is_empty() {
return None;
}
let working_dir = map
.get("working_dir")
.and_then(|v| v.as_str())
.map(str::trim)
.filter(|v| !v.is_empty());
let terminal_command = if let Some(dir) = working_dir {
format!("cd {} && {}", shell_single_quote(dir), command)
} else {
command.to_string()
};
Some(
json!({
"action": "run",
"command": terminal_command,
})
.to_string(),
)
}
/// Reports whether `s` is empty or one of the bare success acknowledgements below,
/// after trimming whitespace and ignoring ASCII case.
fn is_trivial_success_excerpt(s: &str) -> bool {
    matches!(
        s.trim().to_ascii_lowercase().as_str(),
        "" | "ok"
            | "done"
            | "success"
            | "completed"
            | "completed successfully"
            | "request completed successfully"
    )
}
/// Produces a short summary for a completed tool result: the first primary message
/// line, or `"Completed"` when that line is missing or blank, trimmed and truncated to
/// `TOOL_COMPLETE_SUMMARY_MAX_CHARS`.
fn summarize_completed_tool_result(result_text: &str) -> String {
    let first_line = crate::traits::first_primary_message_line(result_text, &[]);
    let summary = match first_line {
        Some(line) if !line.trim().is_empty() => line,
        _ => "Completed".to_string(),
    };
    truncate_str(summary.trim(), TOOL_COMPLETE_SUMMARY_MAX_CHARS)
}
/// Reports whether `text` contains an `[exit code: N]` marker with a non-zero `N`.
///
/// Only the first marker is inspected and the marker is matched ignoring ASCII case.
/// An optional leading sign is accepted, so negative codes such as `-1` count as
/// non-zero, and the digits are checked textually so codes too large for `i32` are
/// still recognised. Returns `false` when there is no marker or no digits follow it.
fn has_nonzero_exit_code(text: &str) -> bool {
    const MARKER: &str = "[exit code:";

    // ASCII lowercasing never changes byte lengths, so an offset found in the lowered
    // copy is valid in the original text.
    let Some(pos) = text.to_ascii_lowercase().find(MARKER) else {
        return false;
    };
    let after = text[pos + MARKER.len()..].trim_start();

    // A sign does not affect whether the code is zero; skip it if present.
    let unsigned = after.strip_prefix(['-', '+']).unwrap_or(after);
    let digit_count = unsigned
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(unsigned.len());
    let digits = &unsigned[..digit_count];

    // Non-zero exactly when at least one digit is present and not all digits are `0`.
    !digits.is_empty() && digits.bytes().any(|digit| digit != b'0')
}
/// Builds the acknowledgement text for a finished external action.
///
/// The status sentence reports errors when the primary message content carries a
/// non-zero exit-code marker. Unless the content is empty or a bare success
/// acknowledgement, a truncated excerpt of it is appended under "Latest result:".
fn build_external_action_completion_ack(result_text: &str) -> String {
    let primary = crate::traits::extract_primary_message_content(result_text, &[]);
    let excerpt = primary.trim();

    let status = match has_nonzero_exit_code(excerpt) {
        true => "The requested action finished with errors.",
        false => "The requested action completed successfully.",
    };

    // Nothing worth quoting: return the status sentence on its own.
    if excerpt.is_empty() || is_trivial_success_excerpt(excerpt) {
        return status.to_string();
    }

    let latest = truncate_with_note(excerpt, EXTERNAL_ACTION_ACK_MAX_CHARS);
    format!("{}\n\nLatest result:\n{}", status, latest)
}
/// Reports whether an external-action acknowledgement should be built for this result.
///
/// The answer is `false` when the primary message content, ignoring leading whitespace
/// and ASCII case, starts with one of the block/error prefixes below.
fn should_build_external_action_ack(result_text: &str) -> bool {
    // Lowercase because they are compared against lowercased content.
    const REJECTION_PREFIXES: [&str; 5] = [
        "request blocked:",
        "blocked:",
        "[system] blocked:",
        "error:",
        "failed to ",
    ];

    let primary = crate::traits::extract_primary_message_content(result_text, &[]);
    let lowered = primary.trim_start().to_ascii_lowercase();
    !REJECTION_PREFIXES
        .iter()
        .any(|prefix| lowered.starts_with(*prefix))
}
/// Reports whether a tool result carries content that can serve as verification
/// evidence.
///
/// Requires that the call semantics allow verification from result content, and that
/// the trimmed primary message content is neither empty nor a bare acknowledgement
/// (compared ignoring ASCII case).
fn tool_result_contains_verifiable_evidence(
    semantics: &ToolCallSemantics,
    result_text: &str,
) -> bool {
    // NOTE(review): unlike `is_trivial_success_excerpt`, this list does not include
    // "request completed successfully" — confirm whether that difference is intended.
    const BARE_ACKNOWLEDGEMENTS: [&str; 5] = [
        "ok",
        "done",
        "success",
        "completed",
        "completed successfully",
    ];

    if !semantics.can_verify_with_result_content() {
        return false;
    }

    let content = crate::traits::extract_primary_message_content(result_text, &[]);
    let trimmed = content.trim();
    if trimmed.is_empty() {
        return false;
    }

    let lowered = trimmed.to_ascii_lowercase();
    !BARE_ACKNOWLEDGEMENTS.contains(&lowered.as_str())
}
/// Normalizes a target string for comparison: surrounding whitespace is removed,
/// trailing `/`, `.`, `,` and `;` characters are stripped, and the result is ASCII
/// lowercased.
///
/// Trailing slashes and trailing punctuation are stripped together in one pass, so the
/// result does not depend on the order in which they appear (`dir/.` and `dir/` both
/// normalize to `dir`).
fn normalized_target_value(value: &str) -> String {
    value
        .trim()
        .trim_end_matches(['/', '.', ',', ';'])
        .to_ascii_lowercase()
}
/// Reports whether a tool target hint refers to the same thing as a completion-contract
/// verification target.
///
/// The kinds must be compatible (URL with URL; path and project scope are
/// interchangeable). The normalized values must both be non-empty and either equal or
/// one must contain the other.
fn tool_target_hint_matches_contract_target(
    target_hint: &ToolTargetHint,
    contract_target: &VerificationTarget,
) -> bool {
    let kinds_compatible = match (target_hint.kind, contract_target.kind) {
        (ToolTargetHintKind::Url, VerificationTargetKind::Url) => true,
        (
            ToolTargetHintKind::Path | ToolTargetHintKind::ProjectScope,
            VerificationTargetKind::Path | VerificationTargetKind::ProjectScope,
        ) => true,
        _ => false,
    };
    if !kinds_compatible {
        return false;
    }

    let hint = normalized_target_value(&target_hint.value);
    let contract = normalized_target_value(&contract_target.value);
    match (hint.is_empty(), contract.is_empty()) {
        (false, false) => {
            hint == contract
                || hint.contains(contract.as_str())
                || contract.contains(hint.as_str())
        }
        // An empty string is contained in everything, so empties are rejected outright.
        _ => false,
    }
}
/// Reports whether `haystack` mentions the verification target, ignoring ASCII case.
///
/// The normalized target value is searched for first. For path and project-scope
/// targets, the last non-empty path segment (split on `/` or `\`) is tried as a
/// fallback; URL targets get no fallback.
fn verification_target_matches_haystack(target: &VerificationTarget, haystack: &str) -> bool {
    let lowered = haystack.to_ascii_lowercase();
    let needle = normalized_target_value(&target.value);
    if needle.is_empty() {
        return false;
    }
    if lowered.contains(needle.as_str()) {
        return true;
    }

    match target.kind {
        VerificationTargetKind::Url => false,
        VerificationTargetKind::ProjectScope | VerificationTargetKind::Path => {
            let last_segment = target
                .value
                .rsplit(['/', '\\'])
                .find(|segment| !segment.is_empty());
            match last_segment {
                Some(segment) => {
                    let tail = normalized_target_value(segment);
                    !tail.is_empty() && lowered.contains(tail.as_str())
                }
                None => false,
            }
        }
    }
}
/// Reports whether a tool observation relates to any verification target of the
/// completion contract.
///
/// A contract with no targets matches everything. Otherwise the call's semantic target
/// hints are compared first; failing that, the raw arguments, the primary message
/// content, the full result text and (when one can be extracted) the command string are
/// searched for each target.
fn observation_matches_completion_contract(
    contract: &CompletionContract,
    semantics: &ToolCallSemantics,
    raw_arguments: &str,
    result_text: &str,
) -> bool {
    let targets = &contract.verification_targets;
    if targets.is_empty() {
        return true;
    }

    for hint in semantics.target_hints.iter() {
        let hint_matches = targets
            .iter()
            .any(|target| tool_target_hint_matches_contract_target(hint, target));
        if hint_matches {
            return true;
        }
    }

    // Text sources searched when no structured hint matched.
    let primary = crate::traits::extract_primary_message_content(result_text, &[]).to_string();
    let mut haystacks: Vec<String> = vec![
        raw_arguments.to_string(),
        primary,
        result_text.to_string(),
    ];
    haystacks.extend(extract_command_from_args(raw_arguments));

    for target in targets.iter() {
        for haystack in &haystacks {
            if verification_target_matches_haystack(target, haystack) {
                return true;
            }
        }
    }
    false
}
impl Agent {
pub(in crate::agent) async fn run_tool_execution_phase(
&self,
ctx: &mut ToolExecutionCtx<'_>,
) -> anyhow::Result<ToolExecutionOutcome> {
let resp = ctx.resp;
let emitter = ctx.emitter;
let task_id = ctx.task_id;
let session_id = ctx.session_id;
let iteration = ctx.iteration;
let task_start = ctx.task_start;
let learning_ctx = &mut *ctx.learning_ctx;
let task_tokens_used = ctx.task_tokens_used;
let _user_text = ctx.user_text;
let restrict_to_personal_memory_tools = ctx.restrict_to_personal_memory_tools;
let active_skill_names = ctx.active_skill_names;
let active_untrusted_external_reference_skills =
ctx.active_untrusted_external_reference_skills;
let restrict_untrusted_external_reference_tools =
ctx.restrict_untrusted_external_reference_tools;
let is_reaffirmation_challenge_turn = ctx.is_reaffirmation_challenge_turn;
let personal_memory_tool_call_cap = ctx.personal_memory_tool_call_cap;
let base_tool_defs = ctx.base_tool_defs;
let available_capabilities = ctx.available_capabilities;
let policy_bundle = ctx.policy_bundle;
let status_tx = ctx.status_tx.clone();
let channel_ctx = ctx.channel_ctx;
let user_role = ctx.user_role;
let heartbeat = ctx.heartbeat;
let turn_context = ctx.turn_context;
let resolved_goal_id = ctx.resolved_goal_id;
let evidence_state = &mut *ctx.evidence_state;
let validation_state = &mut *ctx.validation_state;
let mut tool_defs = std::mem::take(ctx.tool_defs);
let mut total_tool_calls_attempted = *ctx.total_tool_calls_attempted;
let mut total_successful_tool_calls = *ctx.total_successful_tool_calls;
let mut tool_failure_count = std::mem::take(ctx.tool_failure_count);
let mut tool_failure_signatures = std::mem::take(ctx.tool_failure_signatures);
let mut tool_transient_failure_count = std::mem::take(ctx.tool_transient_failure_count);
let mut tool_cooldown_until_iteration = std::mem::take(ctx.tool_cooldown_until_iteration);
let mut tool_call_count = std::mem::take(ctx.tool_call_count);
let mut personal_memory_tool_calls = *ctx.personal_memory_tool_calls;
let mut no_evidence_result_streak = *ctx.no_evidence_result_streak;
let mut no_evidence_tools_seen = std::mem::take(ctx.no_evidence_tools_seen);
let mut evidence_gain_count = *ctx.evidence_gain_count;
let mut pending_error_solution_ids = std::mem::take(ctx.pending_error_solution_ids);
let mut tool_error_history = std::mem::take(ctx.tool_error_history);
let mut reflection_completed = std::mem::take(ctx.reflection_completed);
let mut pending_reflection_recoveries = std::mem::take(ctx.pending_reflection_recoveries);
let mut tool_failure_patterns = std::mem::take(ctx.tool_failure_patterns);
let mut last_tool_failure = std::mem::take(ctx.last_tool_failure);
let mut in_session_learned = std::mem::take(ctx.in_session_learned);
let mut unknown_tools = std::mem::take(ctx.unknown_tools);
let mut recent_tool_calls = std::mem::take(ctx.recent_tool_calls);
let mut consecutive_same_tool = std::mem::take(ctx.consecutive_same_tool);
let mut consecutive_same_tool_arg_hashes =
std::mem::take(ctx.consecutive_same_tool_arg_hashes);
let mut force_text_response = *ctx.force_text_response;
let mut pending_system_messages = std::mem::take(ctx.pending_system_messages);
let mut recent_tool_names = std::mem::take(ctx.recent_tool_names);
let mut successful_send_file_keys = std::mem::take(ctx.successful_send_file_keys);
let mut cli_agent_boundary_injected = *ctx.cli_agent_boundary_injected;
let mut pending_background_ack = std::mem::take(ctx.pending_background_ack);
let mut pending_external_action_ack: Option<String> = None;
let mut stall_count = *ctx.stall_count;
let mut deferred_no_tool_streak = *ctx.deferred_no_tool_streak;
let mut consecutive_clean_iterations = *ctx.consecutive_clean_iterations;
let mut fallback_expanded_once = *ctx.fallback_expanded_once;
let mut known_project_dir = std::mem::take(ctx.known_project_dir);
let mut dirs_with_project_inspect_file_evidence =
std::mem::take(ctx.dirs_with_project_inspect_file_evidence);
let mut dirs_with_search_no_matches = std::mem::take(ctx.dirs_with_search_no_matches);
let mut require_file_recheck_before_answer = *ctx.require_file_recheck_before_answer;
let mut completion_progress = ctx.completion_progress.clone();
let mut tool_result_cache = std::mem::take(ctx.tool_result_cache);
let execution_state = &mut *ctx.execution_state;
macro_rules! commit_state {
() => {
*ctx.tool_defs = tool_defs;
*ctx.total_tool_calls_attempted = total_tool_calls_attempted;
*ctx.total_successful_tool_calls = total_successful_tool_calls;
*ctx.tool_failure_count = tool_failure_count;
*ctx.tool_failure_signatures = tool_failure_signatures;
*ctx.tool_transient_failure_count = tool_transient_failure_count;
*ctx.tool_cooldown_until_iteration = tool_cooldown_until_iteration;
*ctx.tool_call_count = tool_call_count;
*ctx.personal_memory_tool_calls = personal_memory_tool_calls;
*ctx.no_evidence_result_streak = no_evidence_result_streak;
*ctx.no_evidence_tools_seen = no_evidence_tools_seen;
*ctx.evidence_gain_count = evidence_gain_count;
*ctx.pending_error_solution_ids = pending_error_solution_ids;
*ctx.tool_error_history = tool_error_history;
*ctx.reflection_completed = reflection_completed;
*ctx.pending_reflection_recoveries = pending_reflection_recoveries;
*ctx.tool_failure_patterns = tool_failure_patterns;
*ctx.last_tool_failure = last_tool_failure;
*ctx.in_session_learned = in_session_learned;
*ctx.unknown_tools = unknown_tools;
*ctx.recent_tool_calls = recent_tool_calls;
*ctx.consecutive_same_tool = consecutive_same_tool;
*ctx.consecutive_same_tool_arg_hashes = consecutive_same_tool_arg_hashes;
*ctx.force_text_response = force_text_response;
*ctx.pending_system_messages = pending_system_messages;
*ctx.recent_tool_names = recent_tool_names;
*ctx.successful_send_file_keys = successful_send_file_keys;
*ctx.cli_agent_boundary_injected = cli_agent_boundary_injected;
*ctx.pending_background_ack = pending_background_ack;
*ctx.pending_external_action_ack = pending_external_action_ack;
*ctx.stall_count = stall_count;
*ctx.deferred_no_tool_streak = deferred_no_tool_streak;
*ctx.consecutive_clean_iterations = consecutive_clean_iterations;
*ctx.fallback_expanded_once = fallback_expanded_once;
*ctx.known_project_dir = known_project_dir;
*ctx.dirs_with_project_inspect_file_evidence =
dirs_with_project_inspect_file_evidence;
*ctx.dirs_with_search_no_matches = dirs_with_search_no_matches;
*ctx.require_file_recheck_before_answer = require_file_recheck_before_answer;
*ctx.completion_progress = completion_progress.clone();
*ctx.tool_result_cache = tool_result_cache;
};
}
if known_project_dir.is_none() {
known_project_dir =
extract_project_dir_hint_with_aliases(_user_text, &self.path_aliases.projects);
}
let mut successful_tool_calls = 0;
let mut iteration_had_tool_failures = false;
let mut hard_block_streak: usize = 0;
info!(
session_id,
iteration,
tool_count = resp.tool_calls.len(),
total_successful_tool_calls,
"Tool execution phase starting"
);
super::result_learning::expire_stale_pending_reflection_recoveries(
&mut pending_reflection_recoveries,
iteration,
);
for tc in &resp.tool_calls {
if let Some(limit) =
execution_state.exhausted_limit(task_tokens_used, task_start.elapsed())
{
force_text_response = true;
self.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionStateSnapshot,
format!(
"Stopped additional tool execution because {} is exhausted",
limit.as_str()
),
json!({
"condition": "execution_budget_exhausted_mid_iteration",
"budget_limit": limit,
"execution_state": execution_state.clone(),
"next_tool_blocked": tc.name,
}),
)
.await;
break;
}
let policy_tool_budget = policy_bundle.policy.tool_budget;
if self.policy_config.policy_enforce
&& is_hard_policy_tool_budget_reached(
total_tool_calls_attempted,
policy_tool_budget,
)
{
force_text_response = true;
pending_system_messages
.push(SystemDirective::HardPolicyToolBudgetReached { policy_tool_budget });
self.emit_decision_point(
emitter,
task_id,
iteration,
DecisionType::ToolBudgetBlock,
format!(
"Blocked tool {} because hard tool budget was reached",
tc.name
),
json!({
"tool": tc.name,
"policy_tool_budget": policy_tool_budget,
"total_tool_calls_attempted": total_tool_calls_attempted,
"reason": "hard_policy_tool_budget_reached"
}),
)
.await;
let result_text = ToolResultNotice::HardPolicyToolBudgetBlocked {
policy_tool_budget,
tool_name: tc.name.clone(),
}
.render();
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.2,
..Message::runtime_defaults()
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
continue;
}
total_tool_calls_attempted = total_tool_calls_attempted.saturating_add(1);
let send_file_key = if tc.name == "send_file" {
extract_send_file_dedupe_key_from_args(&tc.arguments)
} else {
None
};
let is_personal_memory_tool_call = is_personal_memory_tool(&tc.name);
if restrict_to_personal_memory_tools {
if !is_personal_memory_tool_call {
let result_text = ToolResultNotice::PersonalMemoryToolsOnly {
tool_name: tc.name.clone(),
}
.render();
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.1,
..Message::runtime_defaults()
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
continue;
}
if personal_memory_tool_calls >= personal_memory_tool_call_cap {
force_text_response = true;
pending_system_messages
.push(SystemDirective::PersonalMemoryRecheckLimitReached);
let result_text =
"Targeted personal-memory re-check limit reached. No further tool calls are allowed for this question."
.to_string();
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.2,
..Message::runtime_defaults()
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
continue;
}
personal_memory_tool_calls = personal_memory_tool_calls.saturating_add(1);
}
if restrict_untrusted_external_reference_tools
&& crate::agent::is_untrusted_external_reference_blocked_tool(&tc.name)
{
let result_text = blocked_for_untrusted_external_reference_message(
&tc.name,
active_untrusted_external_reference_skills,
);
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.15,
..Message::runtime_defaults()
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
iteration_had_tool_failures = true;
continue;
}
let tool_is_known_but_hidden = !tool_is_currently_exposed(&tool_defs, &tc.name)
&& (tool_is_currently_exposed(base_tool_defs, &tc.name)
|| self.has_registered_tool(&tc.name));
if tool_is_known_but_hidden {
*tool_call_count.entry(tc.name.clone()).or_insert(0) += 1;
self.emit_decision_point(
emitter,
task_id,
iteration,
DecisionType::ToolBudgetBlock,
format!(
"Blocked tool {} because it is not currently exposed",
tc.name
),
json!({
"tool": tc.name,
"reason": "tool_not_currently_exposed",
}),
)
.await;
let result_text = ToolResultNotice::ToolNotCurrentlyExposed {
tool_name: tc.name.clone(),
}
.render();
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.15,
..Message::runtime_defaults()
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
iteration_had_tool_failures = true;
continue;
}
let mut effective_arguments = tc.arguments.clone();
let mut injected_project_dir: Option<String> = None;
if let Some(explicit_dir) = project_dir_from_tool_args(&tc.name, &effective_arguments) {
known_project_dir = Some(explicit_dir);
}
if let Some((updated_args, injected)) = maybe_inject_project_dir_into_tool_args(
&tc.name,
&effective_arguments,
known_project_dir.as_deref(),
) {
effective_arguments = updated_args;
injected_project_dir = Some(injected);
}
let attempted_required_file_recheck = require_file_recheck_before_answer
&& is_file_recheck_tool(&tc.name)
&& tool_call_includes_project_path(&tc.name, &effective_arguments);
let internal_scope_violation =
raw_internal_scope_violation(&tc.arguments, session_id, resolved_goal_id);
let allowed_project_scope = (!turn_context.allow_multi_project_scope)
.then_some(turn_context.primary_project_scope.as_deref())
.flatten();
let call_semantics = self
.tools
.iter()
.find(|tool| tool.name() == tc.name && tool.is_available())
.map(|tool| tool.call_semantics(&effective_arguments))
.unwrap_or_default();
let tool_caps = available_capabilities
.get(&tc.name)
.copied()
.unwrap_or_default();
let step_plan = compile_step_execution_plan(
&execution_state.execution_id,
execution_state.current_plan_version.unwrap_or(1),
iteration,
&tc.id,
&tc.name,
&effective_arguments,
&call_semantics,
tool_caps,
allowed_project_scope,
);
execution_state.begin_step(step_plan.clone());
if matches!(
step_plan.approval_requirement,
ApprovalRequirement::Required { .. }
) || call_semantics.mutates_state()
|| tool_caps.external_side_effect
{
execution_state.promote_persistence(ExecutionPersistence::Durable);
}
execution_state.mark_persisted_now();
self.emit_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionStateSnapshot,
format!("Compiled execution step for {}", tc.name),
json!({
"condition": "step_compiled",
"execution_id": execution_state.execution_id,
"current_step": step_plan,
"execution_state": execution_state.clone(),
}),
)
.await;
let step_scope_violation =
target_scope_violation_for_tool_call(&tc.name, &effective_arguments, &step_plan);
if let Some(scope_reason) = internal_scope_violation.or(step_scope_violation) {
POLICY_METRICS
.cross_scope_blocked_total
.fetch_add(1, Ordering::Relaxed);
*tool_call_count.entry(tc.name.clone()).or_insert(0) += 1;
let result_text = ToolResultNotice::ScopeLockBlockedResult {
tool_name: tc.name.clone(),
reason: scope_reason.clone(),
}
.render();
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text.clone()),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.2,
..Message::runtime_defaults()
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
validation_state.record_failure(ValidationFailure::ScopeViolation);
validation_state.note_replan();
learning_ctx.record_replay_note(
ReplayNoteCategory::ValidationFailure,
"target_scope_violation",
format!(
"Blocked {} because the requested target fell outside the compiled step scope.",
tc.name
),
true,
);
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"replan_required",
format!(
"Replanned because {} attempted to act outside the allowed target scope.",
tc.name
),
true,
);
pending_system_messages.push(SystemDirective::ScopeLockBlocked {
tool_name: tc.name.clone(),
reason: scope_reason,
});
self.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionFailureClassification,
format!("Classified scope violation for {}", tc.name),
json!({
"condition": "target_scope_violation",
"tool": tc.name,
"execution_failure_kind": ExecutionFailureKind::LogicFailure,
"failure_class": "semantic",
"key_error_line": "scope violation",
"loop_repetition_reason": validation_state.loop_repetition_reason,
}),
)
.await;
execution_state.complete_current_step(StepExecutionOutcome::NonrecoverableFailure);
execution_state.mark_persisted_now();
iteration_had_tool_failures = true;
continue;
}
if let Some(contract_violation) =
deterministic_tool_contract_violation(&tc.name, &effective_arguments)
{
*tool_call_count.entry(tc.name.clone()).or_insert(0) += 1;
let result_text = ToolResultNotice::DeterministicArgumentContractBlocked {
tool_name: tc.name.clone(),
reason: contract_violation.reason.clone(),
}
.render();
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text.clone()),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.2,
..Message::runtime_defaults()
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
pending_system_messages.push(SystemDirective::ArgumentContractBlocked {
tool_name: tc.name.clone(),
reason: contract_violation.reason.to_string(),
coaching: contract_violation.coaching.to_string(),
});
learning_ctx.record_replay_note(
ReplayNoteCategory::ValidationFailure,
"tool_contract_violation",
format!(
"Blocked {} because its arguments violated a deterministic contract: {}.",
tc.name, contract_violation.reason
),
true,
);
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"retry_step",
format!(
"Retried locally after deterministic contract failure on {}.",
tc.name
),
true,
);
self.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionFailureClassification,
format!("Classified deterministic contract failure for {}", tc.name),
json!({
"condition": "tool_contract_violation",
"tool": tc.name,
"execution_failure_kind": ExecutionFailureKind::ToolContractFailure,
"failure_class": "semantic",
"key_error_line": contract_violation.reason,
"loop_repetition_reason": "retry_step",
}),
)
.await;
execution_state.complete_current_step(StepExecutionOutcome::NonrecoverableFailure);
execution_state.mark_persisted_now();
iteration_had_tool_failures = true;
continue;
}
match self
.maybe_block_tool_by_budget(
tc,
&mut ToolBudgetBlockCtx {
emitter,
task_id,
session_id,
iteration,
tool_failure_count: &tool_failure_count,
tool_transient_failure_count: &tool_transient_failure_count,
tool_cooldown_until_iteration: &mut tool_cooldown_until_iteration,
tool_call_count: &tool_call_count,
unknown_tools: &unknown_tools,
},
)
.await?
{
ToolBlockKind::NotBlocked => {}
ToolBlockKind::Cooldown => {
continue;
}
ToolBlockKind::HardBlock => {
hard_block_streak += 1;
if hard_block_streak >= 3 {
force_text_response = true;
pending_system_messages.push(SystemDirective::HardToolLimitReached);
} else {
pending_system_messages.push(SystemDirective::SpecificToolBlocked {
tool_name: tc.name.clone(),
});
}
continue;
}
}
if let Some(guard_outcome) = self
.maybe_handle_loop_pattern_guards(
tc,
emitter,
task_id,
session_id,
iteration,
task_start,
task_tokens_used,
learning_ctx,
&mut recent_tool_calls,
&mut recent_tool_names,
&mut consecutive_same_tool,
&mut consecutive_same_tool_arg_hashes,
&tool_result_cache,
)
.await?
{
match guard_outcome {
LoopPatternGuardOutcome::ContinueLoop => {
continue;
}
LoopPatternGuardOutcome::Return(outcome) => {
if let Some(ref mut plan) = execution_state.active_linear_intent_plan {
if !plan.all_steps_complete() {
plan.complete_current_step_with_evidence(
"Force-advanced: stall detected".to_string(),
);
tracing::info!(
session_id,
advanced_step = plan.current_step_cursor - 1,
"Plan step force-advanced due to stall detection — \
returning to main loop for next iteration"
);
commit_state!();
return Ok(ToolExecutionOutcome::NextIteration);
}
}
commit_state!();
return Ok(outcome);
}
}
}
if self
.maybe_handle_duplicate_send_file_noop(
tc,
&mut DuplicateSendFileNoopCtx {
send_file_key: send_file_key.as_ref(),
successful_send_file_keys: &successful_send_file_keys,
session_id,
iteration,
effective_arguments: &effective_arguments,
force_text_response: &mut force_text_response,
pending_system_messages: &mut pending_system_messages,
successful_tool_calls: &mut successful_tool_calls,
total_successful_tool_calls: &mut total_successful_tool_calls,
tool_call_count: &mut tool_call_count,
learning_ctx,
emitter,
task_id,
policy_bundle,
},
)
.await?
{
continue;
}
let io = self
.execute_tool_call_io(
tc,
&ToolExecutionIoCtx {
effective_arguments: &effective_arguments,
idempotency_key: execution_state
.current_step
.as_ref()
.and_then(|step| step.idempotency_key.as_deref()),
injected_project_dir: injected_project_dir.as_deref(),
project_scope: allowed_project_scope,
session_id,
task_id,
status_tx: &status_tx,
channel_ctx,
user_role,
heartbeat,
emitter,
policy_bundle,
},
)
.await;
execution_state.record_tool_call();
execution_state.mark_persisted_now();
let mut result_text = io.result_text;
let mut tool_duration_ms = io.tool_duration_ms;
let mut result_metadata = io.result_metadata;
if tc.name == "run_command" && run_command_policy_block_requires_terminal(&result_text)
{
if let Some(terminal_args) =
build_terminal_fallback_arguments_from_run_command(&effective_arguments)
{
let fallback_started = Instant::now();
let terminal_result = self
.execute_tool_with_watchdog_outcome(
"terminal",
&terminal_args,
&tool_exec::ToolExecCtx {
session_id,
task_id: Some(task_id),
status_tx: status_tx.clone(),
channel_visibility: channel_ctx.visibility,
channel_id: channel_ctx.channel_id.as_deref(),
project_scope: allowed_project_scope,
trusted: channel_ctx.trusted,
user_role,
},
)
.await;
let fallback_duration =
fallback_started.elapsed().as_millis().min(u64::MAX as u128) as u64;
tool_duration_ms = tool_duration_ms.saturating_add(fallback_duration);
let fallback_note = ToolResultNotice::RunCommandPolicyAutoRoutedToTerminal;
result_text = match terminal_result {
Ok(outcome) => {
result_metadata = outcome.metadata;
format!("{}\n\n{}", outcome.output, fallback_note.render())
}
Err(e) => {
result_metadata.transport_error = Some(e.to_string());
format!("Error: {}\n\n{}", e, fallback_note.render())
}
};
if self.context_window_config.enabled {
result_text = crate::memory::context_window::compress_tool_result(
"terminal",
&result_text,
self.context_window_config.max_tool_result_chars,
);
}
}
}
let background_detached =
tool_result_indicates_background_detach(&tc.name, &result_text, &result_metadata);
if background_detached {
pending_background_ack = Some(build_background_detach_ack(
&tc.name,
&result_text,
&result_metadata,
));
force_text_response = true;
let notifications_active = result_metadata.completion_notifications_enabled;
let system_msg = SystemDirective::BackgroundHandoff {
notifications_active,
};
pending_system_messages.push(system_msg.clone());
result_text = format!("{}\n\n{}", result_text, system_msg.render());
}
*tool_call_count.entry(tc.name.clone()).or_insert(0) += 1;
if matches!(tc.name.as_str(), "read_file" | "search_files") {
let cache_hash = hash_tool_call(&tc.name, &tc.arguments);
let max_cache_chars = 8000;
let primary_result_text =
crate::traits::extract_primary_message_content(&result_text, &[]);
if primary_result_text.len() <= max_cache_chars
&& !result_text.starts_with("Error")
&& !crate::traits::message_content_is_structural_only(&result_text, &[])
{
tool_result_cache.insert(cache_hash, primary_result_text.into_owned());
} else if primary_result_text.len() > max_cache_chars {
let mut boundary = max_cache_chars;
while boundary > 0 && !primary_result_text.is_char_boundary(boundary) {
boundary -= 1;
}
tool_result_cache.insert(
cache_hash,
format!(
"{}…\n[truncated — {} total chars]",
&primary_result_text[..boundary],
primary_result_text.len()
),
);
}
const MAX_CACHE_ENTRIES: usize = 20;
if tool_result_cache.len() > MAX_CACHE_ENTRIES {
if let Some(key) = tool_result_cache.keys().next().copied() {
tool_result_cache.remove(&key);
}
}
}
let failure_class = classify_tool_result_failure_with_context(
&tc.name,
&result_text,
Some(&effective_arguments),
Some(&result_metadata),
);
let execution_failure_kind = classify_execution_failure_kind(
&tc.name,
&result_text,
Some(&effective_arguments),
Some(&result_metadata),
false,
);
let is_error = failure_class.is_some();
let tool_summary = format!(
"{}({})",
tc.name,
summarize_tool_args(&tc.name, &effective_arguments)
);
if is_error {
learning_ctx
.tool_calls
.push(format!("{} [FAILED]", tool_summary));
} else {
learning_ctx.tool_calls.push(tool_summary.clone());
}
execution_state.complete_current_step(classify_step_execution_outcome(
is_error,
background_detached,
));
execution_state.mark_persisted_now();
match execution_failure_kind {
Some(ExecutionFailureKind::ToolContractFailure)
| Some(ExecutionFailureKind::ToolInvocationFailure) => {
validation_state.note_retry(LoopRepetitionReason::RetryStep);
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"retry_step",
format!(
"Retried {} locally after {:?}.",
tc.name, execution_failure_kind
),
true,
);
}
Some(ExecutionFailureKind::EnvironmentFailure)
| Some(ExecutionFailureKind::LogicFailure) => {
validation_state.note_replan();
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"replan_required",
format!(
"Replanned after {:?} on {}.",
execution_failure_kind, tc.name
),
true,
);
}
None => {
validation_state.clear_loop_repetition_reason();
}
}
{
let caps = available_capabilities
.get(&tc.name)
.copied()
.unwrap_or_default();
let is_external_mutation =
caps.external_side_effect && result_metadata.semantics.mutates_state();
let error_summary = if is_error {
extract_error_summary_line(&result_text)
} else {
None
};
let planned_step = if is_external_mutation {
execution_state
.current_linear_intent_step()
.filter(|step| {
linear_intent_step_matches_tool_call(
step,
&tc.name,
&effective_arguments,
)
})
.cloned()
} else {
None
};
let expected_step_count = execution_state
.active_linear_intent_plan
.as_ref()
.map(|plan| plan.steps.len());
let plan_version = execution_state
.active_linear_intent_plan
.as_ref()
.map(|plan| plan.plan_version);
execution_state.record_outcome(OutcomeEntry {
tool_name: tc.name.clone(),
success: !is_error,
http_status: result_metadata.http_status,
is_external_mutation,
error_summary,
iteration,
plan_version,
planned_step_id: planned_step.as_ref().map(|s| s.step_id.clone()),
planned_step_index: planned_step.as_ref().map(|s| s.step_index),
planned_step_description: planned_step.as_ref().map(|s| s.description.clone()),
expected_step_count,
});
if !is_error && planned_step.is_some() {
execution_state.advance_linear_intent_step_after_external_success();
}
}
self.emit_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionStateSnapshot,
format!("Recorded step outcome for {}", tc.name),
json!({
"condition": "step_completed",
"tool": tc.name,
"outcome": execution_state.last_outcome,
"execution_state": execution_state.clone(),
"background_detached": background_detached,
"is_error": is_error,
}),
)
.await;
info!(
session_id,
iteration,
tool = %tc.name,
is_error,
execution_failure_kind = ?execution_failure_kind,
result_len = result_text.len(),
result_preview = &result_text.chars().take(80).collect::<String>() as &str,
"Tool execution completed"
);
if let Some(execution_failure_kind) = execution_failure_kind {
self.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionFailureClassification,
format!("Classified execution failure for {}", tc.name),
json!({
"condition": match execution_failure_kind {
ExecutionFailureKind::ToolContractFailure => "tool_contract_failure",
ExecutionFailureKind::ToolInvocationFailure => "tool_invocation_failure",
ExecutionFailureKind::EnvironmentFailure => "environment_failure",
ExecutionFailureKind::LogicFailure => "logic_failure",
},
"tool": tc.name,
"execution_failure_kind": execution_failure_kind,
"failure_class": failure_class.map(|class| match class {
ToolFailureClass::Semantic => "semantic",
ToolFailureClass::Transient => "transient",
}),
"key_error_line": extract_key_error_line(&result_text),
"loop_repetition_reason": validation_state.loop_repetition_reason,
}),
)
.await;
}
let learning_env = ResultLearningEnv {
attempted_required_file_recheck,
send_file_key,
restrict_to_personal_memory_tools,
is_reaffirmation_challenge_turn,
session_id,
task_id,
emitter,
task_start,
iteration,
tool_arguments: &effective_arguments,
tool_summary: &tool_summary,
};
let mut learning_state = ResultLearningState {
learning_ctx,
no_evidence_result_streak: &mut no_evidence_result_streak,
iteration_had_tool_failures: &mut iteration_had_tool_failures,
no_evidence_tools_seen: &mut no_evidence_tools_seen,
evidence_gain_count: &mut evidence_gain_count,
unknown_tools: &mut unknown_tools,
tool_failure_count: &mut tool_failure_count,
tool_failure_signatures: &mut tool_failure_signatures,
tool_transient_failure_count: &mut tool_transient_failure_count,
tool_cooldown_until_iteration: &mut tool_cooldown_until_iteration,
pending_error_solution_ids: &mut pending_error_solution_ids,
tool_error_history: &mut tool_error_history,
pending_reflection_recoveries: &mut pending_reflection_recoveries,
tool_failure_patterns: &mut tool_failure_patterns,
last_tool_failure: &mut last_tool_failure,
in_session_learned: &mut in_session_learned,
force_text_response: &mut force_text_response,
pending_system_messages: &mut pending_system_messages,
successful_tool_calls: &mut successful_tool_calls,
total_successful_tool_calls: &mut total_successful_tool_calls,
successful_send_file_keys: &mut successful_send_file_keys,
cli_agent_boundary_injected: &mut cli_agent_boundary_injected,
recent_tool_calls: &mut recent_tool_calls,
consecutive_same_tool: &mut consecutive_same_tool,
consecutive_same_tool_arg_hashes: &mut consecutive_same_tool_arg_hashes,
recent_tool_names: &mut recent_tool_names,
require_file_recheck_before_answer: &mut require_file_recheck_before_answer,
known_project_dir: &mut known_project_dir,
dirs_with_project_inspect_file_evidence:
&mut dirs_with_project_inspect_file_evidence,
dirs_with_search_no_matches: &mut dirs_with_search_no_matches,
};
let learning_outcome = self
.apply_result_learning(
tc,
&mut result_text,
is_error,
failure_class,
execution_failure_kind,
&learning_env,
&mut learning_state,
)
.await?;
if let Some(outcome) = learning_outcome.control_flow {
commit_state!();
return Ok(outcome);
}
if let Some(failure) = learning_outcome.semantic_failure.as_ref() {
if let Some(diagnosis) = self
.maybe_trigger_reflection(
&tc.name,
&effective_arguments,
failure,
_user_text,
active_skill_names,
&tool_error_history,
&mut reflection_completed,
session_id,
)
.await
{
let failure_key = (tc.name.clone(), failure.signature.clone());
pending_system_messages.push(SystemDirective::ReflectionDiagnosis {
tool_name: tc.name.clone(),
root_cause: diagnosis.root_cause.clone(),
recommended_action: diagnosis.recommended_action.clone(),
});
if let Some(draft) = diagnosis.learning {
if let Some(solution_id) =
super::reflection::store_reflection_learning(&self.state, draft).await
{
pending_reflection_recoveries.insert(
tc.name.clone(),
super::reflection::PendingReflectionRecovery {
signature: failure_key.1,
solution_ids: vec![solution_id],
verify_on_iteration: iteration.saturating_add(1),
},
);
}
}
}
}
if !is_error {
execution_state.extend_budget_on_progress();
send_status(
&status_tx,
StatusUpdate::ToolComplete {
name: tc.name.clone(),
summary: summarize_completed_tool_result(&result_text),
},
);
let caps = available_capabilities
.get(&tc.name)
.copied()
.unwrap_or_default();
let semantics = &result_metadata.semantics;
let evidence_count_before = evidence_state.records.len();
record_successful_tool_evidence(
evidence_state,
&tc.name,
&effective_arguments,
semantics,
);
let action_target = step_plan
.expected_targets
.first()
.or_else(|| step_plan.target_scope.allowed_targets.first())
.map(|target| target.value.clone());
validation_state.record_action(
Some(&tc.name),
action_target,
evidence_state.records.len() > evidence_count_before,
);
validation_state.clear_loop_repetition_reason();
if semantics.mutates_state() {
completion_progress.mark_mutation(&turn_context.completion_contract);
if caps.external_side_effect {
completion_progress.mark_successful_external_mutation();
}
}
if semantics.observes_state() {
let can_verify =
tool_result_contains_verifiable_evidence(semantics, &result_text);
let matched_contract = observation_matches_completion_contract(
&turn_context.completion_contract,
semantics,
&effective_arguments,
&result_text,
);
completion_progress.mark_observation(
&turn_context.completion_contract,
can_verify && matched_contract,
);
}
if tc.name == "send_file" && completion_progress.verification_pending {
completion_progress.verification_pending = false;
completion_progress.verification_count =
completion_progress.verification_count.saturating_add(1);
info!(
session_id,
iteration, "send_file success cleared verification_pending"
);
}
if completion_progress.verification_pending {
pending_external_action_ack = None;
} else if !background_detached
&& semantics.mutates_state()
&& caps.external_side_effect
&& should_build_external_action_ack(&result_text)
{
pending_external_action_ack =
Some(build_external_action_completion_ack(&result_text));
}
} else {
pending_external_action_ack = None;
let caps = available_capabilities
.get(&tc.name)
.copied()
.unwrap_or_default();
if caps.external_side_effect && result_metadata.semantics.mutates_state() {
completion_progress.mark_failed_external_mutation();
let error_hint = extract_error_summary_line(&result_text)
.unwrap_or_else(|| "request failed".to_string());
pending_system_messages.push(SystemDirective::ExternalMutationFailed {
tool_name: tc.name.clone(),
status_code: result_metadata.http_status,
error_hint,
});
}
if matches!(
execution_state.last_outcome,
Some(StepExecutionOutcome::NonrecoverableFailure)
) {
validation_state.record_failure(ValidationFailure::NonrecoverableFailure);
}
}
let tool_msg = Message {
content: Some(result_text.clone()),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
importance: 0.3, ..Message::new_runtime(Uuid::new_v4().to_string(), session_id, "tool")
};
self.append_tool_message_with_result_event(
emitter,
&tool_msg,
!is_error,
tool_duration_ms,
if is_error {
Some(result_text.clone())
} else {
None
},
Some(task_id),
)
.await?;
let direct_response = if !is_error
&& self.depth == 0
&& resp.tool_calls.len() == 1
&& !background_detached
{
result_metadata.direct_response.clone()
} else {
None
};
if let Some(reply) = direct_response {
let assistant_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "assistant".to_string(),
content: Some(reply.clone()),
tool_call_id: None,
tool_name: None,
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.5,
..Message::runtime_defaults()
};
self.append_assistant_message_with_event(
emitter,
&assistant_msg,
"system",
None,
None,
)
.await?;
self.emit_task_end(
emitter,
task_id,
TaskStatus::Completed,
task_start,
iteration,
learning_ctx.tool_calls.len(),
None,
Some(reply.chars().take(200).collect()),
)
.await;
learning_ctx.completed_naturally = true;
let learning_ctx_for_task = learning_ctx.clone();
let state = self.state.clone();
tokio::spawn(async move {
if let Err(e) = post_task::process_learning(&state, learning_ctx_for_task).await
{
warn!("Learning failed: {}", e);
}
});
commit_state!();
return Ok(ToolExecutionOutcome::Return(Ok(reply)));
}
if is_error {
let _ = emitter
.emit(
EventType::Error,
ErrorData::tool_error(
tc.name.clone(),
result_text.clone(),
Some(task_id.to_string()),
),
)
.await;
}
if let Some(ref tid) = self.task_id {
let activity = TaskActivity {
id: 0,
task_id: tid.clone(),
activity_type: "tool_call".to_string(),
tool_name: Some(tc.name.clone()),
tool_args: Some(effective_arguments.chars().take(1000).collect()),
result: Some(result_text.chars().take(2000).collect()),
success: Some(!is_error),
tokens_used: None,
created_at: chrono::Utc::now().to_rfc3339(),
};
if let Err(e) = self.state.log_task_activity(&activity).await {
warn!(task_id = %tid, error = %e, "Failed to log task activity");
}
}
if background_detached {
info!(
session_id,
iteration,
tool = %tc.name,
"Background task detached; ending tool execution phase early and forcing text response"
);
break;
}
}
info!(
session_id,
iteration,
successful_tool_calls,
iteration_had_tool_failures,
total_successful_tool_calls,
stall_count,
"Tool execution phase completed, entering post-loop"
);
self.apply_post_tool_iteration_controls(
super::post_loop::PostToolIterationInputs {
session_id,
iteration,
task_tokens_used,
successful_tool_calls,
iteration_had_tool_failures,
restrict_to_personal_memory_tools,
base_tool_defs,
available_capabilities,
policy_bundle,
total_tool_calls_attempted,
has_active_goal: resolved_goal_id.is_some(),
completed_tool_calls: &learning_ctx.tool_calls,
recent_tool_names: &recent_tool_names,
user_text: _user_text,
},
super::post_loop::PostToolIterationState {
total_successful_tool_calls: &mut total_successful_tool_calls,
force_text_response: &mut force_text_response,
pending_system_messages: &mut pending_system_messages,
tool_defs: &mut tool_defs,
stall_count: &mut stall_count,
deferred_no_tool_streak: &mut deferred_no_tool_streak,
consecutive_clean_iterations: &mut consecutive_clean_iterations,
fallback_expanded_once: &mut fallback_expanded_once,
},
);
commit_state!();
Ok(ToolExecutionOutcome::NextIteration)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::agent::execution_state::{
default_execution_budget, BudgetTier, ExecutionPersistence, ExecutionState, RetryPolicy,
};
use crate::traits::ToolCallEffect;
/// A `_session_id` embedded in the raw tool arguments that differs from the
/// active session must be reported as a scope violation.
#[test]
fn internal_scope_violation_detects_session_mismatch() {
    let outcome = raw_internal_scope_violation(
        r#"{"_session_id":"other-session"}"#,
        "expected-session",
        None,
    );
    assert!(outcome.is_some());
    // The rendered reason names the offending field.
    let reason = outcome.unwrap_or_default();
    assert!(reason.contains("_session_id mismatch"));
}
/// A `_goal_id` embedded in the raw tool arguments that differs from the
/// resolved goal must be reported as a scope violation.
#[test]
fn internal_scope_violation_detects_goal_mismatch() {
    let outcome = raw_internal_scope_violation(r#"{"_goal_id":"goal-2"}"#, "s", Some("goal-1"));
    assert!(outcome.is_some());
    // The rendered reason names the offending field.
    let reason = outcome.unwrap_or_default();
    assert!(reason.contains("_goal_id mismatch"));
}
/// `scheduled_goal_runs` called with `action = "run_history"` and no `goal_id`
/// must be rejected, and the violation must carry a reason and coaching text.
#[test]
fn deterministic_tool_contract_violation_blocks_scheduled_goal_runs_without_goal_id() {
    let outcome = deterministic_tool_contract_violation(
        "scheduled_goal_runs",
        r#"{"action":"run_history"}"#,
    );
    assert!(outcome.is_some());
    let blocked = outcome.expect("violation expected");
    assert!(blocked.reason.contains("requires `goal_id`"));
    // NOTE(review): the sibling `manage_memories` test expects the coaching to
    // mention `list_scheduled`; confirm `remember_fact` is the intended hint here.
    assert!(blocked.coaching.contains("remember_fact"));
}
/// Supplying `goal_id` alongside `action = "run_history"` satisfies the
/// `scheduled_goal_runs` contract, so no violation is produced.
#[test]
fn deterministic_tool_contract_violation_allows_scheduled_goal_runs_with_goal_id() {
    let outcome = deterministic_tool_contract_violation(
        "scheduled_goal_runs",
        r#"{"action":"run_history","goal_id":"goal-123"}"#,
    );
    assert!(outcome.is_none());
}
/// `manage_memories` called with `action = "diagnose_scheduled"` and no
/// `goal_id` must be rejected, with coaching that mentions `list_scheduled`.
#[test]
fn deterministic_tool_contract_violation_blocks_manage_memories_without_goal_id() {
    let outcome = deterministic_tool_contract_violation(
        "manage_memories",
        r#"{"action":"diagnose_scheduled"}"#,
    );
    assert!(outcome.is_some());
    let blocked = outcome.expect("violation expected");
    assert!(blocked.reason.contains("requires `goal_id`"));
    assert!(blocked.coaching.contains("list_scheduled"));
}
/// Supplying `goal_id` alongside `action = "diagnose_scheduled"` satisfies the
/// `manage_memories` contract, so no violation is produced.
#[test]
fn deterministic_tool_contract_violation_allows_manage_memories_with_goal_id() {
    let outcome = deterministic_tool_contract_violation(
        "manage_memories",
        r#"{"action":"diagnose_scheduled","goal_id":"goal-123"}"#,
    );
    assert!(outcome.is_none());
}
/// A tool counts as exposed only when a definition with its name is present in
/// the current tool definition list.
#[test]
fn tool_is_currently_exposed_matches_current_tool_defs() {
    // Both definitions share the same shape and differ only in the tool name.
    let function_def = |name: &str| {
        json!({
            "type": "function",
            "function": {
                "name": name,
                "description": "demo",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "additionalProperties": false
                }
            }
        })
    };
    let exposed = vec![function_def("system_info"), function_def("remember_fact")];

    assert!(tool_is_currently_exposed(&exposed, "system_info"));
    // `cli_agent` has no definition in the list.
    assert!(!tool_is_currently_exposed(&exposed, "cli_agent"));
}
/// The same result text is treated as verifiable evidence only when the
/// semantics opt in to `ToolVerificationMode::ResultContent`.
#[test]
fn result_content_verification_requires_semantics_opt_in() {
    let result_text = "Latest post title: Scheduled reflection";
    let opted_in = ToolCallSemantics::observation()
        .with_verification_mode(crate::traits::ToolVerificationMode::ResultContent);
    let plain_observation = ToolCallSemantics::observation();

    assert!(tool_result_contains_verifiable_evidence(&opted_in, result_text));
    assert!(!tool_result_contains_verifiable_evidence(
        &plain_observation,
        result_text
    ));
}
/// An observation whose semantics carry a URL target hint equal to the
/// contract's verification target must match the completion contract, even
/// though the call arguments (`{}`) carry no target at all.
#[test]
fn semantics_target_hints_match_contract_targets() {
    let blog_url = "https://blog.aidaemon.ai";
    let wanted_target = VerificationTarget {
        kind: VerificationTargetKind::Url,
        value: blog_url.to_string(),
    };
    let contract = CompletionContract {
        requires_observation: true,
        verification_targets: vec![wanted_target],
        ..CompletionContract::default()
    };
    let observed = ToolCallSemantics::observation()
        .with_verification_mode(crate::traits::ToolVerificationMode::ResultContent)
        .with_target_hint(ToolTargetHintKind::Url, blog_url);

    let matched = observation_matches_completion_contract(
        &contract,
        &observed,
        "{}",
        "Latest post title: Scheduled reflection",
    );
    assert!(matched);
}
/// A mutation step with a hard-fail project scope of `/tmp/project-a` must
/// flag an `edit_file` call whose path lies under `/tmp/project-b`.
#[test]
fn target_scope_violation_flags_out_of_scope_mutation_path() {
    let target_scope = TargetScope {
        allowed_targets: vec![ToolTargetHint::new(
            ToolTargetHintKind::ProjectScope,
            "/tmp/project-a",
        )
        .expect("scope target")],
        hard_fail_outside_scope: true,
    };
    let retry_policy = RetryPolicy {
        max_attempts: 1,
        allow_tool_invocation_retry: false,
    };
    let plan = StepExecutionPlan {
        step_id: "step-1".to_string(),
        description: "Edit a scoped file".to_string(),
        plan_version: 1,
        primary_tool: Some("edit_file".to_string()),
        expected_effect: ToolCallEffect::Mutation,
        target_scope,
        expected_targets: Vec::new(),
        retry_policy,
        approval_requirement: ApprovalRequirement::NotNeeded,
        idempotency_key: None,
    };

    let outcome = target_scope_violation_for_tool_call(
        "edit_file",
        r#"{"path":"/tmp/project-b/src/main.rs"}"#,
        &plan,
    );
    assert!(outcome.is_some());
}
/// An observation step whose scope does not hard-fail must not flag a
/// `search_files` call, even when its path is outside the allowed project.
#[test]
fn target_scope_violation_skips_non_hard_fail_observation_steps() {
    let target_scope = TargetScope {
        allowed_targets: vec![ToolTargetHint::new(
            ToolTargetHintKind::ProjectScope,
            "/tmp/project-a",
        )
        .expect("scope target")],
        hard_fail_outside_scope: false,
    };
    let retry_policy = RetryPolicy {
        max_attempts: 1,
        allow_tool_invocation_retry: true,
    };
    let plan = StepExecutionPlan {
        step_id: "step-1".to_string(),
        description: "Inspect a path".to_string(),
        plan_version: 1,
        primary_tool: Some("search_files".to_string()),
        expected_effect: ToolCallEffect::Observation,
        target_scope,
        expected_targets: Vec::new(),
        retry_policy,
        approval_requirement: ApprovalRequirement::NotNeeded,
        idempotency_key: None,
    };

    let outcome = target_scope_violation_for_tool_call(
        "search_files",
        r#"{"path":"/tmp/project-b/src"}"#,
        &plan,
    );
    assert!(outcome.is_none());
}
/// Only the `run_command` safe-list rejection text should request a terminal
/// fallback; ordinary command output must not.
#[test]
fn run_command_policy_block_requires_terminal_detects_policy_errors() {
    let policy_rejection = "Error: Command 'npm install' is not in the safe command list for run_command. Use 'terminal' for this command.";
    let ordinary_output = "$ cargo test (exit: 0, 22ms)";

    assert!(run_command_policy_block_requires_terminal(policy_rejection));
    assert!(!run_command_policy_block_requires_terminal(ordinary_output));
}
/// The terminal fallback must use the `run` action and prefix the command with
/// a quoted `cd` into the original `working_dir`.
#[test]
fn build_terminal_fallback_arguments_preserves_working_dir() {
    let run_command_args = r#"{"command":"npm create vite@latest whatsapp-site -- --template react","working_dir":"/tmp/my folder"}"#;
    let fallback = build_terminal_fallback_arguments_from_run_command(run_command_args)
        .expect("fallback args expected");
    let payload: Value = serde_json::from_str(&fallback).expect("valid json");

    assert_eq!(payload["action"], "run");
    assert_eq!(
        payload["command"],
        "cd '/tmp/my folder' && npm create vite@latest whatsapp-site -- --template react"
    );
}
/// A single quote inside `working_dir` must be escaped with the `'"'"'` shell
/// idiom so the generated `cd` stays a single quoted argument.
#[test]
fn build_terminal_fallback_arguments_escapes_single_quotes() {
    let run_command_args = r#"{"command":"npm create vite@latest whatsapp-site -- --template react","working_dir":"/tmp/david's projects"}"#;
    let fallback = build_terminal_fallback_arguments_from_run_command(run_command_args)
        .expect("fallback args expected");
    let payload: Value = serde_json::from_str(&fallback).expect("valid json");

    assert_eq!(
        payload["command"],
        "cd '/tmp/david'\"'\"'s projects' && npm create vite@latest whatsapp-site -- --template react"
    );
}
/// Running a command from the parent directory of a not-yet-created project
/// target must not count as a scope violation, since scaffolding a new project
/// has to start from the parent.
///
/// The fixture creates a real `projects` directory inside a temp dir and scopes
/// the step to `projects/new-site`, which is deliberately never created.
#[test]
fn target_scope_violation_allows_run_command_parent_dir_for_new_project_scaffolding() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let parent = tmp.path().join("projects");
    std::fs::create_dir_all(&parent).expect("create parent");
    let target = parent.join("new-site");
    let step_plan = StepExecutionPlan {
        step_id: "step-1".to_string(),
        description: "Scaffold a project".to_string(),
        plan_version: 1,
        primary_tool: Some("run_command".to_string()),
        expected_effect: ToolCallEffect::Mutation,
        target_scope: TargetScope {
            allowed_targets: vec![ToolTargetHint::new(
                ToolTargetHintKind::ProjectScope,
                target.to_string_lossy().to_string(),
            )
            .expect("scope target")],
            hard_fail_outside_scope: true,
        },
        expected_targets: Vec::new(),
        retry_policy: RetryPolicy {
            max_attempts: 1,
            allow_tool_invocation_retry: false,
        },
        approval_requirement: ApprovalRequirement::NotNeeded,
        idempotency_key: None,
    };
    // Serialize through `json!` rather than splicing the path into a JSON
    // literal: temp-dir paths may contain characters that need escaping
    // (backslashes on Windows, quotes), which would otherwise yield invalid
    // JSON and make the test fail for the wrong reason.
    let args = json!({
        "command": "pwd",
        "working_dir": parent.to_string_lossy().into_owned(),
    })
    .to_string();
    let violation = target_scope_violation_for_tool_call("run_command", &args, &step_plan);
    assert!(violation.is_none());
}
/// Pins the hard-budget predicate for attempts at, above and below the limit,
/// plus the case where the limit itself is zero.
#[test]
fn hard_policy_tool_budget_reached_when_attempts_hit_limit() {
    // (attempts, limit, expected "reached" verdict)
    let cases = [
        (6, 6, true),
        (7, 6, true),
        (5, 6, false),
        (10, 0, false),
    ];
    for (attempts, limit, reached) in cases {
        assert_eq!(is_hard_policy_tool_budget_reached(attempts, limit), reached);
    }
}
/// Background detach is recognised either from the structured
/// `background_started` metadata flag or from marker text in the tool result.
#[test]
fn detects_background_detach_markers_for_supported_tools() {
    let plain_metadata = crate::traits::ToolCallMetadata::default();
    let started_metadata = crate::traits::ToolCallMetadata {
        background_started: true,
        ..Default::default()
    };

    // With the structured flag set, text without any marker still detaches;
    // the same text with default metadata is asserted not to detach below.
    assert!(tool_result_indicates_background_detach(
        "terminal",
        "Process finished normally.",
        &started_metadata
    ));

    // Marker text with default metadata.
    // NOTE(review): the test name says "supported tools", yet `web_search` is
    // asserted as detected here — confirm this is intended and not meant to be
    // a negative case.
    let detected_by_text = [
        (
            "terminal",
            "Command still running after 30s. Moved to background (pid=123).",
        ),
        (
            "cli_agent",
            "CLI agent 'x' started in background (task_id=abc).",
        ),
        (
            "spawn_agent",
            "Sub-agent spawned in background for mission: \"...\"",
        ),
        ("web_search", "Moved to background (pid=1)"),
    ];
    for (tool, text) in detected_by_text {
        assert!(tool_result_indicates_background_detach(
            tool,
            text,
            &plain_metadata
        ));
    }

    assert!(!tool_result_indicates_background_detach(
        "terminal",
        "Process finished normally.",
        &plain_metadata
    ));
}
/// The background ack keeps the "Moved to background" line from the tool
/// result and only promises an automatic final result when completion
/// notifications are enabled in the metadata.
#[test]
fn builds_deterministic_background_ack_from_tool_result() {
    let notifying = crate::traits::ToolCallMetadata {
        completion_notifications_enabled: true,
        ..Default::default()
    };
    let silent = crate::traits::ToolCallMetadata::default();

    let notified_ack = build_background_detach_ack(
        "terminal",
        "Command still running after 30s. Moved to background (pid=123).\n\nCompletion notifications are enabled. The user will be notified when this process finishes.\n\n[SYSTEM] ...",
        &notifying,
    );
    let silent_ack = build_background_detach_ack(
        "terminal",
        "Command still running after 30s. Moved to background (pid=456).\n\nThis process is task-owned and will be auto-killed when the current task ends.",
        &silent,
    );

    assert!(notified_ack.contains("Moved to background (pid=123)"));
    assert!(notified_ack.contains("final result will be sent automatically"));
    assert!(silent_ack.contains("Moved to background (pid=456)"));
    assert!(!silent_ack.contains("final result will be sent automatically"));
}
/// The "final result will be sent automatically" promise must follow the
/// metadata flag, not notification wording that appears in the result text.
#[test]
fn background_ack_uses_structured_notification_metadata_not_text() {
    let promise = "final result will be sent automatically";
    let notifying = crate::traits::ToolCallMetadata {
        completion_notifications_enabled: true,
        ..Default::default()
    };
    let silent = crate::traits::ToolCallMetadata::default();

    // Flag set, text silent about notifications: the promise is present.
    let flagged_ack = build_background_detach_ack(
        "terminal",
        "Command still running after 30s. Moved to background (pid=123).",
        &notifying,
    );
    // Flag unset, text mentions notifications: the promise is absent.
    let text_only_ack = build_background_detach_ack(
        "terminal",
        "Command still running after 30s. Moved to background (pid=456).\n\nCompletion notifications are enabled. The user will be notified when this process finishes.",
        &silent,
    );

    assert!(flagged_ack.contains(promise));
    assert!(!text_only_ack.contains(promise));
}
/// The blocked-tool message names the tool, every skill passed in, and carries
/// the "local file or repository inspection" guidance phrase.
#[test]
fn blocked_for_untrusted_external_reference_message_mentions_skill_names() {
    let tool_name = "read_file";
    let skill_names = ["widgets-api".to_string(), "linear-api".to_string()];

    let message = blocked_for_untrusted_external_reference_message(tool_name, &skill_names);

    assert!(message.contains(tool_name));
    // Each skill handed to the builder must be mentioned by name.
    for skill in &skill_names {
        assert!(message.contains(skill.as_str()));
    }
    assert!(message.contains("explicitly for local file or repository inspection"));
}
/// A plan step that declares a target URL is not matched by a call to the same
/// tool with a different URL, and is matched by a call with the declared URL.
#[test]
fn linear_intent_step_match_requires_declared_target_when_present() {
    let step = crate::agent::execution_state::LinearIntentStep {
        step_id: String::from("plan-v1-step-2"),
        step_index: 2,
        tool: String::from("http_request"),
        target: String::from("https://api.example.com/tweet-2"),
        description: String::from("Post tweet 2"),
        tool_calls_on_step: 0,
        completed: false,
        completion_evidence: None,
        last_evaluated_at: None,
    };

    // Same tool, but the URL points at a different tweet than the step declares.
    let off_target_args = r#"{"url":"https://api.example.com/tweet-3","method":"POST"}"#;
    // Same tool and the exact URL the step declares.
    let on_target_args = r#"{"url":"https://api.example.com/tweet-2","method":"POST"}"#;

    let off_target_matches =
        linear_intent_step_matches_tool_call(&step, "http_request", off_target_args);
    let on_target_matches =
        linear_intent_step_matches_tool_call(&step, "http_request", on_target_args);

    assert!(!off_target_matches);
    assert!(on_target_matches);
}
/// With a two-step plan installed, a tool call carrying the second step's URL
/// is checked against the current step; the cursor is advanced only on a match,
/// and the test expects the current step to still have index 1 afterwards.
#[test]
fn unmatched_success_does_not_advance_linear_intent_cursor() {
    let first_step = crate::agent::execution_state::LinearIntentStep {
        step_id: String::from("plan-v1-step-1"),
        step_index: 1,
        tool: String::from("http_request"),
        target: String::from("https://api.example.com/tweet-1"),
        description: String::from("Post tweet 1"),
        tool_calls_on_step: 0,
        completed: false,
        completion_evidence: None,
        last_evaluated_at: None,
    };
    let second_step = crate::agent::execution_state::LinearIntentStep {
        step_id: String::from("plan-v1-step-2"),
        step_index: 2,
        tool: String::from("http_request"),
        target: String::from("https://api.example.com/tweet-2"),
        description: String::from("Post tweet 2"),
        tool_calls_on_step: 0,
        completed: false,
        completion_evidence: None,
        last_evaluated_at: None,
    };

    let mut execution_state = ExecutionState::new(
        BudgetTier::None,
        default_execution_budget(BudgetTier::None),
        ExecutionPersistence::Ephemeral,
    );
    execution_state.install_linear_intent_plan(1, vec![first_step, second_step]);

    // The call uses the second step's URL; advance only if the *current* step matches it.
    let call_args = r#"{"url":"https://api.example.com/tweet-2","method":"POST"}"#;
    let call_matches_current_step = execution_state
        .current_linear_intent_step()
        .filter(|step| linear_intent_step_matches_tool_call(step, "http_request", call_args))
        .is_some();
    if call_matches_current_step {
        execution_state.advance_linear_intent_step_after_external_success();
    }

    let active_step = execution_state
        .current_linear_intent_step()
        .expect("step should remain active");
    assert_eq!(active_step.step_index, 1);
}
}