use super::execution_state::LinearIntentStep;
use super::*;
use crate::events::TaskOutcome;
use crate::execution_policy::{PolicyBundle, VerifyLevel};
use crate::traits::ProviderResponse;
use serde::{Deserialize, Serialize};
pub(super) enum ToolPreludeOutcome {
ContinueLoop,
Return(anyhow::Result<String>),
Proceed,
}
pub(super) struct ToolPreludeCtx<'a> {
pub resp: &'a ProviderResponse,
pub emitter: &'a crate::events::EventEmitter,
pub task_id: &'a str,
pub session_id: &'a str,
pub model: &'a str,
pub llm_provider: Arc<dyn ModelProvider>,
pub iteration: usize,
pub task_start: Instant,
pub learning_ctx: &'a mut LearningContext,
pub evidence_state: &'a EvidenceState,
pub user_text: &'a str,
pub policy_bundle: &'a PolicyBundle,
pub available_capabilities: &'a HashMap<String, ToolCapabilities>,
pub execution_state: &'a mut ExecutionState,
pub validation_state: &'a mut ValidationState,
pub pending_system_messages: &'a mut Vec<SystemDirective>,
pub force_text_response: &'a mut bool,
pub turn_context: &'a TurnContext,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub(crate) struct PlannedAction {
pub(crate) tool: String,
pub(crate) target: String,
pub(crate) description: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub(crate) struct PlanState {
pub(crate) goal: String,
pub(crate) success_criteria: Vec<String>,
pub(crate) first_action: PlannedAction,
pub(crate) requires_verification: bool,
pub(crate) risky_actions: Vec<String>,
pub(crate) version: u32,
#[serde(default)]
pub(crate) planned_steps: Vec<PlannedAction>,
}
impl PlanState {
fn normalize(mut self) -> Self {
self.goal = self.goal.trim().to_string();
self.success_criteria = self
.success_criteria
.into_iter()
.map(|criterion| criterion.trim().to_string())
.filter(|criterion| !criterion.is_empty())
.collect();
self.first_action.tool = self.first_action.tool.trim().to_string();
self.first_action.target = self.first_action.target.trim().to_string();
self.first_action.description = self.first_action.description.trim().to_string();
self.risky_actions = self
.risky_actions
.into_iter()
.map(|action| action.trim().to_string())
.filter(|action| !action.is_empty())
.collect();
self.planned_steps = self
.planned_steps
.into_iter()
.map(|mut step| {
step.tool = step.tool.trim().to_string();
step.target = step.target.trim().to_string();
step.description = step.description.trim().to_string();
step
})
.filter(|step| !step.tool.is_empty())
.collect();
if self.version == 0 {
self.version = 1;
}
self
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
enum CritiqueVerdict {
Accept,
Replan,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct CritiqueState {
verdict: CritiqueVerdict,
issues: Vec<String>,
summary: String,
}
impl CritiqueState {
fn normalize(mut self) -> Self {
self.issues = self
.issues
.into_iter()
.map(|issue| issue.trim().to_string())
.filter(|issue| !issue.is_empty())
.collect();
self.summary = self.summary.trim().to_string();
self
}
}
fn pre_execution_plan_schema_json() -> Value {
json!({
"type": "object",
"properties": {
"goal": { "type": "string" },
"success_criteria": {
"type": "array",
"items": { "type": "string" }
},
"first_action": {
"type": "object",
"properties": {
"tool": { "type": "string" },
"target": { "type": "string" },
"description": { "type": "string" }
},
"required": ["tool", "target", "description"],
"additionalProperties": false
},
"requires_verification": { "type": "boolean" },
"risky_actions": {
"type": "array",
"items": { "type": "string" }
},
"version": { "type": "integer", "minimum": 1 },
"planned_steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tool": { "type": "string" },
"target": { "type": "string" },
"description": { "type": "string" }
},
"required": ["tool", "target", "description"],
"additionalProperties": false
}
}
},
"required": [
"goal",
"success_criteria",
"first_action",
"requires_verification",
"risky_actions",
"version"
],
"additionalProperties": false
})
}
fn pre_execution_critique_schema_json() -> Value {
json!({
"type": "object",
"properties": {
"verdict": {
"type": "string",
"enum": ["accept", "replan"]
},
"issues": {
"type": "array",
"items": { "type": "string" }
},
"summary": { "type": "string" }
},
"required": ["verdict", "issues", "summary"],
"additionalProperties": false
})
}
fn tool_call_is_side_effecting(
agent: &Agent,
tc: &ToolCall,
available_capabilities: &HashMap<String, ToolCapabilities>,
) -> bool {
let semantics = agent
.tools
.iter()
.find(|tool| tool.name() == tc.name && tool.is_available())
.map(|tool| tool.call_semantics(&tc.arguments))
.unwrap_or_default();
if !semantics.is_empty() {
semantics.mutates_state()
} else {
tool_is_side_effecting(&tc.name, available_capabilities)
}
}
fn first_side_effecting_tool_call<'a>(
agent: &Agent,
resp: &'a ProviderResponse,
available_capabilities: &HashMap<String, ToolCapabilities>,
) -> Option<&'a ToolCall> {
resp.tool_calls
.iter()
.find(|tc| tool_call_is_side_effecting(agent, tc, available_capabilities))
}
fn plain_text_guard_blocks_tool(tool_name: &str, is_side_effecting: bool) -> bool {
user_visible_side_effect_guard_blocks_tool(tool_name, is_side_effecting)
}
fn uncertainty_guard_blocks_tool(tool_name: &str, is_side_effecting: bool) -> bool {
user_visible_side_effect_guard_blocks_tool(tool_name, is_side_effecting)
}
fn user_visible_side_effect_guard_blocks_tool(tool_name: &str, is_side_effecting: bool) -> bool {
is_side_effecting && tool_name != "spawn_agent"
}
fn first_plain_text_blocked_tool_call<'a>(
agent: &Agent,
resp: &'a ProviderResponse,
available_capabilities: &HashMap<String, ToolCapabilities>,
) -> Option<&'a ToolCall> {
resp.tool_calls.iter().find(|tc| {
plain_text_guard_blocks_tool(
&tc.name,
tool_call_is_side_effecting(agent, tc, available_capabilities),
)
})
}
fn extract_target_preview(arguments: &str) -> Option<String> {
let parsed = serde_json::from_str::<Value>(arguments).ok()?;
let Value::Object(map) = parsed else {
return None;
};
for key in [
"path",
"file_path",
"file",
"filename",
"url",
"target",
"target_path",
"project_path",
"project_dir",
"repo_path",
"repo_dir",
"working_dir",
"directory",
"dir",
] {
if let Some(value) = map.get(key).and_then(Value::as_str) {
let trimmed = value.trim();
if !trimmed.is_empty() {
return Some(trimmed.to_string());
}
}
}
None
}
fn normalize_affirmation_text(text: &str) -> String {
text.trim()
.trim_matches(|c: char| c.is_whitespace() || (c.is_ascii_punctuation() && c != '\''))
.to_ascii_lowercase()
}
fn is_short_affirmation_or_approval(text: &str) -> bool {
let normalized = normalize_affirmation_text(text);
if normalized.is_empty() {
return false;
}
if normalized.contains('?') {
return false;
}
if normalized.split_whitespace().count() > 6 {
return false;
}
if contains_keyword_as_words(&normalized, "no")
|| contains_keyword_as_words(&normalized, "don't")
|| contains_keyword_as_words(&normalized, "stop")
{
return false;
}
const SINGLE_WORD_AFFIRMATIONS: &[&str] = &[
"yes", "yeah", "yep", "yup", "sure", "ok", "okay", "proceed", "sgtm", "please",
];
if SINGLE_WORD_AFFIRMATIONS
.iter()
.any(|w| contains_keyword_as_words(&normalized, w))
{
return true;
}
const APPROVAL_PHRASES: &[&str] = &[
"go ahead",
"do it",
"please do",
"please proceed",
"try that",
"try it",
"let's do it",
"lets do it",
"sounds good",
"go for it",
"yes please",
"that works",
];
APPROVAL_PHRASES
.iter()
.any(|phrase| contains_keyword_as_words(&normalized, phrase))
}
fn assistant_proposed_action(recent_messages: &[Value]) -> bool {
let Some(content) = recent_messages
.iter()
.rev()
.find(|m| m.get("role").and_then(Value::as_str) == Some("assistant"))
.and_then(|m| m.get("content").and_then(Value::as_str))
else {
return false;
};
let lower = content.trim().to_ascii_lowercase();
if lower.is_empty() {
return false;
}
const PROPOSAL_PHRASES: &[&str] = &[
"would you like me to",
"want me to",
"do you want me to",
"shall i",
"should i",
"would you like",
"i can run",
"i could run",
"let me know if you'd like",
"want me to run",
"like me to",
];
if PROPOSAL_PHRASES.iter().any(|p| lower.contains(p)) {
return true;
}
if lower.ends_with('?') {
const ACTION_WORDS: &[&str] = &[
"run", "extract", "create", "generate", "build", "fetch", "script", "execute", "try",
];
if ACTION_WORDS
.iter()
.any(|w| contains_keyword_as_words(&lower, w))
{
return true;
}
}
false
}
fn turn_is_approval_of_prior_proposal(turn_context: &TurnContext) -> bool {
is_short_affirmation_or_approval(&turn_context.goal_user_text)
&& assistant_proposed_action(&turn_context.recent_messages)
}
fn turn_prefers_plain_text_completion(turn_context: &TurnContext) -> bool {
if turn_is_approval_of_prior_proposal(turn_context) {
return false;
}
!turn_context.completion_contract.expects_mutation
&& !turn_context.completion_contract.requires_observation
}
fn summarize_tool_arguments(arguments: &str) -> Value {
serde_json::from_str::<Value>(arguments).unwrap_or_else(|_| json!({ "raw": arguments }))
}
fn summarize_evidence_state(evidence_state: &EvidenceState) -> Value {
json!({
"target": evidence_state.target,
"record_count": evidence_state.records.len(),
"records": evidence_state.records.iter().map(|record| {
json!({
"kind": record.kind,
"source": record.source,
"trust": record.trust,
"observed_at": record.observed_at,
"targets": record.targets,
})
}).collect::<Vec<_>>(),
"contradictions": evidence_state.contradictions,
"post_change_verification_done": evidence_state.post_change_verification_done,
})
}
fn validate_pre_execution_plan(
plan: &PlanState,
tool_call: &ToolCall,
expected_target: Option<&str>,
) -> Result<(), &'static str> {
if plan.goal.is_empty() {
return Err("missing_goal");
}
if plan.success_criteria.is_empty() {
return Err("missing_success_criteria");
}
if plan.first_action.tool.is_empty() {
return Err("missing_first_action_tool");
}
if !plan.first_action.tool.eq_ignore_ascii_case(&tool_call.name) {
return Err("first_action_tool_mismatch");
}
if plan.first_action.description.is_empty() {
return Err("missing_first_action_description");
}
if expected_target.is_some() && plan.first_action.target.is_empty() {
return Err("missing_first_action_target");
}
if !plan.requires_verification {
return Err("missing_verification_requirement");
}
if plan.risky_actions.is_empty() {
return Err("missing_risk_acknowledgment");
}
if plan.version == 0 {
return Err("missing_plan_version");
}
if !plan.planned_steps.is_empty() {
let first_step = &plan.planned_steps[0];
if !first_step
.tool
.eq_ignore_ascii_case(&plan.first_action.tool)
{
return Err("planned_steps_first_action_mismatch");
}
if !plan.first_action.target.is_empty()
&& (first_step.target.is_empty()
|| !first_step
.target
.eq_ignore_ascii_case(&plan.first_action.target))
{
return Err("planned_steps_first_action_target_mismatch");
}
for step in &plan.planned_steps {
if step.tool.is_empty() || step.description.is_empty() {
return Err("planned_steps_incomplete_entry");
}
}
}
Ok(())
}
fn validate_pre_execution_critique(critique: &CritiqueState) -> Result<(), &'static str> {
if critique.summary.is_empty() {
return Err("missing_summary");
}
if matches!(critique.verdict, CritiqueVerdict::Replan) && critique.issues.is_empty() {
return Err("missing_issues_for_replan");
}
Ok(())
}
fn critique_budget_available(execution_state: &ExecutionState) -> bool {
(execution_state.budget.max_llm_calls == 0
|| execution_state.llm_calls_used < execution_state.budget.max_llm_calls)
&& (execution_state.budget.max_validation_rounds == 0
|| execution_state.validation_rounds_used
< execution_state.budget.max_validation_rounds)
}
fn should_run_pre_execution_critique(
policy_bundle: &PolicyBundle,
capabilities: ToolCapabilities,
execution_state: &ExecutionState,
) -> bool {
if !critique_budget_available(execution_state) {
return false;
}
capabilities.high_impact_write
|| capabilities.external_side_effect
|| matches!(policy_bundle.policy.verify_level, VerifyLevel::Full)
|| policy_bundle.risk_score >= 0.67
|| policy_bundle.uncertainty_score >= 0.45
}
async fn inject_prelude_retry_messages(
agent: &Agent,
emitter: &crate::events::EventEmitter,
session_id: &str,
task_id: &str,
tool_calls: &[ToolCall],
result_text: String,
) -> anyhow::Result<()> {
for tc in tool_calls {
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text.clone()),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.3,
..Message::runtime_defaults()
};
agent
.append_tool_message_with_result_event(emitter, &tool_msg, true, 0, None, Some(task_id))
.await?;
}
Ok(())
}
async fn request_pre_execution_plan(
llm_provider: Arc<dyn ModelProvider>,
model: &str,
user_text: &str,
assistant_narration: Option<&str>,
tool_call: &ToolCall,
capabilities: ToolCapabilities,
) -> anyhow::Result<PlanState> {
let target = extract_target_preview(&tool_call.arguments).unwrap_or_default();
let messages = vec![
json!({
"role": "system",
"content": "Return only JSON matching the schema. Produce a minimal pre-execution plan for the first risky tool action before execution. The first_action.tool must exactly match the proposed tool call name."
}),
json!({
"role": "user",
"content": format!(
"User request:\n{user_text}\n\nAssistant narration before execution:\n{}\n\nProposed risky tool call:\n{}\n\nReturn a minimal plan for this immediate action. Keep success criteria concrete and short. Mark requires_verification=true for this risky action.",
assistant_narration
.map(str::trim)
.filter(|text| !text.is_empty())
.unwrap_or("<none>"),
serde_json::to_string_pretty(&json!({
"tool": tool_call.name,
"target_hint": target,
"arguments": summarize_tool_arguments(&tool_call.arguments),
"capabilities": {
"read_only": capabilities.read_only,
"external_side_effect": capabilities.external_side_effect,
"needs_approval": capabilities.needs_approval,
"idempotent": capabilities.idempotent,
"high_impact_write": capabilities.high_impact_write,
}
}))
.unwrap_or_else(|_| tool_call.arguments.clone())
)
}),
];
let options = ChatOptions {
response_mode: crate::traits::ResponseMode::JsonSchema {
name: "pre_execution_plan_v1".to_string(),
schema: pre_execution_plan_schema_json(),
strict: true,
},
tool_choice: ToolChoiceMode::None,
..ChatOptions::default()
};
let response = llm_provider
.chat_with_options(model, &messages, &[], &options)
.await?;
let raw = response
.content
.ok_or_else(|| anyhow::anyhow!("pre-execution planning response was empty"))?;
let plan = serde_json::from_str::<PlanState>(&raw)?;
Ok(plan.normalize())
}
#[allow(clippy::too_many_arguments)]
async fn request_pre_execution_critique(
llm_provider: Arc<dyn ModelProvider>,
model: &str,
user_text: &str,
assistant_narration: Option<&str>,
tool_call: &ToolCall,
plan: &PlanState,
evidence_state: &EvidenceState,
capabilities: ToolCapabilities,
expected_target: Option<&str>,
) -> anyhow::Result<CritiqueState> {
let messages = vec![
json!({
"role": "system",
"content": "Return only JSON matching the schema. You are a brief critique pass for a risky first tool action. Focus only on concrete issues in these categories: wrong target, missing evidence, unverifiable success criteria, unsafe first action. Use verdict=replan only when one of those issues is specific and blocking."
}),
json!({
"role": "user",
"content": format!(
"User request:\n{user_text}\n\nAssistant narration before execution:\n{}\n\nProposed risky tool call:\n{}\n\nPlan under review:\n{}\n\nCurrent evidence snapshot:\n{}\n\nOnly flag concrete blockers. Do not ask for generic caution.",
assistant_narration
.map(str::trim)
.filter(|text| !text.is_empty())
.unwrap_or("<none>"),
serde_json::to_string_pretty(&json!({
"tool": tool_call.name,
"target_hint": expected_target,
"arguments": summarize_tool_arguments(&tool_call.arguments),
"capabilities": {
"read_only": capabilities.read_only,
"external_side_effect": capabilities.external_side_effect,
"needs_approval": capabilities.needs_approval,
"idempotent": capabilities.idempotent,
"high_impact_write": capabilities.high_impact_write,
}
}))
.unwrap_or_else(|_| tool_call.arguments.clone()),
serde_json::to_string_pretty(plan).unwrap_or_else(|_| "<plan unavailable>".to_string()),
serde_json::to_string_pretty(&summarize_evidence_state(evidence_state))
.unwrap_or_else(|_| "<evidence unavailable>".to_string()),
)
}),
];
let options = ChatOptions {
response_mode: crate::traits::ResponseMode::JsonSchema {
name: "pre_execution_critique_v1".to_string(),
schema: pre_execution_critique_schema_json(),
strict: true,
},
tool_choice: ToolChoiceMode::None,
..ChatOptions::default()
};
let response = llm_provider
.chat_with_options(model, &messages, &[], &options)
.await?;
let raw = response
.content
.ok_or_else(|| anyhow::anyhow!("pre-execution critique response was empty"))?;
let critique = serde_json::from_str::<CritiqueState>(&raw)?;
Ok(critique.normalize())
}
pub(super) async fn run_tool_prelude_phase(
services: &super::services::AgentServices<'_>,
ctx: &mut ToolPreludeCtx<'_>,
) -> anyhow::Result<ToolPreludeOutcome> {
let agent = services.agent;
let resp = ctx.resp;
let emitter = ctx.emitter;
let task_id = ctx.task_id;
let session_id = ctx.session_id;
let model = ctx.model;
let llm_provider = ctx.llm_provider.clone();
let iteration = ctx.iteration;
let task_start = ctx.task_start;
let learning_ctx = &mut *ctx.learning_ctx;
let evidence_state = ctx.evidence_state;
let user_text = ctx.user_text;
let policy_bundle = ctx.policy_bundle;
let available_capabilities = ctx.available_capabilities;
let execution_state = &mut *ctx.execution_state;
let validation_state = &mut *ctx.validation_state;
let pending_system_messages = &mut *ctx.pending_system_messages;
let force_text_response = &mut *ctx.force_text_response;
let turn_context = ctx.turn_context;
let assistant_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "assistant".to_string(),
content: resp.content.clone(),
tool_call_id: None,
tool_name: None,
tool_calls_json: Some(serde_json::to_string(&resp.tool_calls)?),
created_at: Utc::now(),
importance: 0.5,
..Message::runtime_defaults()
};
agent
.append_assistant_message_with_event(
emitter,
&assistant_msg,
model,
resp.usage.as_ref().map(|u| u.input_tokens),
resp.usage.as_ref().map(|u| u.output_tokens),
)
.await?;
if iteration == 1
&& agent.depth == 0
&& !resp.tool_calls.is_empty()
&& resp.content.as_ref().is_none_or(|c| c.trim().len() < 20)
{
info!(
session_id,
"Intent gate: requiring narration before tool execution"
);
for tc in &resp.tool_calls {
let result_text = "[SYSTEM] Before executing tools, briefly state what you \
understand the user is asking and what you plan to do. \
Then re-issue the tool calls."
.to_string();
let tool_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "tool".to_string(),
content: Some(result_text),
tool_call_id: Some(tc.id.clone()),
tool_name: Some(tc.name.clone()),
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.3,
..Message::runtime_defaults()
};
agent
.append_tool_message_with_result_event(
emitter,
&tool_msg,
true,
0,
None,
Some(task_id),
)
.await?;
}
return Ok(ToolPreludeOutcome::ContinueLoop);
}
if let Some(side_effecting_tool_call) =
first_plain_text_blocked_tool_call(agent, resp, available_capabilities)
{
let all_side_effecting_are_memory = resp
.tool_calls
.iter()
.filter(|tc| tool_call_is_side_effecting(agent, tc, available_capabilities))
.all(|tc| crate::agent::recall_guardrails::is_personal_memory_tool(&tc.name));
let is_child_session =
session_id.starts_with("sub-") || session_id.starts_with("specialist:");
if !is_child_session
&& !all_side_effecting_are_memory
&& turn_prefers_plain_text_completion(turn_context)
{
validation_state.note_replan();
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"text_only_tool_drift",
format!(
"Retried in plain-text mode after {} drifted into side-effecting execution on a text-only request.",
side_effecting_tool_call.name
),
true,
);
*force_text_response = true;
pending_system_messages.push(SystemDirective::ToolModeDisabledPlainText);
agent
.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionStateSnapshot,
format!(
"Redirecting {} back to plain-text completion",
side_effecting_tool_call.name
),
json!({
"condition": "text_only_turn_side_effecting_tool_drift",
"tool": side_effecting_tool_call.name,
"loop_repetition_reason": validation_state.loop_repetition_reason,
}),
)
.await;
inject_prelude_retry_messages(
agent,
emitter,
session_id,
task_id,
&resp.tool_calls,
"[SYSTEM] This request should be answered directly in plain text. Do not call side-effecting tools for it. Write the requested content instead."
.to_string(),
)
.await?;
return Ok(ToolPreludeOutcome::ContinueLoop);
}
}
if !resp.tool_calls.is_empty() {
execution_state.mark_persisted_now();
agent
.emit_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionStateSnapshot,
"Execution state snapshot before tool execution".to_string(),
json!({
"condition": "prelude_ready_for_execution",
"execution_state": execution_state.clone(),
"tool_count": resp.tool_calls.len(),
}),
)
.await;
}
let uncertainty_threshold =
current_uncertainty_threshold(agent.policy_config.uncertainty_clarify_threshold);
if agent.policy_config.uncertainty_clarify_enforce
&& policy_bundle.uncertainty_score >= uncertainty_threshold
{
let has_side_effecting_call = resp.tool_calls.iter().any(|tc| {
uncertainty_guard_blocks_tool(
&tc.name,
tool_call_is_side_effecting(agent, tc, available_capabilities),
)
});
if has_side_effecting_call {
let clarify = default_clarifying_question(user_text, &[]);
POLICY_METRICS
.uncertainty_clarify_total
.fetch_add(1, Ordering::Relaxed);
info!(
session_id,
iteration,
uncertainty_score = policy_bundle.uncertainty_score,
threshold = uncertainty_threshold,
clarification = %clarify,
"Uncertainty guard triggered before side-effecting tool execution"
);
let assistant_msg = Message {
id: Uuid::new_v4().to_string(),
session_id: session_id.to_string(),
role: "assistant".to_string(),
content: Some(clarify.clone()),
tool_call_id: None,
tool_name: None,
tool_calls_json: None,
created_at: Utc::now(),
importance: 0.5,
..Message::runtime_defaults()
};
agent
.append_assistant_message_with_event(emitter, &assistant_msg, "system", None, None)
.await?;
agent
.emit_task_end(
emitter,
task_id,
TaskStatus::Completed,
TaskOutcome::Partial,
task_start,
iteration,
learning_ctx.tool_calls.len(),
None,
Some("Asked clarification due to uncertainty policy.".to_string()),
)
.await;
return Ok(ToolPreludeOutcome::Return(Ok(clarify)));
}
}
for tc in &resp.tool_calls {
if let Some(violation) =
assess_pre_execution_evidence_gate(&tc.name, &tc.arguments, evidence_state)
{
validation_state.record_failure(ValidationFailure::MissingEvidence);
validation_state.note_retry(LoopRepetitionReason::MissingEvidence);
learning_ctx.record_replay_note(
ReplayNoteCategory::EvidenceGate,
"missing_pre_execution_evidence",
format!(
"Blocked {} until {} evidence exists for {}.",
tc.name,
format!("{:?}", violation.kind).to_ascii_lowercase(),
violation.target.as_deref().unwrap_or("the current target")
),
true,
);
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"missing_evidence",
format!(
"Retried after evidence gate blocked {} for {}.",
tc.name,
violation.target.as_deref().unwrap_or("the current target")
),
true,
);
agent
.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::EvidenceGate,
format!("Blocked {} until required evidence is gathered", tc.name),
json!({
"condition": "missing_pre_execution_evidence",
"tool": tc.name,
"required_evidence_kind": violation.kind,
"target": violation.target,
"reason": violation.reason,
"loop_repetition_reason": validation_state.loop_repetition_reason,
}),
)
.await;
inject_prelude_retry_messages(
agent,
emitter,
session_id,
task_id,
&resp.tool_calls,
format!(
"[SYSTEM] Evidence gate blocked this tool call. {} {}",
violation.reason, violation.coaching
),
)
.await?;
return Ok(ToolPreludeOutcome::ContinueLoop);
}
}
let first_risky_tool_call =
first_side_effecting_tool_call(agent, resp, available_capabilities).cloned();
let plan_already_generated = execution_state.current_plan_version.is_some();
if agent.depth == 0
&& !plan_already_generated
&& !has_completed_side_effecting_tool_call(learning_ctx, available_capabilities)
{
if let Some(first_risky_tool_call) = first_risky_tool_call {
let capabilities = available_capabilities
.get(&first_risky_tool_call.name)
.copied()
.unwrap_or_default();
let expected_target = extract_target_preview(&first_risky_tool_call.arguments);
match request_pre_execution_plan(
llm_provider,
model,
user_text,
resp.content.as_deref(),
&first_risky_tool_call,
capabilities,
)
.await
{
Ok(plan) => match validate_pre_execution_plan(
&plan,
&first_risky_tool_call,
expected_target.as_deref(),
) {
Ok(()) => {
execution_state.set_plan_version(plan.version);
execution_state.active_linear_intent_plan = None;
if !plan.planned_steps.is_empty() {
execution_state.install_linear_intent_plan(
plan.version,
plan.planned_steps
.iter()
.enumerate()
.map(|(idx, step)| LinearIntentStep {
step_id: format!("plan-v{}-step-{}", plan.version, idx + 1),
step_index: idx + 1,
tool: step.tool.clone(),
target: step.target.clone(),
description: step.description.clone(),
tool_calls_on_step: 0,
completed: false,
completion_evidence: None,
last_evaluated_at: None,
})
.collect(),
);
}
validation_state.set_plan(plan.version, &plan.success_criteria);
validation_state.clear_loop_repetition_reason();
learning_ctx.record_replay_note(
ReplayNoteCategory::PlanRevision,
"plan_accepted",
format!(
"Accepted plan v{} for {} targeting {}.",
plan.version,
first_risky_tool_call.name,
expected_target.as_deref().unwrap_or("unspecified target")
),
false,
);
agent
.emit_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionPlanningGate,
format!(
"Structured pre-execution plan accepted for {}",
first_risky_tool_call.name
),
json!({
"condition": "plan_accepted",
"gate_result": "accepted",
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"requires_verification": plan.requires_verification,
"success_criteria_count": plan.success_criteria.len(),
"risky_action_count": plan.risky_actions.len(),
"plan": &plan,
}),
)
.await;
if should_run_pre_execution_critique(
policy_bundle,
capabilities,
execution_state,
) {
match request_pre_execution_critique(
ctx.llm_provider.clone(),
model,
user_text,
resp.content.as_deref(),
&first_risky_tool_call,
&plan,
evidence_state,
capabilities,
expected_target.as_deref(),
)
.await
{
Ok(critique) => {
match validate_pre_execution_critique(&critique) {
Ok(())
if matches!(
critique.verdict,
CritiqueVerdict::Accept
) =>
{
learning_ctx.record_replay_note(
ReplayNoteCategory::PlanRevision,
"critique_accepted",
format!(
"Critique accepted the first {} step.",
first_risky_tool_call.name
),
false,
);
agent.emit_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionCritiquePass,
format!(
"Pre-execution critique accepted {}",
first_risky_tool_call.name
),
json!({
"condition": "critique_accepted",
"critique_result": "accepted",
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"summary": &critique.summary,
"issues": &critique.issues,
"risk_score": policy_bundle.risk_score,
"uncertainty_score": policy_bundle.uncertainty_score,
}),
)
.await;
}
Ok(()) => {
execution_state.active_linear_intent_plan = None;
validation_state.record_failure(
ValidationFailure::CritiqueRejected,
);
validation_state.note_replan_for(
LoopRepetitionReason::CritiqueRejected,
);
learning_ctx.record_replay_note(
ReplayNoteCategory::PlanRevision,
"critique_rejected",
format!(
"Critique rejected the first {} step: {}",
first_risky_tool_call.name,
if critique.issues.is_empty() {
critique.summary.clone()
} else {
critique.issues.join("; ")
}
),
true,
);
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"critique_rejected",
format!(
"Replanned because critique rejected {}.",
first_risky_tool_call.name
),
true,
);
agent.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionCritiquePass,
format!(
"Pre-execution critique rejected {}",
first_risky_tool_call.name
),
json!({
"condition": "critique_rejected",
"critique_result": "rejected",
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"summary": &critique.summary,
"issues": &critique.issues,
"risk_score": policy_bundle.risk_score,
"uncertainty_score": policy_bundle.uncertainty_score,
"loop_repetition_reason": validation_state.loop_repetition_reason,
}),
)
.await;
let issues = critique.issues.join("; ");
inject_prelude_retry_messages(
agent,
emitter,
session_id,
task_id,
&resp.tool_calls,
format!(
"[SYSTEM] Critique pass blocked this risky action. \
Issues: {}. Re-plan the first action, gather any missing \
evidence, briefly explain the corrected approach, and then \
re-issue tool calls.",
if issues.is_empty() {
critique.summary
} else {
issues
}
),
)
.await?;
return Ok(ToolPreludeOutcome::ContinueLoop);
}
Err(reason) => {
agent
.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionCritiquePass,
format!(
"Pre-execution critique was invalid for {}",
first_risky_tool_call.name
),
json!({
"condition": "critique_invalid",
"critique_result": "invalid",
"reason": reason,
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"critique": critique,
}),
)
.await;
}
}
}
Err(error) => {
agent
.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionCritiquePass,
format!(
"Pre-execution critique unavailable for {}",
first_risky_tool_call.name
),
json!({
"condition": "critique_unavailable",
"critique_result": "unavailable",
"reason": "critique_generation_failed",
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"error": error.to_string(),
}),
)
.await;
warn!(
session_id,
tool = %first_risky_tool_call.name,
error = %error,
"Pre-execution critique pass unavailable; proceeding with existing guards"
);
}
}
} else if capabilities.high_impact_write
|| capabilities.external_side_effect
|| capabilities.needs_approval
{
agent.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionCritiquePass,
format!(
"Pre-execution critique skipped for {} because budget is exhausted",
first_risky_tool_call.name
),
json!({
"condition": "critique_skipped_budget",
"critique_result": "skipped_budget",
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"risk_score": policy_bundle.risk_score,
"uncertainty_score": policy_bundle.uncertainty_score,
"llm_calls_used": execution_state.llm_calls_used,
"validation_rounds_used": execution_state.validation_rounds_used,
"budget": execution_state.budget.clone(),
}),
)
.await;
}
}
Err(reason) => {
validation_state.record_failure(ValidationFailure::PlanRejected);
validation_state.note_replan_for(LoopRepetitionReason::PlanRejected);
learning_ctx.record_replay_note(
ReplayNoteCategory::PlanRevision,
"plan_rejected",
format!(
"Rejected the first {} step because the structured plan failed validation: {}.",
first_risky_tool_call.name, reason
),
true,
);
learning_ctx.record_replay_note(
ReplayNoteCategory::RetryReason,
"plan_rejected",
format!(
"Replanned because the first structured plan for {} was invalid.",
first_risky_tool_call.name
),
true,
);
agent.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionPlanningGate,
format!(
"Structured pre-execution plan rejected for {}",
first_risky_tool_call.name
),
json!({
"condition": "plan_rejected",
"gate_result": "rejected",
"reason": reason,
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"plan": &plan,
"loop_repetition_reason": validation_state.loop_repetition_reason,
}),
)
.await;
inject_prelude_retry_messages(
agent,
emitter,
session_id,
task_id,
&resp.tool_calls,
format!(
"[SYSTEM] Pre-execution planning check blocked this risky action. \
Reason: {reason}. Reconsider the first risky action, briefly explain \
the plan in user-facing text, and re-issue corrected tool calls."
),
)
.await?;
return Ok(ToolPreludeOutcome::ContinueLoop);
}
},
Err(error) => {
agent
.emit_warning_decision_point(
emitter,
task_id,
iteration,
DecisionType::ExecutionPlanningGate,
format!(
"Structured pre-execution plan unavailable for {}",
first_risky_tool_call.name
),
json!({
"condition": "plan_unavailable",
"gate_result": "unavailable",
"reason": "plan_generation_failed",
"tool": first_risky_tool_call.name,
"target_hint": expected_target.as_deref(),
"error": error.to_string(),
}),
)
.await;
warn!(
session_id,
tool = %first_risky_tool_call.name,
error = %error,
"Structured pre-execution planning gate unavailable; proceeding with existing guards"
);
}
}
}
}
Ok(ToolPreludeOutcome::Proceed)
}
#[cfg(test)]
mod tests {
use super::*;
fn turn_with(
goal_user_text: &str,
recent_messages: Vec<Value>,
expects_mutation: bool,
requires_observation: bool,
) -> TurnContext {
TurnContext {
goal_user_text: goal_user_text.to_string(),
recent_messages,
completion_contract: CompletionContract {
expects_mutation,
requires_observation,
..CompletionContract::default()
},
..TurnContext::default()
}
}
fn assistant_proposal_msg() -> Value {
json!({
"role": "assistant",
"content": "I found a PDF. Would you like me to try running a Python script to extract the text block-by-block?"
})
}
#[test]
fn plain_text_gate_allows_tools_for_approval_of_prior_proposal() {
let tc = turn_with(
"Yes, try that",
vec![
json!({"role": "user", "content": "Can you extract the text from this PDF?"}),
assistant_proposal_msg(),
],
false,
false,
);
assert!(!turn_prefers_plain_text_completion(&tc));
}
#[test]
fn plain_text_gate_unchanged_for_plain_question() {
let tc = turn_with(
"what's my name?",
vec![assistant_proposal_msg()],
false,
false,
);
assert!(turn_prefers_plain_text_completion(&tc));
}
#[test]
fn plain_text_gate_unchanged_for_bare_yes_without_proposal() {
let tc = turn_with(
"yes",
vec![
json!({"role": "user", "content": "Tell me about cats."}),
json!({"role": "assistant", "content": "Cats are small domesticated mammals."}),
],
false,
false,
);
assert!(turn_prefers_plain_text_completion(&tc));
}
#[test]
fn is_short_affirmation_or_approval_positive() {
assert!(is_short_affirmation_or_approval("Yes, try that"));
assert!(is_short_affirmation_or_approval("go ahead"));
assert!(is_short_affirmation_or_approval("do it"));
assert!(is_short_affirmation_or_approval("sure"));
assert!(is_short_affirmation_or_approval("yes please"));
assert!(is_short_affirmation_or_approval("please proceed"));
assert!(is_short_affirmation_or_approval("sgtm"));
assert!(is_short_affirmation_or_approval("OK!"));
}
#[test]
fn is_short_affirmation_or_approval_negative() {
assert!(!is_short_affirmation_or_approval(""));
assert!(!is_short_affirmation_or_approval("what is the weather?"));
assert!(!is_short_affirmation_or_approval(
"yes can you also generate a chart and email it to my whole team tomorrow morning"
));
assert!(!is_short_affirmation_or_approval("no don't"));
}
#[test]
fn assistant_proposed_action_positive() {
assert!(assistant_proposed_action(&[json!({
"role": "assistant",
"content": "Would you like me to run that for you?"
})]));
assert!(assistant_proposed_action(&[json!({
"role": "assistant",
"content": "I can run the extraction script if you want."
})]));
assert!(assistant_proposed_action(&[json!({
"role": "assistant",
"content": "Should I create the file?"
})]));
}
#[test]
fn assistant_proposed_action_negative() {
assert!(!assistant_proposed_action(&[json!({
"role": "assistant",
"content": "Cats are small domesticated mammals."
})]));
assert!(!assistant_proposed_action(&[json!({
"role": "user",
"content": "Would you like me to run that?"
})]));
}
fn base_plan() -> PlanState {
PlanState {
goal: "Post a thread".to_string(),
success_criteria: vec!["all steps completed".to_string()],
first_action: PlannedAction {
tool: "http_request".to_string(),
target: "https://api.example.com/posts/1".to_string(),
description: "Post tweet 1".to_string(),
},
requires_verification: true,
risky_actions: vec!["Posting externally".to_string()],
version: 1,
planned_steps: vec![PlannedAction {
tool: "http_request".to_string(),
target: "https://api.example.com/posts/1".to_string(),
description: "Post tweet 1".to_string(),
}],
}
}
fn base_tool_call() -> ToolCall {
ToolCall {
id: "tc_1".to_string(),
name: "http_request".to_string(),
arguments: "{}".to_string(),
extra_content: None,
}
}
#[test]
fn validate_pre_execution_plan_rejects_missing_first_step_target() {
let mut plan = base_plan();
plan.planned_steps[0].target.clear();
let tc = base_tool_call();
let result = validate_pre_execution_plan(&plan, &tc, Some(&plan.first_action.target));
assert_eq!(result, Err("planned_steps_first_action_target_mismatch"));
}
#[test]
fn validate_pre_execution_plan_accepts_matching_first_step_target() {
let plan = base_plan();
let tc = base_tool_call();
assert!(validate_pre_execution_plan(&plan, &tc, Some(&plan.first_action.target)).is_ok());
}
#[test]
fn plain_text_guard_allows_internal_delegation() {
assert!(!plain_text_guard_blocks_tool("spawn_agent", true));
assert!(plain_text_guard_blocks_tool("terminal", true));
assert!(!plain_text_guard_blocks_tool("read_file", false));
assert!(!uncertainty_guard_blocks_tool("spawn_agent", true));
assert!(uncertainty_guard_blocks_tool("terminal", true));
}
}