Skip to main content

vtcode_core/core/agent/runner/
execute.rs

1use super::AgentRunner;
2use super::continuation::{
3    CompletionAssessment, ContinuationController, VerificationResult, is_review_like_task,
4};
5use super::helpers::detect_textual_exec_tool_call;
6use super::orchestration::EvaluatorGateOutcome;
7use super::prompt_alignment;
8use crate::config::build_openai_prompt_cache_key;
9use crate::config::constants::tools;
10use crate::config::models::{ModelId, Provider as ModelProvider};
11use crate::config::tool_loop_limit_reached;
12use crate::config::types::{ReasoningEffortLevel, SystemPromptMode, VerbosityLevel};
13use crate::core::agent::blocked_handoff::write_blocked_handoff;
14use crate::core::agent::completion::{check_completion_candidate, check_for_response_loop};
15use crate::core::agent::conversation::{
16    build_conversation, build_messages_from_conversation, conversation_from_messages,
17};
18use crate::core::agent::events::ExecEventRecorder;
19use crate::core::agent::harness_artifacts::existing_harness_artifact_paths;
20use crate::core::agent::harness_kernel::{
21    HarnessRequestPlanInput, SessionToolCatalogSnapshot, build_harness_request_plan,
22};
23use crate::core::agent::runtime::{AgentRuntime, RuntimeControl};
24use crate::core::agent::session::AgentSessionState;
25use crate::core::agent::task::{ContextItem, Task, TaskOutcome, TaskResults};
26use crate::exec::events::HarnessEventKind;
27use crate::llm::model_resolver::ModelResolver;
28use crate::llm::provider::{
29    FinishReason, Message, ResponsesContinuationState, ToolCall, ToolDefinition,
30    prepare_responses_continuation_request, responses_continuation_key,
31    supports_responses_chaining,
32};
33use crate::llm::providers::gemini::wire::Part;
34use crate::prompts::{
35    PromptContext, RuntimePromptContract, append_runtime_mode_sections,
36    append_runtime_tool_prompt_sections, upsert_harness_limits_section,
37};
38use crate::utils::colors::style;
39use anyhow::Result;
40use serde_json::json;
41use std::sync::Arc;
42use tracing::{debug, warn};
43
44fn record_terminal_turn_event(
45    event_recorder: &mut ExecEventRecorder,
46    outcome: &TaskOutcome,
47    usage: vtcode_exec_events::Usage,
48) {
49    if outcome.is_success() {
50        event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnCompleted(
51            vtcode_exec_events::TurnCompletedEvent { usage },
52        ));
53    } else {
54        event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnFailed(
55            vtcode_exec_events::TurnFailedEvent {
56                message: outcome.description(),
57                usage: Some(usage),
58            },
59        ));
60    }
61}
62
63fn emit_blocked_handoff_events(
64    event_recorder: &mut ExecEventRecorder,
65    current_path: &std::path::Path,
66    archive_path: &std::path::Path,
67) {
68    for path in [current_path, archive_path] {
69        event_recorder.harness_event(
70            HarnessEventKind::BlockedHandoffWritten,
71            Some("Blocked handoff written".to_string()),
72            None,
73            Some(path.display().to_string()),
74            None,
75        );
76    }
77}
78
79fn summarize_verification_output(result: &serde_json::Value) -> String {
80    result
81        .get("output")
82        .and_then(serde_json::Value::as_str)
83        .or_else(|| result.get("stderr").and_then(serde_json::Value::as_str))
84        .or_else(|| result.get("stdout").and_then(serde_json::Value::as_str))
85        .map(str::trim)
86        .filter(|text| !text.is_empty())
87        .map(|text| {
88            let truncated = text
89                .lines()
90                .take(20)
91                .collect::<Vec<_>>()
92                .join("\n")
93                .trim()
94                .to_string();
95            if truncated.len() < text.len() {
96                format!("{truncated}\n...")
97            } else {
98                truncated
99            }
100        })
101        .unwrap_or_default()
102}
103
104fn prepare_responses_request_messages<'a>(
105    previous_chains: &mut hashbrown::HashMap<(String, String), ResponsesContinuationState>,
106    provider_name: &str,
107    provider_supports_responses_compaction: bool,
108    model: &str,
109    messages: &'a [Message],
110) -> (std::borrow::Cow<'a, [Message]>, Option<String>) {
111    let key = responses_continuation_key(provider_name, model);
112    let continuation = key.as_ref().and_then(|k| previous_chains.get(k));
113    let prepared = prepare_responses_continuation_request(
114        provider_name,
115        provider_supports_responses_compaction,
116        messages,
117        continuation,
118    );
119    if prepared.clear_stale_chain
120        && let Some(key) = key
121    {
122        previous_chains.remove(&key);
123    }
124
125    (prepared.messages, prepared.previous_response_id)
126}
127
128fn stop_reason_from_finish_reason(finish_reason: &FinishReason) -> String {
129    match finish_reason {
130        FinishReason::Stop => "end_turn".to_string(),
131        FinishReason::Length => "max_tokens".to_string(),
132        FinishReason::ToolCalls => "tool_calls".to_string(),
133        FinishReason::ContentFilter => "content_filter".to_string(),
134        FinishReason::Pause => "pause_turn".to_string(),
135        FinishReason::Refusal => "refusal".to_string(),
136        FinishReason::Error(message) => message.clone(),
137    }
138}
139
140fn estimate_session_cost_usd(
141    provider: &str,
142    model: &str,
143    usage: &vtcode_exec_events::Usage,
144) -> Option<f64> {
145    let usage = crate::llm::provider::Usage {
146        prompt_tokens: u32::try_from(usage.input_tokens).unwrap_or(u32::MAX),
147        completion_tokens: u32::try_from(usage.output_tokens).unwrap_or(u32::MAX),
148        total_tokens: u32::try_from(usage.input_tokens.saturating_add(usage.output_tokens))
149            .unwrap_or(u32::MAX),
150        cached_prompt_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
151        cache_creation_tokens: Some(u32::try_from(usage.cache_creation_tokens).unwrap_or(u32::MAX)),
152        cache_read_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
153        iterations: None,
154    };
155    let resolved = ModelResolver::resolve(Some(provider), model, &[], None)?;
156    let pricing = resolved.pricing()?;
157    ModelResolver::estimate_cost(pricing, &usage)
158}
159
160struct RuntimePromptBundle {
161    system_instruction: Arc<String>,
162    tool_snapshot: SessionToolCatalogSnapshot,
163    request_tools: Option<Arc<Vec<ToolDefinition>>>,
164}
165
166impl AgentRunner {
167    async fn compose_task_system_prompt(
168        &self,
169        prompt_tools: Arc<Vec<ToolDefinition>>,
170        is_simple_task: bool,
171    ) -> Result<String> {
172        if !is_simple_task {
173            return Ok(self.system_prompt.clone());
174        }
175
176        let mut config = self.config().clone();
177        config.agent.system_prompt_mode = SystemPromptMode::Minimal;
178        let mut prompt_context = PromptContext::from_workspace_tools(
179            self._workspace.as_path(),
180            prompt_tools
181                .iter()
182                .map(|tool| tool.function_name().to_string()),
183        );
184        prompt_context.load_available_skills();
185
186        let prompt = super::helpers::compose_system_prompt_with_appendix(
187            self._workspace.as_path(),
188            &config,
189            &prompt_context,
190        )
191        .await?;
192
193        Ok(prompt)
194    }
195
196    async fn build_runtime_prompt_bundle(
197        &self,
198        is_simple_task: bool,
199    ) -> Result<RuntimePromptBundle> {
200        let tool_snapshot = self.build_universal_tool_snapshot().await?;
201        let request_tools = tool_snapshot.snapshot.clone();
202        let prompt_tools = request_tools
203            .clone()
204            .unwrap_or_else(|| Arc::new(Vec::new()));
205        let mut system_prompt = self
206            .compose_task_system_prompt(prompt_tools, is_simple_task)
207            .await?;
208        self.append_active_primary_agent_context(&mut system_prompt);
209
210        let planning_active = self.tool_registry.is_planning_active();
211        let request_user_input_enabled = self
212            .features()
213            .request_user_input_enabled(planning_active, false);
214        let full_auto_active = self
215            .tool_registry
216            .current_full_auto_allowlist()
217            .await
218            .is_some();
219
220        append_runtime_mode_sections(
221            &mut system_prompt,
222            RuntimePromptContract {
223                full_auto: full_auto_active,
224                planning_active,
225                request_user_input_enabled,
226            },
227        );
228        upsert_harness_limits_section(
229            &mut system_prompt,
230            self.config().agent.harness.max_tool_calls_per_turn,
231            self.config().agent.harness.max_tool_wall_clock_secs,
232            self.config().agent.harness.max_tool_retries,
233        );
234        append_runtime_tool_prompt_sections(&mut system_prompt, &tool_snapshot, true);
235
236        Ok(RuntimePromptBundle {
237            system_instruction: Arc::new(system_prompt),
238            tool_snapshot,
239            request_tools,
240        })
241    }
242
243    fn append_active_primary_agent_context(&self, system_prompt: &mut String) {
244        let Some(active_primary_agent) = self.active_primary_agent.as_ref() else {
245            return;
246        };
247
248        system_prompt.push_str("\n\n## Active Primary Agent Runtime State\n");
249        system_prompt.push_str("- Active agent: ");
250        system_prompt.push_str(&active_primary_agent.display_name);
251        system_prompt.push_str("\n- Spec name: ");
252        system_prompt.push_str(&active_primary_agent.identity.name);
253        if let Some(model) = active_primary_agent
254            .model
255            .as_deref()
256            .map(str::trim)
257            .filter(|model| !model.is_empty() && !model.eq_ignore_ascii_case("inherit"))
258        {
259            system_prompt.push_str("\n- Agent model: ");
260            system_prompt.push_str(model);
261        }
262        if let Some(reasoning_effort) = active_primary_agent
263            .reasoning_effort
264            .as_deref()
265            .map(str::trim)
266            .filter(|reasoning_effort| !reasoning_effort.is_empty())
267        {
268            system_prompt.push_str("\n- Agent reasoning effort: ");
269            system_prompt.push_str(reasoning_effort);
270        }
271        system_prompt.push_str("\n\n## Active Primary Agent Instructions\n");
272        system_prompt.push_str(active_primary_agent.instructions.trim());
273    }
274
275    async fn build_validated_runtime_prompt_bundle(
276        &self,
277        is_simple_task: bool,
278    ) -> Result<RuntimePromptBundle> {
279        let mut runner = self;
280        let bundle = runner.build_runtime_prompt_bundle(is_simple_task).await?;
281        prompt_alignment::rebuild_once_on_alignment_mismatch(
282            &mut runner,
283            bundle,
284            |runner| Box::pin((*runner).build_runtime_prompt_bundle(is_simple_task)),
285            |runner, bundle| {
286                let planning_active = runner.tool_registry.is_planning_active();
287                let request_user_input_enabled = runner
288                    .features()
289                    .request_user_input_enabled(planning_active, false);
290                prompt_alignment::validate_prompt_catalog_alignment(
291                    &bundle.system_instruction,
292                    &bundle.tool_snapshot,
293                    planning_active,
294                    request_user_input_enabled,
295                )
296            },
297            "prompt/catalog alignment mismatch; rebuilding runtime prompt bundle",
298            "prompt/catalog alignment mismatch persisted after rebuild",
299        )
300        .await
301    }
302
303    async fn refresh_runtime_prompt_bundle_if_catalog_changed(
304        &self,
305        bundle: &mut RuntimePromptBundle,
306        is_simple_task: bool,
307    ) -> Result<bool> {
308        let current_version = self.tool_registry.tool_catalog_state().current_version();
309        if current_version == bundle.tool_snapshot.version {
310            return Ok(false);
311        }
312
313        debug!(
314            old_version = bundle.tool_snapshot.version,
315            new_version = current_version,
316            "Tool catalog changed mid-task; refreshing runtime prompt bundle"
317        );
318        *bundle = self
319            .build_validated_runtime_prompt_bundle(is_simple_task)
320            .await?;
321        Ok(true)
322    }
323
324    async fn resolve_completion_acceptance(
325        &mut self,
326        effective_task: &Task,
327        session_state: &mut AgentSessionState,
328        event_recorder: &mut ExecEventRecorder,
329        orchestration_enabled: bool,
330        verification_results: &[VerificationResult],
331        revision_rounds_used: &mut usize,
332        max_revision_rounds: usize,
333        should_write_blocked_handoff: &mut bool,
334    ) -> Result<bool> {
335        if !orchestration_enabled {
336            session_state.is_completed = true;
337            session_state.outcome = TaskOutcome::Success;
338            return Ok(true);
339        }
340
341        match self
342            .apply_evaluator_gate(
343                effective_task,
344                session_state,
345                event_recorder,
346                verification_results,
347                revision_rounds_used,
348                max_revision_rounds,
349            )
350            .await?
351        {
352            EvaluatorGateOutcome::Accept => {
353                session_state.is_completed = true;
354                session_state.outcome = TaskOutcome::Success;
355                Ok(true)
356            }
357            EvaluatorGateOutcome::Continue { prompt } => {
358                session_state.add_user_message(prompt);
359                Ok(false)
360            }
361            EvaluatorGateOutcome::Exhausted { reason } => {
362                session_state.outcome = TaskOutcome::Failed { reason };
363                *should_write_blocked_handoff = true;
364                Ok(true)
365            }
366        }
367    }
368
369    async fn run_verification_commands(
370        &self,
371        commands: &[String],
372        event_recorder: &mut ExecEventRecorder,
373    ) -> Result<Vec<VerificationResult>> {
374        let mut results = Vec::with_capacity(commands.len());
375        for command in commands {
376            let command_event = event_recorder.command_started(command);
377            let payload = json!({
378                "action": "run",
379                "command": command,
380                "workdir": self._workspace.display().to_string(),
381                "yield_time_ms": 1000,
382            });
383            let result = self
384                .tool_registry
385                .execute_harness_unified_exec(payload)
386                .await?;
387            let exit_code = result
388                .get("exit_code")
389                .and_then(serde_json::Value::as_i64)
390                .map(|value| value as i32);
391            let success = exit_code.unwrap_or(0) == 0;
392            let output = summarize_verification_output(&result);
393            event_recorder.command_finished(
394                &command_event,
395                if success {
396                    crate::exec::events::CommandExecutionStatus::Completed
397                } else {
398                    crate::exec::events::CommandExecutionStatus::Failed
399                },
400                exit_code,
401                &output,
402            );
403            results.push(VerificationResult {
404                command: command.clone(),
405                success,
406                exit_code,
407                output,
408            });
409            if !success {
410                break;
411            }
412        }
413        Ok(results)
414    }
415
416    /// Execute a task with this agent
417    pub async fn execute_task(
418        &mut self,
419        task: &Task,
420        contexts: &[ContextItem],
421    ) -> Result<TaskResults> {
422        // Align harness context with runner session/task for structured telemetry
423        self.tool_registry
424            .set_harness_session(self.session_id.clone());
425        self.tool_registry.set_harness_task(Some(task.id.clone()));
426
427        let steering_receiver = self.steering_receiver.lock().take();
428        let runtime_setup = {
429            // Agent execution status
430            let agent_prefix = format!("[{}]", self.agent_type);
431            // OPTIMIZATION: Avoid cloning session_id repeatedly by using reference
432            let mut event_recorder = ExecEventRecorder::new(
433                self.session_id.clone(),
434                self.event_sink.clone(),
435                Some(self.thread_handle.clone()),
436            );
437            event_recorder.turn_started();
438            self.runner_println(format_args!(
439                "{agent_prefix} Analyzing request and planning approach..."
440            ));
441
442            self.runner_println(format_args!(
443                "{} Executing {} task: {}",
444                style("[AGENT]").magenta().bold().on_black(),
445                self.agent_type,
446                task.title
447            ));
448
449            let run_started_at = std::time::Instant::now();
450            let is_simple_task = Self::is_simple_task(task, contexts);
451            let prompt_bundle = self
452                .build_validated_runtime_prompt_bundle(is_simple_task)
453                .await?;
454
455            let review_like = is_review_like_task(task);
456            let full_auto_active = self
457                .tool_registry
458                .current_full_auto_allowlist()
459                .await
460                .is_some();
461
462            let mut conversation = conversation_from_messages(&self.bootstrap_messages);
463            conversation.extend(build_conversation(task, contexts));
464
465            // Maintain a mirrored conversation history for providers that expect
466            // OpenAI/Anthropic style message roles.
467            let conversation_messages = build_messages_from_conversation(&conversation);
468
469            // Track execution results
470            // Determine loop guards via cached configuration
471            let max_tool_loops = self.config().tools.max_tool_loops;
472            let preserve_recent_turns = self.config().context.preserve_recent_turns;
473            let max_context_tokens = self.config().context.max_context_tokens;
474
475            let mut session_state = AgentSessionState::new(
476                self.session_id.clone(),
477                self.max_turns,
478                max_tool_loops,
479                max_context_tokens,
480            );
481            session_state.conversation = conversation;
482            session_state.messages = conversation_messages;
483            session_state.last_processed_message_idx = session_state.conversation.len();
484
485            let mut runtime = AgentRuntime::new(session_state, None, steering_receiver);
486
487            if let Err(err) = self.tool_registry.initialize_async().await {
488                warn!(
489                    error = %err,
490                    "Tool registry initialization failed at task start"
491                );
492                runtime
493                    .state
494                    .warnings
495                    .push(format!("Tool registry init failed: {err}"));
496            }
497            let orchestration_enabled =
498                self.harness_plan_build_evaluate_enabled(full_auto_active, review_like);
499            let planner_artifacts = if orchestration_enabled {
500                Some(self.run_planner_phase(task, &mut event_recorder).await?)
501            } else {
502                None
503            };
504            let effective_task = planner_artifacts
505                .as_ref()
506                .map(|artifacts| self.augment_generator_task(task, artifacts))
507                .unwrap_or_else(|| task.clone());
508
509            let mut continuation_controller = ContinuationController::new(
510                self._workspace.clone(),
511                self.tool_registry.planning_workflow_state(),
512                self.config().agent.harness.continuation_policy.clone(),
513                full_auto_active,
514                self.tool_registry.is_planning_active(),
515                review_like,
516            );
517            continuation_controller.prepare(&effective_task).await?;
518
519            (
520                agent_prefix,
521                event_recorder,
522                run_started_at,
523                is_simple_task,
524                prompt_bundle,
525                preserve_recent_turns,
526                max_tool_loops,
527                max_context_tokens,
528                runtime,
529                continuation_controller,
530                effective_task,
531                orchestration_enabled,
532            )
533        };
534
535        let (
536            agent_prefix,
537            mut event_recorder,
538            run_started_at,
539            is_simple_task,
540            mut prompt_bundle,
541            preserve_recent_turns,
542            max_tool_loops,
543            max_context_tokens,
544            mut runtime,
545            mut continuation_controller,
546            effective_task,
547            orchestration_enabled,
548        ) = runtime_setup;
549        let mut cost_warning_emitted = false;
550        let max_budget_usd = self.config().agent.harness.max_budget_usd;
551        let max_revision_rounds = self.config().agent.harness.max_revision_rounds.max(1);
552        let mut revision_rounds_used = 0usize;
553        let mut should_write_blocked_handoff = false;
554
555        let result = {
556            for turn in 0..self.max_turns {
557                if matches!(
558                    runtime.poll_turn_control().await,
559                    RuntimeControl::StopRequested
560                ) {
561                    self.runner_println(format_args!(
562                        "{} {}",
563                        agent_prefix,
564                        style("Stopped by steering signal.").red().bold()
565                    ));
566                    runtime.state.outcome = TaskOutcome::Cancelled;
567                    break;
568                }
569
570                if let Some(input) = runtime.run_until_idle() {
571                    self.runner_println(format_args!(
572                        "{} {}: {}",
573                        agent_prefix,
574                        style("Follow-up Input").cyan().bold(),
575                        input
576                    ));
577                }
578
579                let utilization = runtime.state.utilization();
580                if utilization > 0.90 {
581                    warn!("Context at {:.1}% - approaching limit", utilization * 100.0);
582                    runtime.state.warnings.push(format!(
583                        "Token budget at {}% - approaching context limit",
584                        (utilization * 100.0) as u32
585                    ));
586                }
587
588                if runtime.state.is_completed {
589                    runtime.state.outcome = TaskOutcome::Success;
590                    break;
591                }
592
593                self.runner_println(format_args!(
594                    "{} {} is processing turn {}...",
595                    agent_prefix,
596                    style("(PROC)").cyan().bold(),
597                    turn + 1
598                ));
599
600                let turn_model = self.get_selected_model();
601                let provider_name = self.provider_client.name().to_string();
602                if std::env::var_os("VTCODE_DEBUG_PROVIDER").is_some() {
603                    tracing::debug!(
604                        provider_client = self.provider_client.name(),
605                        turn_model,
606                        "Provider debug turn selection"
607                    );
608                }
609                let turn_reasoning = if is_simple_task {
610                    Some(ReasoningEffortLevel::Minimal)
611                } else {
612                    self.reasoning_effort
613                };
614                let turn_verbosity = if is_simple_task {
615                    Some(VerbosityLevel::Low)
616                } else {
617                    self.verbosity
618                };
619                let max_tokens = if is_simple_task {
620                    Some(800)
621                } else {
622                    Some(2000)
623                };
624
625                self.summarize_conversation_if_needed(
626                    &mut runtime.state,
627                    preserve_recent_turns,
628                    utilization,
629                );
630
631                let parallel_tool_config = if self.model.len() < 20 {
632                    None
633                } else if self
634                    .provider_client
635                    .supports_parallel_tool_config(&turn_model)
636                {
637                    Some(Box::new(
638                        crate::llm::provider::ParallelToolConfig::anthropic_optimized(),
639                    ))
640                } else {
641                    None
642                };
643
644                let provider_kind = turn_model
645                    .parse::<ModelId>()
646                    .map(|model| model.provider())
647                    .unwrap_or(ModelProvider::Gemini);
648
649                if matches!(provider_kind, ModelProvider::Gemini)
650                    && runtime.state.conversation.len() > runtime.state.last_processed_message_idx
651                {
652                    for content in
653                        &runtime.state.conversation[runtime.state.last_processed_message_idx..]
654                    {
655                        let mut text = String::new();
656                        for part in &content.parts {
657                            if let Part::Text {
658                                text: part_text, ..
659                            } = part
660                            {
661                                if !text.is_empty() {
662                                    text.push('\n');
663                                }
664                                text.push_str(part_text);
665                            }
666                        }
667                        let message = match content.role.as_str() {
668                            "model" => Message::assistant(text),
669                            _ => Message::user(text),
670                        };
671                        runtime.state.messages.push(message);
672                    }
673                    runtime.state.last_processed_message_idx = runtime.state.conversation.len();
674                }
675
676                let reasoning_effort =
677                    if self.provider_client.supports_reasoning_effort(&turn_model) {
678                        turn_reasoning
679                    } else {
680                        None
681                    };
682                let temperature = if reasoning_effort.is_some()
683                    && matches!(
684                        provider_kind,
685                        ModelProvider::Anthropic | ModelProvider::Minimax
686                    ) {
687                    None
688                } else {
689                    Some(0.7)
690                };
691
692                let (request_messages, previous_response_id) = prepare_responses_request_messages(
693                    &mut runtime.state.previous_response_chains,
694                    &provider_name,
695                    self.provider_client
696                        .supports_responses_compaction(&turn_model),
697                    &turn_model,
698                    &runtime.state.messages,
699                );
700
701                let request = build_harness_request_plan(HarnessRequestPlanInput {
702                    messages: request_messages.into_owned(),
703                    system_prompt: prompt_bundle.system_instruction.as_ref().clone(),
704                    tools: prompt_bundle.request_tools.clone(),
705                    model: turn_model.clone(),
706                    max_tokens,
707                    temperature,
708                    stream: self.provider_client.supports_streaming(),
709                    tool_choice: None,
710                    parallel_tool_config,
711                    reasoning_effort,
712                    verbosity: turn_verbosity,
713                    metadata: None,
714                    context_management: None,
715                    previous_response_id,
716                    prompt_cache_key: build_openai_prompt_cache_key(
717                        provider_name.eq_ignore_ascii_case("openai")
718                            && self.config().prompt_cache.enabled
719                            && self.config().prompt_cache.providers.openai.enabled,
720                        &self
721                            .config()
722                            .prompt_cache
723                            .providers
724                            .openai
725                            .prompt_cache_key_mode,
726                        Some(&self.session_id),
727                    ),
728                    prompt_cache_profile: None,
729                    tool_catalog_hash: prompt_bundle.tool_snapshot.tool_catalog_hash,
730                })
731                .request;
732                // Cheap pre-flight: catch malformed requests (empty system
733                // prompt, no messages, duplicate tool names, missing required
734                // properties) before paying for an API round-trip.
735                self.validate_llm_request(&request)?;
736                let previous_response_chain_present = request.previous_response_id.is_some();
737                // Clone messages for set_previous_response_chain while keeping
738                // the original in the request so providers that validate messages
739                // (e.g. MiMo) see a non-empty list during stream().
740                let sent_messages = request.messages.clone();
741                // Compute timeout before the call to avoid simultaneous mutable/immutable
742                // borrows of `self` (provider_client vs config).
743                let streaming_timeout = self
744                    .config()
745                    .timeouts
746                    .ceiling_duration(self.config().timeouts.streaming_ceiling_seconds);
747
748                let turn_output = runtime
749                    .run_turn_once(&mut self.provider_client, request, streaming_timeout)
750                    .await?;
751                event_recorder.record_thread_events(runtime.take_emitted_events());
752                let response = turn_output.response;
753                runtime.state.stop_reason =
754                    Some(stop_reason_from_finish_reason(&response.finish_reason));
755                if supports_responses_chaining(
756                    &provider_name,
757                    self.provider_client
758                        .supports_responses_compaction(&turn_model),
759                ) {
760                    runtime.state.set_previous_response_chain(
761                        &provider_name,
762                        &turn_model,
763                        response.request_id.as_deref(),
764                        sent_messages,
765                    );
766                }
767                match estimate_session_cost_usd(
768                    self.config().agent.provider.as_str(),
769                    &turn_model,
770                    &runtime.state.stats.total_usage,
771                ) {
772                    Some(total_cost_usd) => {
773                        runtime.state.total_cost_usd = Some(total_cost_usd);
774                        if let Some(max_budget_usd) = max_budget_usd
775                            && total_cost_usd > max_budget_usd
776                        {
777                            runtime.state.outcome =
778                                TaskOutcome::budget_limit_reached(max_budget_usd, total_cost_usd);
779                            break;
780                        }
781                    }
782                    None => {
783                        runtime.state.total_cost_usd = None;
784                        if max_budget_usd.is_some() && !cost_warning_emitted {
785                            cost_warning_emitted = true;
786                            let warning_message = format!(
787                                "Budget enforcement disabled for model `{turn_model}` because pricing metadata is unavailable"
788                            );
789                            warn!(
790                                provider = %self.config().agent.provider,
791                                model = %turn_model,
792                                "Budget enforcement disabled because pricing metadata is unavailable"
793                            );
794                            runtime.state.warnings.push(warning_message);
795                        }
796                    }
797                }
798                self.runner_println(format_args!(
799                    "{} {} {} received response, processing...",
800                    agent_prefix,
801                    self.agent_type,
802                    style("(RECV)").green().bold()
803                ));
804
805                self.warn_on_empty_response(
806                    &agent_prefix,
807                    response.content.as_deref().unwrap_or(""),
808                    response
809                        .tool_calls
810                        .as_ref()
811                        .is_some_and(|tool_calls| !tool_calls.is_empty()),
812                );
813
814                let response_text = response.content_string();
815                if !response_text.trim().is_empty() {
816                    self.emit_final_assistant_message(&self.agent_type, &response_text);
817                }
818
819                let mut effective_tool_calls = response.tool_calls.clone();
820                let mut forced_continuation = false;
821
822                if effective_tool_calls.is_none()
823                    && response.content_text().len() < 150
824                    && let Some(args_value) = detect_textual_exec_tool_call(response.content_text())
825                {
826                    effective_tool_calls = Some(vec![ToolCall::function(
827                        format!("call_text_{}", turn),
828                        tools::UNIFIED_EXEC.to_string(),
829                        args_value.to_string(),
830                    )]);
831                }
832
833                let is_gemini = matches!(provider_kind, ModelProvider::Gemini);
834
835                if !runtime.state.is_completed
836                    && effective_tool_calls
837                        .as_ref()
838                        .is_none_or(|tool_calls| tool_calls.is_empty())
839                    && !response.content_text().is_empty()
840                {
841                    if check_for_response_loop(response.content_text(), &mut runtime.state) {
842                        self.runner_println(format_args!(
843                            "[{}] {}",
844                            self.agent_type,
845                            style(
846                                "Repetitive assistant response detected. Breaking potential loop."
847                            )
848                            .red()
849                            .bold()
850                        ));
851                        runtime.state.outcome = TaskOutcome::LoopDetected;
852                        break;
853                    }
854
855                    if check_completion_candidate(response.content_text()) {
856                        self.runner_println(format_args!(
857                            "[{}] {}",
858                            self.agent_type,
859                            style("Completion candidate detected; checking tracker and verification state.")
860                                .green()
861                                .bold()
862                        ));
863                        match continuation_controller
864                            .assess_completion(&effective_task, &runtime.state)
865                            .await?
866                        {
867                            CompletionAssessment::Accept => {
868                                if self
869                                    .resolve_completion_acceptance(
870                                        &effective_task,
871                                        &mut runtime.state,
872                                        &mut event_recorder,
873                                        orchestration_enabled,
874                                        &[],
875                                        &mut revision_rounds_used,
876                                        max_revision_rounds,
877                                        &mut should_write_blocked_handoff,
878                                    )
879                                    .await?
880                                {
881                                    break;
882                                }
883                                forced_continuation = true;
884                            }
885                            CompletionAssessment::SkipAccept { reason } => {
886                                event_recorder.harness_event(
887                                    HarnessEventKind::ContinuationSkipped,
888                                    Some(reason),
889                                    None,
890                                    None,
891                                    None,
892                                );
893                                runtime.state.is_completed = true;
894                                runtime.state.outcome = TaskOutcome::Success;
895                                break;
896                            }
897                            CompletionAssessment::Continue { reason, prompt } => {
898                                event_recorder.harness_event(
899                                    HarnessEventKind::ContinuationStarted,
900                                    Some(reason),
901                                    None,
902                                    None,
903                                    None,
904                                );
905                                runtime.state.add_user_message(prompt);
906                                forced_continuation = true;
907                            }
908                            CompletionAssessment::Verify { commands } => {
909                                event_recorder.harness_event(
910                                    HarnessEventKind::VerificationStarted,
911                                    Some(format!("Running verification: {}", commands.join(", "))),
912                                    commands.first().cloned(),
913                                    None,
914                                    None,
915                                );
916                                let verification_results = self
917                                    .run_verification_commands(&commands, &mut event_recorder)
918                                    .await?;
919                                if let Some(failure) =
920                                    verification_results.iter().find(|result| !result.success)
921                                {
922                                    event_recorder.harness_event(
923                                        HarnessEventKind::VerificationFailed,
924                                        Some(format!(
925                                            "{}{}",
926                                            match failure.exit_code {
927                                                Some(code) => format!(
928                                                    "Verification failed: {} (exit code {}).",
929                                                    failure.command, code
930                                                ),
931                                                None => format!(
932                                                    "Verification failed: {}.",
933                                                    failure.command
934                                                ),
935                                            },
936                                            if failure.output.trim().is_empty() {
937                                                String::new()
938                                            } else {
939                                                format!("\n{}", failure.output.trim())
940                                            }
941                                        )),
942                                        Some(failure.command.clone()),
943                                        None,
944                                        failure.exit_code,
945                                    );
946                                } else {
947                                    event_recorder.harness_event(
948                                        HarnessEventKind::VerificationPassed,
949                                        Some(format!(
950                                            "Verification passed: {}",
951                                            commands.join(", ")
952                                        )),
953                                        commands.last().cloned(),
954                                        None,
955                                        Some(0),
956                                    );
957                                }
958
959                                match continuation_controller
960                                    .after_verification(&verification_results)
961                                    .await?
962                                {
963                                    CompletionAssessment::Accept
964                                    | CompletionAssessment::SkipAccept { .. } => {
965                                        if self
966                                            .resolve_completion_acceptance(
967                                                &effective_task,
968                                                &mut runtime.state,
969                                                &mut event_recorder,
970                                                orchestration_enabled,
971                                                &verification_results,
972                                                &mut revision_rounds_used,
973                                                max_revision_rounds,
974                                                &mut should_write_blocked_handoff,
975                                            )
976                                            .await?
977                                        {
978                                            break;
979                                        }
980                                        forced_continuation = true;
981                                    }
982                                    CompletionAssessment::Continue { reason, prompt } => {
983                                        event_recorder.harness_event(
984                                            HarnessEventKind::ContinuationStarted,
985                                            Some(reason),
986                                            None,
987                                            None,
988                                            None,
989                                        );
990                                        runtime.state.add_user_message(prompt);
991                                        forced_continuation = true;
992                                    }
993                                    CompletionAssessment::Verify { .. } => {}
994                                }
995                            }
996                        }
997                    }
998                }
999
1000                if let Some(tool_calls) = effective_tool_calls
1001                    .as_ref()
1002                    .filter(|tool_calls| !tool_calls.is_empty())
1003                    .cloned()
1004                {
1005                    self.execute_tool_call_batches(
1006                        tool_calls,
1007                        &mut runtime,
1008                        &mut event_recorder,
1009                        &agent_prefix,
1010                        is_gemini,
1011                        previous_response_chain_present,
1012                    )
1013                    .await?;
1014                    event_recorder.record_thread_events(runtime.take_emitted_events());
1015                }
1016
1017                // Refresh tool definitions if the catalog was mutated during tool
1018                // execution (e.g. tools.load / tools.unload / skill activation).
1019                let _ = self
1020                    .refresh_runtime_prompt_bundle_if_catalog_changed(
1021                        &mut prompt_bundle,
1022                        is_simple_task,
1023                    )
1024                    .await?;
1025
1026                let had_effective_shell_tool_call =
1027                    effective_tool_calls.as_ref().is_some_and(|calls| {
1028                        calls.iter().any(|call| {
1029                            call.function
1030                                .as_ref()
1031                                .map(|function| function.name.as_str())
1032                                == Some(tools::UNIFIED_EXEC)
1033                        })
1034                    });
1035                let had_tool_call = response
1036                    .tool_calls
1037                    .as_ref()
1038                    .is_some_and(|tool_calls| !tool_calls.is_empty())
1039                    || had_effective_shell_tool_call;
1040
1041                if had_tool_call {
1042                    let loops = runtime.state.register_tool_loop();
1043                    if tool_loop_limit_reached(loops, runtime.state.constraints.max_tool_loops) {
1044                        let warning_message = format!(
1045                            "Reached tool-call limit of {} iterations; pausing autonomous loop",
1046                            runtime.state.constraints.max_tool_loops
1047                        );
1048                        self.record_warning(
1049                            &agent_prefix,
1050                            &mut runtime.state,
1051                            &mut event_recorder,
1052                            warning_message,
1053                        );
1054                        runtime.state.mark_tool_loop_limit_hit();
1055                        break;
1056                    }
1057                    runtime.state.consecutive_idle_turns = 0;
1058                } else {
1059                    runtime.state.reset_tool_loop_guard();
1060                    if forced_continuation {
1061                        runtime.state.consecutive_idle_turns = 0;
1062                    } else if !runtime.state.is_completed {
1063                        runtime.state.consecutive_idle_turns =
1064                            runtime.state.consecutive_idle_turns.saturating_add(1);
1065                        let idle_turn_limit = self.config().agent.idle_turn_limit;
1066                        if runtime.state.consecutive_idle_turns >= idle_turn_limit {
1067                            let warning_message = format!(
1068                                "No tool calls or completion for {} consecutive turns; halting to avoid idle loop",
1069                                runtime.state.consecutive_idle_turns
1070                            );
1071                            self.record_warning(
1072                                &agent_prefix,
1073                                &mut runtime.state,
1074                                &mut event_recorder,
1075                                warning_message,
1076                            );
1077                            runtime.state.outcome = TaskOutcome::StoppedNoAction;
1078                            break;
1079                        }
1080                    }
1081                }
1082
1083                let should_continue = forced_continuation
1084                    || had_tool_call
1085                    || runtime.has_pending_follow_up_inputs()
1086                    || (!runtime.state.is_completed && (turn + 1) < self.max_turns);
1087
1088                if !should_continue {
1089                    if runtime.state.is_completed {
1090                        runtime.state.outcome = TaskOutcome::Success;
1091                    } else if (turn + 1) >= self.max_turns {
1092                        runtime.state.outcome =
1093                            TaskOutcome::turn_limit_reached(self.max_turns, turn + 1);
1094                    } else {
1095                        runtime.state.outcome = TaskOutcome::StoppedNoAction;
1096                    }
1097                    break;
1098                }
1099            }
1100
1101            runtime.state.finalize_outcome(self.max_turns);
1102
1103            let total_duration_ms = run_started_at.elapsed().as_millis();
1104
1105            // Agent execution completed
1106            self.runner_println(format_args!("{} Done", agent_prefix));
1107
1108            // Generate meaningful summary based on agent actions
1109            let average_turn_duration_ms = if runtime.state.turn_count > 0 {
1110                Some(runtime.state.turn_total_ms as f64 / runtime.state.turn_count as f64)
1111            } else {
1112                None
1113            };
1114
1115            let max_turn_duration_ms = if runtime.state.turn_count > 0 {
1116                Some(runtime.state.turn_max_ms)
1117            } else {
1118                None
1119            };
1120
1121            let outcome = runtime.state.outcome.clone();
1122            self.thread_handle
1123                .replace_messages(runtime.state.messages.clone());
1124            let summary = self.generate_task_summary(
1125                &effective_task,
1126                &runtime.state.modified_files,
1127                &runtime.state.executed_commands,
1128                &runtime.state.warnings,
1129                &runtime.state.messages,
1130                runtime.state.stats.turns_executed,
1131                runtime.state.max_tool_loop_streak,
1132                max_tool_loops,
1133                outcome,
1134                total_duration_ms,
1135                average_turn_duration_ms,
1136                max_turn_duration_ms,
1137            );
1138
1139            if !summary.trim().is_empty() {
1140                // Record summary as agent message for event stream
1141                event_recorder.agent_message(&summary);
1142                // Also display summary prominently for immediate visibility in TUI transcript
1143                self.runner_println(format_args!(
1144                    "\n{} Agent Task Summary\n{}",
1145                    style("[Task]").cyan().bold(),
1146                    summary
1147                ));
1148            }
1149
1150            let runtime_agent_config = self.core_agent_config();
1151            if let Err(err) = crate::persistent_memory::finalize_persistent_memory(
1152                &runtime_agent_config,
1153                Some(self.config()),
1154                &runtime.state.messages,
1155            )
1156            .await
1157            {
1158                warn!(
1159                    error = %err,
1160                    session_id = %self.session_id,
1161                    "Failed to update persistent memory"
1162                );
1163            }
1164
1165            if runtime.state.outcome.is_hard_block() || should_write_blocked_handoff {
1166                let relevant_paths = existing_harness_artifact_paths(&self._workspace);
1167                match write_blocked_handoff(
1168                    &self._workspace,
1169                    &self.session_id,
1170                    runtime.state.outcome.code(),
1171                    &runtime.state.outcome.description(),
1172                    &relevant_paths,
1173                ) {
1174                    Ok(artifacts) => emit_blocked_handoff_events(
1175                        &mut event_recorder,
1176                        &artifacts.current_path,
1177                        &artifacts.archive_path,
1178                    ),
1179                    Err(err) => warn!(
1180                        error = %err,
1181                        session_id = %self.session_id,
1182                        "Failed to persist blocked handoff"
1183                    ),
1184                }
1185            }
1186
1187            let total_usage = runtime.state.stats.total_usage.clone();
1188            record_terminal_turn_event(
1189                &mut event_recorder,
1190                &runtime.state.outcome,
1191                total_usage.clone(),
1192            );
1193            event_recorder.thread_completed(
1194                &self.session_id,
1195                runtime.state.outcome.thread_completion_subtype(),
1196                runtime.state.outcome.code(),
1197                runtime
1198                    .state
1199                    .outcome
1200                    .is_success()
1201                    .then_some(summary.as_str()),
1202                runtime.state.stop_reason.as_deref(),
1203                total_usage,
1204                runtime
1205                    .state
1206                    .total_cost_usd
1207                    .and_then(serde_json::Number::from_f64),
1208                runtime.state.stats.turns_executed,
1209            );
1210            let thread_events = event_recorder.into_events();
1211            let steering_receiver = runtime.take_steering_receiver();
1212            let state = std::mem::replace(
1213                &mut runtime.state,
1214                AgentSessionState::new(
1215                    self.session_id.clone(),
1216                    self.max_turns,
1217                    max_tool_loops,
1218                    max_context_tokens,
1219                ),
1220            );
1221
1222            Ok((
1223                state.into_results(summary, thread_events, total_duration_ms),
1224                steering_receiver,
1225            ))
1226        };
1227
1228        let result = match result {
1229            Ok((task_results, steering_receiver)) => {
1230                *self.steering_receiver.lock() = steering_receiver;
1231                Ok(task_results)
1232            }
1233            Err(err) => {
1234                *self.steering_receiver.lock() = runtime.take_steering_receiver();
1235                Err(err)
1236            }
1237        };
1238
1239        self.tool_registry.set_harness_task(None);
1240        result
1241    }
1242}
1243
#[cfg(test)]
mod tests {
    use super::{
        prepare_responses_request_messages, record_terminal_turn_event, tool_loop_limit_reached,
    };
    use crate::core::agent::events::ExecEventRecorder;
    use crate::core::agent::session::AgentSessionState;
    use crate::core::agent::task::TaskOutcome;
    use crate::exec::events::ThreadEvent;
    use crate::llm::provider::Message;

    /// Response id every chain fixture registers before the call under test.
    const SEEDED_RESPONSE_ID: &str = "resp_123";

    /// Starts a turn on a fresh recorder, records the terminal turn event for
    /// `outcome`, and returns how many `(TurnCompleted, TurnFailed)` events the
    /// recorder ended up holding.
    fn terminal_event_counts(outcome: &TaskOutcome) -> (usize, usize) {
        let mut recorder = ExecEventRecorder::new("thread", None, None);
        recorder.turn_started();
        record_terminal_turn_event(&mut recorder, outcome, Default::default());

        let events = recorder.into_events();
        events
            .iter()
            .fold((0, 0), |(completed, failed), event| match event {
                ThreadEvent::TurnCompleted(_) => (completed + 1, failed),
                ThreadEvent::TurnFailed(_) => (completed, failed + 1),
                _ => (completed, failed),
            })
    }

    /// Creates a session state and calls `set_previous_response_chain` on it
    /// for `provider`/`model` with [`SEEDED_RESPONSE_ID`] and a single prior
    /// `hello` user message.
    fn state_with_seeded_chain(provider: &str, model: &str) -> AgentSessionState {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        state.set_previous_response_chain(
            provider,
            model,
            Some(SEEDED_RESPONSE_ID),
            vec![Message::user("hello".to_string())],
        );
        state
    }

    /// The message list handed to the call under test: the seeded `hello`
    /// message followed by a new `continue` message.
    fn hello_then_continue() -> Vec<Message> {
        vec![
            Message::user("hello".to_string()),
            Message::user("continue".to_string()),
        ]
    }

    #[test]
    fn failed_outcome_emits_only_turn_failed() {
        let outcome = TaskOutcome::Failed {
            reason: "boom".to_string(),
        };

        let (completed, failed) = terminal_event_counts(&outcome);

        assert_eq!(failed, 1);
        assert_eq!(completed, 0);
    }

    #[test]
    fn successful_outcome_emits_only_turn_completed() {
        let (completed, failed) = terminal_event_counts(&TaskOutcome::Success);

        assert_eq!(completed, 1);
        assert_eq!(failed, 0);
    }

    #[test]
    fn disabled_tool_loop_limit_never_trips() {
        // A configured limit of 0 is passed for every loop count checked here.
        for loops in [1, 32] {
            assert!(!tool_loop_limit_reached(loops, 0));
        }
    }

    #[test]
    fn openai_prepare_responses_request_messages_uses_incremental_suffix() {
        let mut state = state_with_seeded_chain("openai", "gpt-5.4");
        let history = hello_then_continue();

        let (outgoing, chained_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "openai",
            false,
            "gpt-5.4",
            &history,
        );

        // Only the message that follows the seeded prefix is expected back.
        let expected_suffix = [Message::user("continue".to_string())];
        assert_eq!(chained_id.as_deref(), Some(SEEDED_RESPONSE_ID));
        assert_eq!(outgoing.as_ref(), expected_suffix.as_slice());
    }

    #[test]
    fn gemini_prepare_responses_request_messages_keeps_full_history() {
        let mut state = state_with_seeded_chain("gemini", "gemini-2.5-pro");
        let history = hello_then_continue();

        let (outgoing, chained_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "gemini",
            false,
            "gemini-2.5-pro",
            &history,
        );

        // The full message list is expected back, seeded prefix included.
        assert_eq!(chained_id.as_deref(), Some(SEEDED_RESPONSE_ID));
        assert_eq!(outgoing.as_ref(), history.as_slice());
    }

    #[test]
    fn compatible_prepare_responses_request_messages_uses_incremental_suffix() {
        let mut state = state_with_seeded_chain("mycorp", "gpt-5.4");
        let history = hello_then_continue();

        // Same scenario as the `openai` test, but with a different provider
        // name and the boolean argument set to `true`.
        let (outgoing, chained_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "mycorp",
            true,
            "gpt-5.4",
            &history,
        );

        let expected_suffix = [Message::user("continue".to_string())];
        assert_eq!(chained_id.as_deref(), Some(SEEDED_RESPONSE_ID));
        assert_eq!(outgoing.as_ref(), expected_suffix.as_slice());
    }
}