Skip to main content

vtcode_core/core/agent/runner/
execute.rs

1use super::AgentRunner;
2use super::continuation::{
3    CompletionAssessment, ContinuationController, VerificationResult, is_review_like_task,
4};
5use super::helpers::detect_textual_exec_tool_call;
6use super::orchestration::EvaluatorGateOutcome;
7use super::prompt_alignment;
8use crate::config::build_openai_prompt_cache_key;
9use crate::config::constants::tools;
10use crate::config::models::{ModelId, Provider as ModelProvider};
11use crate::config::tool_loop_limit_reached;
12use crate::config::types::{ReasoningEffortLevel, SystemPromptMode, VerbosityLevel};
13use crate::core::agent::blocked_handoff::write_blocked_handoff;
14use crate::core::agent::completion::{check_completion_candidate, check_for_response_loop};
15use crate::core::agent::conversation::{
16    build_conversation, build_messages_from_conversation, conversation_from_messages,
17};
18use crate::core::agent::events::ExecEventRecorder;
19use crate::core::agent::harness_artifacts::existing_harness_artifact_paths;
20use crate::core::agent::harness_kernel::{
21    HarnessRequestPlanInput, SessionToolCatalogSnapshot, build_harness_request_plan,
22};
23use crate::core::agent::runtime::{AgentRuntime, RuntimeControl};
24use crate::core::agent::session::AgentSessionState;
25use crate::core::agent::task::{ContextItem, Task, TaskOutcome, TaskResults};
26use crate::exec::events::HarnessEventKind;
27use crate::llm::model_resolver::ModelResolver;
28use crate::llm::provider::{
29    FinishReason, Message, ResponsesContinuationState, ToolCall, ToolDefinition,
30    prepare_responses_continuation_request, responses_continuation_key,
31    supports_responses_chaining,
32};
33use crate::llm::providers::gemini::wire::Part;
34use crate::prompts::{
35    PromptContext, RuntimePromptContract, append_runtime_mode_sections,
36    append_runtime_tool_prompt_sections, upsert_harness_limits_section,
37};
38use crate::utils::colors::style;
39use anyhow::Result;
40use serde_json::json;
41use std::sync::Arc;
42use tracing::{debug, warn};
43
44fn record_terminal_turn_event(
45    event_recorder: &mut ExecEventRecorder,
46    outcome: &TaskOutcome,
47    usage: vtcode_exec_events::Usage,
48) {
49    if outcome.is_success() {
50        event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnCompleted(
51            vtcode_exec_events::TurnCompletedEvent { usage },
52        ));
53    } else {
54        event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnFailed(
55            vtcode_exec_events::TurnFailedEvent {
56                message: outcome.description(),
57                usage: Some(usage),
58            },
59        ));
60    }
61}
62
63fn emit_blocked_handoff_events(
64    event_recorder: &mut ExecEventRecorder,
65    current_path: &std::path::Path,
66    archive_path: &std::path::Path,
67) {
68    for path in [current_path, archive_path] {
69        event_recorder.harness_event(
70            HarnessEventKind::BlockedHandoffWritten,
71            Some("Blocked handoff written".to_string()),
72            None,
73            Some(path.display().to_string()),
74            None,
75        );
76    }
77}
78
79fn summarize_verification_output(result: &serde_json::Value) -> String {
80    result
81        .get("output")
82        .and_then(serde_json::Value::as_str)
83        .or_else(|| result.get("stderr").and_then(serde_json::Value::as_str))
84        .or_else(|| result.get("stdout").and_then(serde_json::Value::as_str))
85        .map(str::trim)
86        .filter(|text| !text.is_empty())
87        .map(|text| {
88            let truncated = text
89                .lines()
90                .take(20)
91                .collect::<Vec<_>>()
92                .join("\n")
93                .trim()
94                .to_string();
95            if truncated.len() < text.len() {
96                format!("{truncated}\n...")
97            } else {
98                truncated
99            }
100        })
101        .unwrap_or_default()
102}
103
104fn prepare_responses_request_messages<'a>(
105    previous_chains: &mut hashbrown::HashMap<(String, String), ResponsesContinuationState>,
106    provider_name: &str,
107    provider_supports_responses_compaction: bool,
108    model: &str,
109    messages: &'a [Message],
110) -> (std::borrow::Cow<'a, [Message]>, Option<String>) {
111    let key = responses_continuation_key(provider_name, model);
112    let continuation = key.as_ref().and_then(|k| previous_chains.get(k));
113    let prepared = prepare_responses_continuation_request(
114        provider_name,
115        provider_supports_responses_compaction,
116        messages,
117        continuation,
118    );
119    if prepared.clear_stale_chain
120        && let Some(key) = key
121    {
122        previous_chains.remove(&key);
123    }
124
125    (prepared.messages, prepared.previous_response_id)
126}
127
128fn stop_reason_from_finish_reason(finish_reason: &FinishReason) -> String {
129    match finish_reason {
130        FinishReason::Stop => "end_turn".to_string(),
131        FinishReason::Length => "max_tokens".to_string(),
132        FinishReason::ToolCalls => "tool_calls".to_string(),
133        FinishReason::ContentFilter => "content_filter".to_string(),
134        FinishReason::Pause => "pause_turn".to_string(),
135        FinishReason::Refusal => "refusal".to_string(),
136        FinishReason::Error(message) => message.clone(),
137    }
138}
139
140fn estimate_session_cost_usd(
141    provider: &str,
142    model: &str,
143    usage: &vtcode_exec_events::Usage,
144) -> Option<f64> {
145    let usage = crate::llm::provider::Usage {
146        prompt_tokens: u32::try_from(usage.input_tokens).unwrap_or(u32::MAX),
147        completion_tokens: u32::try_from(usage.output_tokens).unwrap_or(u32::MAX),
148        total_tokens: u32::try_from(usage.input_tokens.saturating_add(usage.output_tokens))
149            .unwrap_or(u32::MAX),
150        cached_prompt_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
151        cache_creation_tokens: Some(u32::try_from(usage.cache_creation_tokens).unwrap_or(u32::MAX)),
152        cache_read_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
153    };
154    let resolved = ModelResolver::resolve(Some(provider), model, &[], None)?;
155    let pricing = resolved.pricing()?;
156    ModelResolver::estimate_cost(pricing, &usage)
157}
158
159struct RuntimePromptBundle {
160    system_instruction: Arc<String>,
161    tool_snapshot: SessionToolCatalogSnapshot,
162    request_tools: Option<Arc<Vec<ToolDefinition>>>,
163}
164
165impl AgentRunner {
166    async fn compose_task_system_prompt(
167        &self,
168        prompt_tools: Arc<Vec<ToolDefinition>>,
169        is_simple_task: bool,
170    ) -> Result<String> {
171        if !is_simple_task {
172            return Ok(self.system_prompt.clone());
173        }
174
175        let mut config = self.config().clone();
176        config.agent.system_prompt_mode = SystemPromptMode::Minimal;
177        let mut prompt_context = PromptContext::from_workspace_tools(
178            self._workspace.as_path(),
179            prompt_tools
180                .iter()
181                .map(|tool| tool.function_name().to_string()),
182        );
183        prompt_context.load_available_skills();
184
185        let prompt = super::helpers::compose_system_prompt_with_appendix(
186            self._workspace.as_path(),
187            &config,
188            &prompt_context,
189        )
190        .await?;
191
192        Ok(prompt)
193    }
194
195    async fn build_runtime_prompt_bundle(
196        &self,
197        is_simple_task: bool,
198    ) -> Result<RuntimePromptBundle> {
199        let tool_snapshot = self.build_universal_tool_snapshot().await?;
200        let request_tools = tool_snapshot.snapshot.clone();
201        let prompt_tools = request_tools
202            .clone()
203            .unwrap_or_else(|| Arc::new(Vec::new()));
204        let mut system_prompt = self
205            .compose_task_system_prompt(prompt_tools, is_simple_task)
206            .await?;
207
208        let plan_mode = self.tool_registry.is_plan_mode();
209        let request_user_input_enabled =
210            self.features().request_user_input_enabled(plan_mode, false);
211        let full_auto_active = self
212            .tool_registry
213            .current_full_auto_allowlist()
214            .await
215            .is_some();
216
217        append_runtime_mode_sections(
218            &mut system_prompt,
219            RuntimePromptContract {
220                full_auto: full_auto_active,
221                plan_mode,
222                request_user_input_enabled,
223            },
224        );
225        upsert_harness_limits_section(
226            &mut system_prompt,
227            self.config().agent.harness.max_tool_calls_per_turn,
228            self.config().agent.harness.max_tool_wall_clock_secs,
229            self.config().agent.harness.max_tool_retries,
230        );
231        append_runtime_tool_prompt_sections(&mut system_prompt, &tool_snapshot, true);
232
233        Ok(RuntimePromptBundle {
234            system_instruction: Arc::new(system_prompt),
235            tool_snapshot,
236            request_tools,
237        })
238    }
239
240    async fn build_validated_runtime_prompt_bundle(
241        &self,
242        is_simple_task: bool,
243    ) -> Result<RuntimePromptBundle> {
244        let mut runner = self;
245        let bundle = runner.build_runtime_prompt_bundle(is_simple_task).await?;
246        prompt_alignment::rebuild_once_on_alignment_mismatch(
247            &mut runner,
248            bundle,
249            |runner| Box::pin((*runner).build_runtime_prompt_bundle(is_simple_task)),
250            |runner, bundle| {
251                let plan_mode = runner.tool_registry.is_plan_mode();
252                let request_user_input_enabled = runner
253                    .features()
254                    .request_user_input_enabled(plan_mode, false);
255                prompt_alignment::validate_prompt_catalog_alignment(
256                    &bundle.system_instruction,
257                    &bundle.tool_snapshot,
258                    plan_mode,
259                    request_user_input_enabled,
260                )
261            },
262            "prompt/catalog alignment mismatch; rebuilding runtime prompt bundle",
263            "prompt/catalog alignment mismatch persisted after rebuild",
264        )
265        .await
266    }
267
268    async fn refresh_runtime_prompt_bundle_if_catalog_changed(
269        &self,
270        bundle: &mut RuntimePromptBundle,
271        is_simple_task: bool,
272    ) -> Result<bool> {
273        let current_version = self.tool_registry.tool_catalog_state().current_version();
274        if current_version == bundle.tool_snapshot.version {
275            return Ok(false);
276        }
277
278        debug!(
279            old_version = bundle.tool_snapshot.version,
280            new_version = current_version,
281            "Tool catalog changed mid-task; refreshing runtime prompt bundle"
282        );
283        *bundle = self
284            .build_validated_runtime_prompt_bundle(is_simple_task)
285            .await?;
286        Ok(true)
287    }
288
289    async fn resolve_completion_acceptance(
290        &mut self,
291        effective_task: &Task,
292        session_state: &mut AgentSessionState,
293        event_recorder: &mut ExecEventRecorder,
294        orchestration_enabled: bool,
295        verification_results: &[VerificationResult],
296        revision_rounds_used: &mut usize,
297        max_revision_rounds: usize,
298        should_write_blocked_handoff: &mut bool,
299    ) -> Result<bool> {
300        if !orchestration_enabled {
301            session_state.is_completed = true;
302            session_state.outcome = TaskOutcome::Success;
303            return Ok(true);
304        }
305
306        match self
307            .apply_evaluator_gate(
308                effective_task,
309                session_state,
310                event_recorder,
311                verification_results,
312                revision_rounds_used,
313                max_revision_rounds,
314            )
315            .await?
316        {
317            EvaluatorGateOutcome::Accept => {
318                session_state.is_completed = true;
319                session_state.outcome = TaskOutcome::Success;
320                Ok(true)
321            }
322            EvaluatorGateOutcome::Continue { prompt } => {
323                session_state.add_user_message(prompt);
324                Ok(false)
325            }
326            EvaluatorGateOutcome::Exhausted { reason } => {
327                session_state.outcome = TaskOutcome::Failed { reason };
328                *should_write_blocked_handoff = true;
329                Ok(true)
330            }
331        }
332    }
333
334    async fn run_verification_commands(
335        &self,
336        commands: &[String],
337        event_recorder: &mut ExecEventRecorder,
338    ) -> Result<Vec<VerificationResult>> {
339        let mut results = Vec::with_capacity(commands.len());
340        for command in commands {
341            let command_event = event_recorder.command_started(command);
342            let payload = json!({
343                "action": "run",
344                "command": command,
345                "workdir": self._workspace.display().to_string(),
346                "yield_time_ms": 1000,
347            });
348            let result = self
349                .tool_registry
350                .execute_harness_unified_exec(payload)
351                .await?;
352            let exit_code = result
353                .get("exit_code")
354                .and_then(serde_json::Value::as_i64)
355                .map(|value| value as i32);
356            let success = exit_code.unwrap_or(0) == 0;
357            let output = summarize_verification_output(&result);
358            event_recorder.command_finished(
359                &command_event,
360                if success {
361                    crate::exec::events::CommandExecutionStatus::Completed
362                } else {
363                    crate::exec::events::CommandExecutionStatus::Failed
364                },
365                exit_code,
366                &output,
367            );
368            results.push(VerificationResult {
369                command: command.clone(),
370                success,
371                exit_code,
372                output,
373            });
374            if !success {
375                break;
376            }
377        }
378        Ok(results)
379    }
380
381    /// Execute a task with this agent
382    pub async fn execute_task(
383        &mut self,
384        task: &Task,
385        contexts: &[ContextItem],
386    ) -> Result<TaskResults> {
387        // Align harness context with runner session/task for structured telemetry
388        self.tool_registry
389            .set_harness_session(self.session_id.clone());
390        self.tool_registry.set_harness_task(Some(task.id.clone()));
391
392        // Ensure the tool registry is ready before entering the turn loop to avoid per-turn reinit.
393        if let Err(err) = self.tool_registry.initialize_async().await {
394            warn!(
395                error = %err,
396                "Tool registry initialization failed at task start"
397            );
398        }
399
400        let steering_receiver = self.steering_receiver.lock().take();
401        let runtime_setup = {
402            // Agent execution status
403            let agent_prefix = format!("[{}]", self.agent_type);
404            // OPTIMIZATION: Avoid cloning session_id repeatedly by using reference
405            let mut event_recorder = ExecEventRecorder::new(
406                self.session_id.clone(),
407                self.event_sink.clone(),
408                Some(self.thread_handle.clone()),
409            );
410            event_recorder.turn_started();
411            self.runner_println(format_args!(
412                "{agent_prefix} Analyzing request and planning approach..."
413            ));
414
415            self.runner_println(format_args!(
416                "{} Executing {} task: {}",
417                style("[AGENT]").magenta().bold().on_black(),
418                self.agent_type,
419                task.title
420            ));
421
422            let run_started_at = std::time::Instant::now();
423            let is_simple_task = Self::is_simple_task(task, contexts);
424            let prompt_bundle = self
425                .build_validated_runtime_prompt_bundle(is_simple_task)
426                .await?;
427
428            let review_like = is_review_like_task(task);
429            let full_auto_active = self
430                .tool_registry
431                .current_full_auto_allowlist()
432                .await
433                .is_some();
434
435            let mut conversation = conversation_from_messages(&self.bootstrap_messages);
436            conversation.extend(build_conversation(task, contexts));
437
438            // Maintain a mirrored conversation history for providers that expect
439            // OpenAI/Anthropic style message roles.
440            let conversation_messages = build_messages_from_conversation(&conversation);
441
442            // Track execution results
443            // Determine loop guards via cached configuration
444            let max_tool_loops = self.config().tools.max_tool_loops;
445            let preserve_recent_turns = self.config().context.preserve_recent_turns;
446            let max_context_tokens = self.config().context.max_context_tokens;
447
448            let mut session_state = AgentSessionState::new(
449                self.session_id.clone(),
450                self.max_turns,
451                max_tool_loops,
452                max_context_tokens,
453            );
454            session_state.conversation = conversation;
455            session_state.messages = conversation_messages;
456            session_state.last_processed_message_idx = session_state.conversation.len();
457
458            let mut runtime = AgentRuntime::new(session_state, None, steering_receiver);
459
460            if let Err(err) = self.tool_registry.initialize_async().await {
461                warn!(
462                    error = %err,
463                    "Tool registry initialization failed at task start"
464                );
465                runtime
466                    .state
467                    .warnings
468                    .push(format!("Tool registry init failed: {err}"));
469            }
470            let orchestration_enabled =
471                self.harness_plan_build_evaluate_enabled(full_auto_active, review_like);
472            let planner_artifacts = if orchestration_enabled {
473                Some(self.run_planner_phase(task, &mut event_recorder).await?)
474            } else {
475                None
476            };
477            let effective_task = planner_artifacts
478                .as_ref()
479                .map(|artifacts| self.augment_generator_task(task, artifacts))
480                .unwrap_or_else(|| task.clone());
481
482            let mut continuation_controller = ContinuationController::new(
483                self._workspace.clone(),
484                self.tool_registry.plan_mode_state(),
485                self.config().agent.harness.continuation_policy.clone(),
486                full_auto_active,
487                self.tool_registry.is_plan_mode(),
488                review_like,
489            );
490            continuation_controller.prepare(&effective_task).await?;
491
492            (
493                agent_prefix,
494                event_recorder,
495                run_started_at,
496                is_simple_task,
497                prompt_bundle,
498                preserve_recent_turns,
499                max_tool_loops,
500                max_context_tokens,
501                runtime,
502                continuation_controller,
503                effective_task,
504                orchestration_enabled,
505            )
506        };
507
508        let (
509            agent_prefix,
510            mut event_recorder,
511            run_started_at,
512            is_simple_task,
513            mut prompt_bundle,
514            preserve_recent_turns,
515            max_tool_loops,
516            max_context_tokens,
517            mut runtime,
518            mut continuation_controller,
519            effective_task,
520            orchestration_enabled,
521        ) = runtime_setup;
522        let mut cost_warning_emitted = false;
523        let max_budget_usd = self.config().agent.harness.max_budget_usd;
524        let max_revision_rounds = self.config().agent.harness.max_revision_rounds.max(1);
525        let mut revision_rounds_used = 0usize;
526        let mut should_write_blocked_handoff = false;
527
528        let result = {
529            for turn in 0..self.max_turns {
530                if matches!(
531                    runtime.poll_turn_control().await,
532                    RuntimeControl::StopRequested
533                ) {
534                    self.runner_println(format_args!(
535                        "{} {}",
536                        agent_prefix,
537                        style("Stopped by steering signal.").red().bold()
538                    ));
539                    runtime.state.outcome = TaskOutcome::Cancelled;
540                    break;
541                }
542
543                if let Some(input) = runtime.run_until_idle() {
544                    self.runner_println(format_args!(
545                        "{} {}: {}",
546                        agent_prefix,
547                        style("Follow-up Input").cyan().bold(),
548                        input
549                    ));
550                }
551
552                let utilization = runtime.state.utilization();
553                if utilization > 0.90 {
554                    warn!("Context at {:.1}% - approaching limit", utilization * 100.0);
555                    runtime.state.warnings.push(format!(
556                        "Token budget at {}% - approaching context limit",
557                        (utilization * 100.0) as u32
558                    ));
559                }
560
561                if runtime.state.is_completed {
562                    runtime.state.outcome = TaskOutcome::Success;
563                    break;
564                }
565
566                self.runner_println(format_args!(
567                    "{} {} is processing turn {}...",
568                    agent_prefix,
569                    style("(PROC)").cyan().bold(),
570                    turn + 1
571                ));
572
573                let turn_model = self.get_selected_model();
574                let provider_name = self.provider_client.name().to_string();
575                if std::env::var_os("VTCODE_DEBUG_PROVIDER").is_some() {
576                    tracing::debug!(
577                        provider_client = self.provider_client.name(),
578                        turn_model,
579                        "Provider debug turn selection"
580                    );
581                }
582                let turn_reasoning = if is_simple_task {
583                    Some(ReasoningEffortLevel::Minimal)
584                } else {
585                    self.reasoning_effort
586                };
587                let turn_verbosity = if is_simple_task {
588                    Some(VerbosityLevel::Low)
589                } else {
590                    self.verbosity
591                };
592                let max_tokens = if is_simple_task {
593                    Some(800)
594                } else {
595                    Some(2000)
596                };
597
598                self.summarize_conversation_if_needed(
599                    &mut runtime.state,
600                    preserve_recent_turns,
601                    utilization,
602                );
603
604                let parallel_tool_config = if self.model.len() < 20 {
605                    None
606                } else if self
607                    .provider_client
608                    .supports_parallel_tool_config(&turn_model)
609                {
610                    Some(Box::new(
611                        crate::llm::provider::ParallelToolConfig::anthropic_optimized(),
612                    ))
613                } else {
614                    None
615                };
616
617                let provider_kind = turn_model
618                    .parse::<ModelId>()
619                    .map(|model| model.provider())
620                    .unwrap_or(ModelProvider::Gemini);
621
622                if matches!(provider_kind, ModelProvider::Gemini)
623                    && runtime.state.conversation.len() > runtime.state.last_processed_message_idx
624                {
625                    for content in
626                        &runtime.state.conversation[runtime.state.last_processed_message_idx..]
627                    {
628                        let mut text = String::new();
629                        for part in &content.parts {
630                            if let Part::Text {
631                                text: part_text, ..
632                            } = part
633                            {
634                                if !text.is_empty() {
635                                    text.push('\n');
636                                }
637                                text.push_str(part_text);
638                            }
639                        }
640                        let message = match content.role.as_str() {
641                            "model" => Message::assistant(text),
642                            _ => Message::user(text),
643                        };
644                        runtime.state.messages.push(message);
645                    }
646                    runtime.state.last_processed_message_idx = runtime.state.conversation.len();
647                }
648
649                let reasoning_effort =
650                    if self.provider_client.supports_reasoning_effort(&turn_model) {
651                        turn_reasoning
652                    } else {
653                        None
654                    };
655                let temperature = if reasoning_effort.is_some()
656                    && matches!(
657                        provider_kind,
658                        ModelProvider::Anthropic | ModelProvider::Minimax
659                    ) {
660                    None
661                } else {
662                    Some(0.7)
663                };
664
665                let (request_messages, previous_response_id) = prepare_responses_request_messages(
666                    &mut runtime.state.previous_response_chains,
667                    &provider_name,
668                    self.provider_client
669                        .supports_responses_compaction(&turn_model),
670                    &turn_model,
671                    &runtime.state.messages,
672                );
673
674                let request = build_harness_request_plan(HarnessRequestPlanInput {
675                    messages: request_messages.into_owned(),
676                    system_prompt: prompt_bundle.system_instruction.as_ref().clone(),
677                    tools: prompt_bundle.request_tools.clone(),
678                    model: turn_model.clone(),
679                    max_tokens,
680                    temperature,
681                    stream: self.provider_client.supports_streaming(),
682                    tool_choice: None,
683                    parallel_tool_config,
684                    reasoning_effort,
685                    verbosity: turn_verbosity,
686                    metadata: None,
687                    context_management: None,
688                    previous_response_id,
689                    prompt_cache_key: build_openai_prompt_cache_key(
690                        provider_name.eq_ignore_ascii_case("openai")
691                            && self.config().prompt_cache.enabled
692                            && self.config().prompt_cache.providers.openai.enabled,
693                        &self
694                            .config()
695                            .prompt_cache
696                            .providers
697                            .openai
698                            .prompt_cache_key_mode,
699                        Some(&self.session_id),
700                    ),
701                    prompt_cache_profile: None,
702                    tool_catalog_hash: prompt_bundle.tool_snapshot.tool_catalog_hash,
703                })
704                .request;
705                // Cheap pre-flight: catch malformed requests (empty system
706                // prompt, no messages, duplicate tool names, missing required
707                // properties) before paying for an API round-trip.
708                self.validate_llm_request(&request)?;
709                let previous_response_chain_present = request.previous_response_id.is_some();
710                // Clone messages for set_previous_response_chain while keeping
711                // the original in the request so providers that validate messages
712                // (e.g. MiMo) see a non-empty list during stream().
713                let sent_messages = request.messages.clone();
714                // Compute timeout before the call to avoid simultaneous mutable/immutable
715                // borrows of `self` (provider_client vs config).
716                let streaming_timeout = self
717                    .config()
718                    .timeouts
719                    .ceiling_duration(self.config().timeouts.streaming_ceiling_seconds);
720
721                let turn_output = runtime
722                    .run_turn_once(&mut self.provider_client, request, streaming_timeout)
723                    .await?;
724                event_recorder.record_thread_events(runtime.take_emitted_events());
725                let response = turn_output.response;
726                runtime.state.stop_reason =
727                    Some(stop_reason_from_finish_reason(&response.finish_reason));
728                if supports_responses_chaining(
729                    &provider_name,
730                    self.provider_client
731                        .supports_responses_compaction(&turn_model),
732                ) {
733                    runtime.state.set_previous_response_chain(
734                        &provider_name,
735                        &turn_model,
736                        response.request_id.as_deref(),
737                        sent_messages,
738                    );
739                }
740                match estimate_session_cost_usd(
741                    self.config().agent.provider.as_str(),
742                    &turn_model,
743                    &runtime.state.stats.total_usage,
744                ) {
745                    Some(total_cost_usd) => {
746                        runtime.state.total_cost_usd = Some(total_cost_usd);
747                        if let Some(max_budget_usd) = max_budget_usd
748                            && total_cost_usd > max_budget_usd
749                        {
750                            runtime.state.outcome =
751                                TaskOutcome::budget_limit_reached(max_budget_usd, total_cost_usd);
752                            break;
753                        }
754                    }
755                    None => {
756                        runtime.state.total_cost_usd = None;
757                        if max_budget_usd.is_some() && !cost_warning_emitted {
758                            cost_warning_emitted = true;
759                            let warning_message = format!(
760                                "Budget enforcement disabled for model `{turn_model}` because pricing metadata is unavailable"
761                            );
762                            warn!(
763                                provider = %self.config().agent.provider,
764                                model = %turn_model,
765                                "Budget enforcement disabled because pricing metadata is unavailable"
766                            );
767                            runtime.state.warnings.push(warning_message);
768                        }
769                    }
770                }
771                self.runner_println(format_args!(
772                    "{} {} {} received response, processing...",
773                    agent_prefix,
774                    self.agent_type,
775                    style("(RECV)").green().bold()
776                ));
777
778                self.warn_on_empty_response(
779                    &agent_prefix,
780                    response.content.as_deref().unwrap_or(""),
781                    response
782                        .tool_calls
783                        .as_ref()
784                        .is_some_and(|tool_calls| !tool_calls.is_empty()),
785                );
786
787                let response_text = response.content_string();
788                if !response_text.trim().is_empty() {
789                    self.emit_final_assistant_message(&self.agent_type, &response_text);
790                }
791
792                let mut effective_tool_calls = response.tool_calls.clone();
793                let mut forced_continuation = false;
794
795                if effective_tool_calls.is_none()
796                    && response.content_text().len() < 150
797                    && let Some(args_value) = detect_textual_exec_tool_call(response.content_text())
798                {
799                    effective_tool_calls = Some(vec![ToolCall::function(
800                        format!("call_text_{}", turn),
801                        tools::UNIFIED_EXEC.to_string(),
802                        args_value.to_string(),
803                    )]);
804                }
805
806                let is_gemini = matches!(provider_kind, ModelProvider::Gemini);
807
808                if !runtime.state.is_completed
809                    && effective_tool_calls
810                        .as_ref()
811                        .is_none_or(|tool_calls| tool_calls.is_empty())
812                    && !response.content_text().is_empty()
813                {
814                    if check_for_response_loop(response.content_text(), &mut runtime.state) {
815                        self.runner_println(format_args!(
816                            "[{}] {}",
817                            self.agent_type,
818                            style(
819                                "Repetitive assistant response detected. Breaking potential loop."
820                            )
821                            .red()
822                            .bold()
823                        ));
824                        runtime.state.outcome = TaskOutcome::LoopDetected;
825                        break;
826                    }
827
828                    if check_completion_candidate(response.content_text()) {
829                        self.runner_println(format_args!(
830                            "[{}] {}",
831                            self.agent_type,
832                            style("Completion candidate detected; checking tracker and verification state.")
833                                .green()
834                                .bold()
835                        ));
836                        match continuation_controller
837                            .assess_completion(&effective_task, &runtime.state)
838                            .await?
839                        {
840                            CompletionAssessment::Accept => {
841                                if self
842                                    .resolve_completion_acceptance(
843                                        &effective_task,
844                                        &mut runtime.state,
845                                        &mut event_recorder,
846                                        orchestration_enabled,
847                                        &[],
848                                        &mut revision_rounds_used,
849                                        max_revision_rounds,
850                                        &mut should_write_blocked_handoff,
851                                    )
852                                    .await?
853                                {
854                                    break;
855                                }
856                                forced_continuation = true;
857                            }
858                            CompletionAssessment::SkipAccept { reason } => {
859                                event_recorder.harness_event(
860                                    HarnessEventKind::ContinuationSkipped,
861                                    Some(reason),
862                                    None,
863                                    None,
864                                    None,
865                                );
866                                runtime.state.is_completed = true;
867                                runtime.state.outcome = TaskOutcome::Success;
868                                break;
869                            }
870                            CompletionAssessment::Continue { reason, prompt } => {
871                                event_recorder.harness_event(
872                                    HarnessEventKind::ContinuationStarted,
873                                    Some(reason),
874                                    None,
875                                    None,
876                                    None,
877                                );
878                                runtime.state.add_user_message(prompt);
879                                forced_continuation = true;
880                            }
881                            CompletionAssessment::Verify { commands } => {
882                                event_recorder.harness_event(
883                                    HarnessEventKind::VerificationStarted,
884                                    Some(format!("Running verification: {}", commands.join(", "))),
885                                    commands.first().cloned(),
886                                    None,
887                                    None,
888                                );
889                                let verification_results = self
890                                    .run_verification_commands(&commands, &mut event_recorder)
891                                    .await?;
892                                if let Some(failure) =
893                                    verification_results.iter().find(|result| !result.success)
894                                {
895                                    event_recorder.harness_event(
896                                        HarnessEventKind::VerificationFailed,
897                                        Some(format!(
898                                            "{}{}",
899                                            match failure.exit_code {
900                                                Some(code) => format!(
901                                                    "Verification failed: {} (exit code {}).",
902                                                    failure.command, code
903                                                ),
904                                                None => format!(
905                                                    "Verification failed: {}.",
906                                                    failure.command
907                                                ),
908                                            },
909                                            if failure.output.trim().is_empty() {
910                                                String::new()
911                                            } else {
912                                                format!("\n{}", failure.output.trim())
913                                            }
914                                        )),
915                                        Some(failure.command.clone()),
916                                        None,
917                                        failure.exit_code,
918                                    );
919                                } else {
920                                    event_recorder.harness_event(
921                                        HarnessEventKind::VerificationPassed,
922                                        Some(format!(
923                                            "Verification passed: {}",
924                                            commands.join(", ")
925                                        )),
926                                        commands.last().cloned(),
927                                        None,
928                                        Some(0),
929                                    );
930                                }
931
932                                match continuation_controller
933                                    .after_verification(&verification_results)
934                                    .await?
935                                {
936                                    CompletionAssessment::Accept
937                                    | CompletionAssessment::SkipAccept { .. } => {
938                                        if self
939                                            .resolve_completion_acceptance(
940                                                &effective_task,
941                                                &mut runtime.state,
942                                                &mut event_recorder,
943                                                orchestration_enabled,
944                                                &verification_results,
945                                                &mut revision_rounds_used,
946                                                max_revision_rounds,
947                                                &mut should_write_blocked_handoff,
948                                            )
949                                            .await?
950                                        {
951                                            break;
952                                        }
953                                        forced_continuation = true;
954                                    }
955                                    CompletionAssessment::Continue { reason, prompt } => {
956                                        event_recorder.harness_event(
957                                            HarnessEventKind::ContinuationStarted,
958                                            Some(reason),
959                                            None,
960                                            None,
961                                            None,
962                                        );
963                                        runtime.state.add_user_message(prompt);
964                                        forced_continuation = true;
965                                    }
966                                    CompletionAssessment::Verify { .. } => {}
967                                }
968                            }
969                        }
970                    }
971                }
972
973                if let Some(tool_calls) = effective_tool_calls
974                    .as_ref()
975                    .filter(|tool_calls| !tool_calls.is_empty())
976                    .cloned()
977                {
978                    self.execute_tool_call_batches(
979                        tool_calls,
980                        &mut runtime,
981                        &mut event_recorder,
982                        &agent_prefix,
983                        is_gemini,
984                        previous_response_chain_present,
985                    )
986                    .await?;
987                    event_recorder.record_thread_events(runtime.take_emitted_events());
988                }
989
990                // Refresh tool definitions if the catalog was mutated during tool
991                // execution (e.g. tools.load / tools.unload / skill activation).
992                let _ = self
993                    .refresh_runtime_prompt_bundle_if_catalog_changed(
994                        &mut prompt_bundle,
995                        is_simple_task,
996                    )
997                    .await?;
998
999                let had_effective_shell_tool_call =
1000                    effective_tool_calls.as_ref().is_some_and(|calls| {
1001                        calls.iter().any(|call| {
1002                            call.function
1003                                .as_ref()
1004                                .map(|function| function.name.as_str())
1005                                == Some(tools::UNIFIED_EXEC)
1006                        })
1007                    });
1008                let had_tool_call = response
1009                    .tool_calls
1010                    .as_ref()
1011                    .is_some_and(|tool_calls| !tool_calls.is_empty())
1012                    || had_effective_shell_tool_call;
1013
1014                if had_tool_call {
1015                    let loops = runtime.state.register_tool_loop();
1016                    if tool_loop_limit_reached(loops, runtime.state.constraints.max_tool_loops) {
1017                        let warning_message = format!(
1018                            "Reached tool-call limit of {} iterations; pausing autonomous loop",
1019                            runtime.state.constraints.max_tool_loops
1020                        );
1021                        self.record_warning(
1022                            &agent_prefix,
1023                            &mut runtime.state,
1024                            &mut event_recorder,
1025                            warning_message,
1026                        );
1027                        runtime.state.mark_tool_loop_limit_hit();
1028                        break;
1029                    }
1030                    runtime.state.consecutive_idle_turns = 0;
1031                } else {
1032                    runtime.state.reset_tool_loop_guard();
1033                    if forced_continuation {
1034                        runtime.state.consecutive_idle_turns = 0;
1035                    } else if !runtime.state.is_completed {
1036                        runtime.state.consecutive_idle_turns =
1037                            runtime.state.consecutive_idle_turns.saturating_add(1);
1038                        let idle_turn_limit = self.config().agent.idle_turn_limit;
1039                        if runtime.state.consecutive_idle_turns >= idle_turn_limit {
1040                            let warning_message = format!(
1041                                "No tool calls or completion for {} consecutive turns; halting to avoid idle loop",
1042                                runtime.state.consecutive_idle_turns
1043                            );
1044                            self.record_warning(
1045                                &agent_prefix,
1046                                &mut runtime.state,
1047                                &mut event_recorder,
1048                                warning_message,
1049                            );
1050                            runtime.state.outcome = TaskOutcome::StoppedNoAction;
1051                            break;
1052                        }
1053                    }
1054                }
1055
1056                let should_continue = forced_continuation
1057                    || had_tool_call
1058                    || runtime.has_pending_follow_up_inputs()
1059                    || (!runtime.state.is_completed && (turn + 1) < self.max_turns);
1060
1061                if !should_continue {
1062                    if runtime.state.is_completed {
1063                        runtime.state.outcome = TaskOutcome::Success;
1064                    } else if (turn + 1) >= self.max_turns {
1065                        runtime.state.outcome =
1066                            TaskOutcome::turn_limit_reached(self.max_turns, turn + 1);
1067                    } else {
1068                        runtime.state.outcome = TaskOutcome::StoppedNoAction;
1069                    }
1070                    break;
1071                }
1072            }
1073
1074            runtime.state.finalize_outcome(self.max_turns);
1075
1076            let total_duration_ms = run_started_at.elapsed().as_millis();
1077
1078            // Agent execution completed
1079            self.runner_println(format_args!("{} Done", agent_prefix));
1080
1081            // Generate meaningful summary based on agent actions
1082            let average_turn_duration_ms = if runtime.state.turn_count > 0 {
1083                Some(runtime.state.turn_total_ms as f64 / runtime.state.turn_count as f64)
1084            } else {
1085                None
1086            };
1087
1088            let max_turn_duration_ms = if runtime.state.turn_count > 0 {
1089                Some(runtime.state.turn_max_ms)
1090            } else {
1091                None
1092            };
1093
1094            let outcome = runtime.state.outcome.clone();
1095            self.thread_handle
1096                .replace_messages(runtime.state.messages.clone());
1097            let summary = self.generate_task_summary(
1098                &effective_task,
1099                &runtime.state.modified_files,
1100                &runtime.state.executed_commands,
1101                &runtime.state.warnings,
1102                &runtime.state.messages,
1103                runtime.state.stats.turns_executed,
1104                runtime.state.max_tool_loop_streak,
1105                max_tool_loops,
1106                outcome,
1107                total_duration_ms,
1108                average_turn_duration_ms,
1109                max_turn_duration_ms,
1110            );
1111
1112            if !summary.trim().is_empty() {
1113                // Record summary as agent message for event stream
1114                event_recorder.agent_message(&summary);
1115                // Also display summary prominently for immediate visibility in TUI transcript
1116                self.runner_println(format_args!(
1117                    "\n{} Agent Task Summary\n{}",
1118                    style("[Task]").cyan().bold(),
1119                    summary
1120                ));
1121            }
1122
1123            let runtime_agent_config = self.core_agent_config();
1124            if let Err(err) = crate::persistent_memory::finalize_persistent_memory(
1125                &runtime_agent_config,
1126                Some(self.config()),
1127                &runtime.state.messages,
1128            )
1129            .await
1130            {
1131                warn!(
1132                    error = %err,
1133                    session_id = %self.session_id,
1134                    "Failed to update persistent memory"
1135                );
1136            }
1137
1138            if runtime.state.outcome.is_hard_block() || should_write_blocked_handoff {
1139                let relevant_paths = existing_harness_artifact_paths(&self._workspace);
1140                match write_blocked_handoff(
1141                    &self._workspace,
1142                    &self.session_id,
1143                    runtime.state.outcome.code(),
1144                    &runtime.state.outcome.description(),
1145                    &relevant_paths,
1146                ) {
1147                    Ok(artifacts) => emit_blocked_handoff_events(
1148                        &mut event_recorder,
1149                        &artifacts.current_path,
1150                        &artifacts.archive_path,
1151                    ),
1152                    Err(err) => warn!(
1153                        error = %err,
1154                        session_id = %self.session_id,
1155                        "Failed to persist blocked handoff"
1156                    ),
1157                }
1158            }
1159
1160            let total_usage = runtime.state.stats.total_usage.clone();
1161            record_terminal_turn_event(
1162                &mut event_recorder,
1163                &runtime.state.outcome,
1164                total_usage.clone(),
1165            );
1166            event_recorder.thread_completed(
1167                &self.session_id,
1168                runtime.state.outcome.thread_completion_subtype(),
1169                runtime.state.outcome.code(),
1170                runtime
1171                    .state
1172                    .outcome
1173                    .is_success()
1174                    .then_some(summary.as_str()),
1175                runtime.state.stop_reason.as_deref(),
1176                total_usage,
1177                runtime
1178                    .state
1179                    .total_cost_usd
1180                    .and_then(serde_json::Number::from_f64),
1181                runtime.state.stats.turns_executed,
1182            );
1183            let thread_events = event_recorder.into_events();
1184            let steering_receiver = runtime.take_steering_receiver();
1185            let state = std::mem::replace(
1186                &mut runtime.state,
1187                AgentSessionState::new(
1188                    self.session_id.clone(),
1189                    self.max_turns,
1190                    max_tool_loops,
1191                    max_context_tokens,
1192                ),
1193            );
1194
1195            Ok((
1196                state.into_results(summary, thread_events, total_duration_ms),
1197                steering_receiver,
1198            ))
1199        };
1200
1201        let result = match result {
1202            Ok((task_results, steering_receiver)) => {
1203                *self.steering_receiver.lock() = steering_receiver;
1204                Ok(task_results)
1205            }
1206            Err(err) => {
1207                *self.steering_receiver.lock() = runtime.take_steering_receiver();
1208                Err(err)
1209            }
1210        };
1211
1212        self.tool_registry.set_harness_task(None);
1213        result
1214    }
1215}
1216
#[cfg(test)]
mod tests {
    use super::{
        prepare_responses_request_messages, record_terminal_turn_event, tool_loop_limit_reached,
    };
    use crate::core::agent::events::ExecEventRecorder;
    use crate::core::agent::session::AgentSessionState;
    use crate::core::agent::task::TaskOutcome;
    use crate::exec::events::ThreadEvent;
    use crate::llm::provider::Message;

    /// Response id that every chaining test registers as the previous response.
    const PREVIOUS_RESPONSE_ID: &str = "resp_123";

    /// Starts a turn on a fresh recorder, records the terminal event for
    /// `outcome`, and returns `(turn_completed, turn_failed)` event counts.
    fn terminal_event_counts(outcome: &TaskOutcome) -> (usize, usize) {
        let mut recorder = ExecEventRecorder::new("thread", None, None);
        recorder.turn_started();
        record_terminal_turn_event(&mut recorder, outcome, Default::default());

        let events = recorder.into_events();
        let completed = events
            .iter()
            .filter(|event| matches!(event, ThreadEvent::TurnCompleted(_)))
            .count();
        let failed = events
            .iter()
            .filter(|event| matches!(event, ThreadEvent::TurnFailed(_)))
            .count();
        (completed, failed)
    }

    /// Builds a session state whose previous-response chain for
    /// `provider`/`model` was recorded after a single "hello" user message.
    ///
    /// Returns that state together with the current two-message history
    /// ("hello" followed by "continue") used as the next request's input.
    fn chained_session(provider: &str, model: &str) -> (AgentSessionState, Vec<Message>) {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        state.set_previous_response_chain(
            provider,
            model,
            Some(PREVIOUS_RESPONSE_ID),
            vec![Message::user("hello".to_string())],
        );

        let history = vec![
            Message::user("hello".to_string()),
            Message::user("continue".to_string()),
        ];
        (state, history)
    }

    // A failed outcome must close the turn with exactly one `TurnFailed`
    // event and no `TurnCompleted` event.
    #[test]
    fn failed_outcome_emits_only_turn_failed() {
        let outcome = TaskOutcome::Failed {
            reason: String::from("boom"),
        };

        let (completed, failed) = terminal_event_counts(&outcome);

        assert_eq!(failed, 1);
        assert_eq!(completed, 0);
    }

    // A successful outcome must close the turn with exactly one
    // `TurnCompleted` event and no `TurnFailed` event.
    #[test]
    fn successful_outcome_emits_only_turn_completed() {
        let (completed, failed) = terminal_event_counts(&TaskOutcome::Success);

        assert_eq!(completed, 1);
        assert_eq!(failed, 0);
    }

    // A limit of zero never reports the tool-loop limit as reached,
    // regardless of how many loops were registered.
    #[test]
    fn disabled_tool_loop_limit_never_trips() {
        for loops in [1, 32] {
            assert!(!tool_loop_limit_reached(loops, 0));
        }
    }

    // With an OpenAI chain in place, only the messages after the already-sent
    // prefix are forwarded, alongside the stored response id.
    #[test]
    fn openai_prepare_responses_request_messages_uses_incremental_suffix() {
        let (mut state, history) = chained_session("openai", "gpt-5.4");

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "openai",
            false,
            "gpt-5.4",
            &history,
        );

        let expected_suffix = [Message::user("continue".to_string())];
        assert_eq!(previous_response_id.as_deref(), Some(PREVIOUS_RESPONSE_ID));
        assert_eq!(request_messages.as_ref(), expected_suffix.as_slice());
    }

    // Gemini still reports the stored response id but is sent the complete
    // message history rather than a suffix.
    #[test]
    fn gemini_prepare_responses_request_messages_keeps_full_history() {
        let (mut state, history) = chained_session("gemini", "gemini-2.5-pro");

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "gemini",
            false,
            "gemini-2.5-pro",
            &history,
        );

        assert_eq!(previous_response_id.as_deref(), Some(PREVIOUS_RESPONSE_ID));
        assert_eq!(request_messages.as_ref(), history.as_slice());
    }

    // A custom provider name combined with the boolean flag set to `true`
    // gets the same incremental-suffix treatment as OpenAI.
    // NOTE(review): the flag presumably marks Responses-compatible providers;
    // confirm against the signature of `prepare_responses_request_messages`.
    #[test]
    fn compatible_prepare_responses_request_messages_uses_incremental_suffix() {
        let (mut state, history) = chained_session("mycorp", "gpt-5.4");

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "mycorp",
            true,
            "gpt-5.4",
            &history,
        );

        let expected_suffix = [Message::user("continue".to_string())];
        assert_eq!(previous_response_id.as_deref(), Some(PREVIOUS_RESPONSE_ID));
        assert_eq!(request_messages.as_ref(), expected_suffix.as_slice());
    }
}