1use super::AgentRunner;
2use super::continuation::{
3 CompletionAssessment, ContinuationController, VerificationResult, is_review_like_task,
4};
5use super::helpers::detect_textual_exec_tool_call;
6use super::orchestration::EvaluatorGateOutcome;
7use super::prompt_alignment;
8use crate::config::build_openai_prompt_cache_key;
9use crate::config::constants::tools;
10use crate::config::models::{ModelId, Provider as ModelProvider};
11use crate::config::tool_loop_limit_reached;
12use crate::config::types::{ReasoningEffortLevel, SystemPromptMode, VerbosityLevel};
13use crate::core::agent::blocked_handoff::write_blocked_handoff;
14use crate::core::agent::completion::{check_completion_candidate, check_for_response_loop};
15use crate::core::agent::conversation::{
16 build_conversation, build_messages_from_conversation, conversation_from_messages,
17};
18use crate::core::agent::events::ExecEventRecorder;
19use crate::core::agent::harness_artifacts::existing_harness_artifact_paths;
20use crate::core::agent::harness_kernel::{
21 HarnessRequestPlanInput, SessionToolCatalogSnapshot, build_harness_request_plan,
22};
23use crate::core::agent::runtime::{AgentRuntime, RuntimeControl};
24use crate::core::agent::session::AgentSessionState;
25use crate::core::agent::task::{ContextItem, Task, TaskOutcome, TaskResults};
26use crate::exec::events::HarnessEventKind;
27use crate::llm::model_resolver::ModelResolver;
28use crate::llm::provider::{
29 FinishReason, Message, ResponsesContinuationState, ToolCall, ToolDefinition,
30 prepare_responses_continuation_request, responses_continuation_key,
31 supports_responses_chaining,
32};
33use crate::llm::providers::gemini::wire::Part;
34use crate::prompts::{
35 PromptContext, RuntimePromptContract, append_runtime_mode_sections,
36 append_runtime_tool_prompt_sections, upsert_harness_limits_section,
37};
38use crate::utils::colors::style;
39use anyhow::Result;
40use serde_json::json;
41use std::sync::Arc;
42use tracing::{debug, warn};
43
44fn record_terminal_turn_event(
45 event_recorder: &mut ExecEventRecorder,
46 outcome: &TaskOutcome,
47 usage: vtcode_exec_events::Usage,
48) {
49 if outcome.is_success() {
50 event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnCompleted(
51 vtcode_exec_events::TurnCompletedEvent { usage },
52 ));
53 } else {
54 event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnFailed(
55 vtcode_exec_events::TurnFailedEvent {
56 message: outcome.description(),
57 usage: Some(usage),
58 },
59 ));
60 }
61}
62
63fn emit_blocked_handoff_events(
64 event_recorder: &mut ExecEventRecorder,
65 current_path: &std::path::Path,
66 archive_path: &std::path::Path,
67) {
68 for path in [current_path, archive_path] {
69 event_recorder.harness_event(
70 HarnessEventKind::BlockedHandoffWritten,
71 Some("Blocked handoff written".to_string()),
72 None,
73 Some(path.display().to_string()),
74 None,
75 );
76 }
77}
78
79fn summarize_verification_output(result: &serde_json::Value) -> String {
80 result
81 .get("output")
82 .and_then(serde_json::Value::as_str)
83 .or_else(|| result.get("stderr").and_then(serde_json::Value::as_str))
84 .or_else(|| result.get("stdout").and_then(serde_json::Value::as_str))
85 .map(str::trim)
86 .filter(|text| !text.is_empty())
87 .map(|text| {
88 let truncated = text
89 .lines()
90 .take(20)
91 .collect::<Vec<_>>()
92 .join("\n")
93 .trim()
94 .to_string();
95 if truncated.len() < text.len() {
96 format!("{truncated}\n...")
97 } else {
98 truncated
99 }
100 })
101 .unwrap_or_default()
102}
103
104fn prepare_responses_request_messages<'a>(
105 previous_chains: &mut hashbrown::HashMap<(String, String), ResponsesContinuationState>,
106 provider_name: &str,
107 provider_supports_responses_compaction: bool,
108 model: &str,
109 messages: &'a [Message],
110) -> (std::borrow::Cow<'a, [Message]>, Option<String>) {
111 let key = responses_continuation_key(provider_name, model);
112 let continuation = key.as_ref().and_then(|k| previous_chains.get(k));
113 let prepared = prepare_responses_continuation_request(
114 provider_name,
115 provider_supports_responses_compaction,
116 messages,
117 continuation,
118 );
119 if prepared.clear_stale_chain
120 && let Some(key) = key
121 {
122 previous_chains.remove(&key);
123 }
124
125 (prepared.messages, prepared.previous_response_id)
126}
127
128fn stop_reason_from_finish_reason(finish_reason: &FinishReason) -> String {
129 match finish_reason {
130 FinishReason::Stop => "end_turn".to_string(),
131 FinishReason::Length => "max_tokens".to_string(),
132 FinishReason::ToolCalls => "tool_calls".to_string(),
133 FinishReason::ContentFilter => "content_filter".to_string(),
134 FinishReason::Pause => "pause_turn".to_string(),
135 FinishReason::Refusal => "refusal".to_string(),
136 FinishReason::Error(message) => message.clone(),
137 }
138}
139
140fn estimate_session_cost_usd(
141 provider: &str,
142 model: &str,
143 usage: &vtcode_exec_events::Usage,
144) -> Option<f64> {
145 let usage = crate::llm::provider::Usage {
146 prompt_tokens: u32::try_from(usage.input_tokens).unwrap_or(u32::MAX),
147 completion_tokens: u32::try_from(usage.output_tokens).unwrap_or(u32::MAX),
148 total_tokens: u32::try_from(usage.input_tokens.saturating_add(usage.output_tokens))
149 .unwrap_or(u32::MAX),
150 cached_prompt_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
151 cache_creation_tokens: Some(u32::try_from(usage.cache_creation_tokens).unwrap_or(u32::MAX)),
152 cache_read_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
153 iterations: None,
154 };
155 let resolved = ModelResolver::resolve(Some(provider), model, &[], None)?;
156 let pricing = resolved.pricing()?;
157 ModelResolver::estimate_cost(pricing, &usage)
158}
159
/// System prompt and tool catalog data prepared together for a task's requests.
struct RuntimePromptBundle {
    /// Fully composed system prompt text, shared behind an `Arc`.
    system_instruction: Arc<String>,
    /// Tool catalog snapshot the prompt was built against; its `version` is
    /// compared with the registry's current version to detect catalog changes.
    tool_snapshot: SessionToolCatalogSnapshot,
    /// Tool definitions sent with each request (a clone of the snapshot's
    /// `snapshot` field), or `None` when the snapshot carries none.
    request_tools: Option<Arc<Vec<ToolDefinition>>>,
}
165
166impl AgentRunner {
167 async fn compose_task_system_prompt(
168 &self,
169 prompt_tools: Arc<Vec<ToolDefinition>>,
170 is_simple_task: bool,
171 ) -> Result<String> {
172 if !is_simple_task {
173 return Ok(self.system_prompt.clone());
174 }
175
176 let mut config = self.config().clone();
177 config.agent.system_prompt_mode = SystemPromptMode::Minimal;
178 let mut prompt_context = PromptContext::from_workspace_tools(
179 self._workspace.as_path(),
180 prompt_tools
181 .iter()
182 .map(|tool| tool.function_name().to_string()),
183 );
184 prompt_context.load_available_skills();
185
186 let prompt = super::helpers::compose_system_prompt_with_appendix(
187 self._workspace.as_path(),
188 &config,
189 &prompt_context,
190 )
191 .await?;
192
193 Ok(prompt)
194 }
195
196 async fn build_runtime_prompt_bundle(
197 &self,
198 is_simple_task: bool,
199 ) -> Result<RuntimePromptBundle> {
200 let tool_snapshot = self.build_universal_tool_snapshot().await?;
201 let request_tools = tool_snapshot.snapshot.clone();
202 let prompt_tools = request_tools
203 .clone()
204 .unwrap_or_else(|| Arc::new(Vec::new()));
205 let mut system_prompt = self
206 .compose_task_system_prompt(prompt_tools, is_simple_task)
207 .await?;
208 self.append_active_primary_agent_context(&mut system_prompt);
209
210 let planning_active = self.tool_registry.is_planning_active();
211 let request_user_input_enabled = self
212 .features()
213 .request_user_input_enabled(planning_active, false);
214 let full_auto_active = self
215 .tool_registry
216 .current_full_auto_allowlist()
217 .await
218 .is_some();
219
220 append_runtime_mode_sections(
221 &mut system_prompt,
222 RuntimePromptContract {
223 full_auto: full_auto_active,
224 planning_active,
225 request_user_input_enabled,
226 },
227 );
228 upsert_harness_limits_section(
229 &mut system_prompt,
230 self.config().agent.harness.max_tool_calls_per_turn,
231 self.config().agent.harness.max_tool_wall_clock_secs,
232 self.config().agent.harness.max_tool_retries,
233 );
234 append_runtime_tool_prompt_sections(&mut system_prompt, &tool_snapshot, true);
235
236 Ok(RuntimePromptBundle {
237 system_instruction: Arc::new(system_prompt),
238 tool_snapshot,
239 request_tools,
240 })
241 }
242
243 fn append_active_primary_agent_context(&self, system_prompt: &mut String) {
244 let Some(active_primary_agent) = self.active_primary_agent.as_ref() else {
245 return;
246 };
247
248 system_prompt.push_str("\n\n## Active Primary Agent Runtime State\n");
249 system_prompt.push_str("- Active agent: ");
250 system_prompt.push_str(&active_primary_agent.display_name);
251 system_prompt.push_str("\n- Spec name: ");
252 system_prompt.push_str(&active_primary_agent.identity.name);
253 if let Some(model) = active_primary_agent
254 .model
255 .as_deref()
256 .map(str::trim)
257 .filter(|model| !model.is_empty() && !model.eq_ignore_ascii_case("inherit"))
258 {
259 system_prompt.push_str("\n- Agent model: ");
260 system_prompt.push_str(model);
261 }
262 if let Some(reasoning_effort) = active_primary_agent
263 .reasoning_effort
264 .as_deref()
265 .map(str::trim)
266 .filter(|reasoning_effort| !reasoning_effort.is_empty())
267 {
268 system_prompt.push_str("\n- Agent reasoning effort: ");
269 system_prompt.push_str(reasoning_effort);
270 }
271 system_prompt.push_str("\n\n## Active Primary Agent Instructions\n");
272 system_prompt.push_str(active_primary_agent.instructions.trim());
273 }
274
275 async fn build_validated_runtime_prompt_bundle(
276 &self,
277 is_simple_task: bool,
278 ) -> Result<RuntimePromptBundle> {
279 let mut runner = self;
280 let bundle = runner.build_runtime_prompt_bundle(is_simple_task).await?;
281 prompt_alignment::rebuild_once_on_alignment_mismatch(
282 &mut runner,
283 bundle,
284 |runner| Box::pin((*runner).build_runtime_prompt_bundle(is_simple_task)),
285 |runner, bundle| {
286 let planning_active = runner.tool_registry.is_planning_active();
287 let request_user_input_enabled = runner
288 .features()
289 .request_user_input_enabled(planning_active, false);
290 prompt_alignment::validate_prompt_catalog_alignment(
291 &bundle.system_instruction,
292 &bundle.tool_snapshot,
293 planning_active,
294 request_user_input_enabled,
295 )
296 },
297 "prompt/catalog alignment mismatch; rebuilding runtime prompt bundle",
298 "prompt/catalog alignment mismatch persisted after rebuild",
299 )
300 .await
301 }
302
303 async fn refresh_runtime_prompt_bundle_if_catalog_changed(
304 &self,
305 bundle: &mut RuntimePromptBundle,
306 is_simple_task: bool,
307 ) -> Result<bool> {
308 let current_version = self.tool_registry.tool_catalog_state().current_version();
309 if current_version == bundle.tool_snapshot.version {
310 return Ok(false);
311 }
312
313 debug!(
314 old_version = bundle.tool_snapshot.version,
315 new_version = current_version,
316 "Tool catalog changed mid-task; refreshing runtime prompt bundle"
317 );
318 *bundle = self
319 .build_validated_runtime_prompt_bundle(is_simple_task)
320 .await?;
321 Ok(true)
322 }
323
324 async fn resolve_completion_acceptance(
325 &mut self,
326 effective_task: &Task,
327 session_state: &mut AgentSessionState,
328 event_recorder: &mut ExecEventRecorder,
329 orchestration_enabled: bool,
330 verification_results: &[VerificationResult],
331 revision_rounds_used: &mut usize,
332 max_revision_rounds: usize,
333 should_write_blocked_handoff: &mut bool,
334 ) -> Result<bool> {
335 if !orchestration_enabled {
336 session_state.is_completed = true;
337 session_state.outcome = TaskOutcome::Success;
338 return Ok(true);
339 }
340
341 match self
342 .apply_evaluator_gate(
343 effective_task,
344 session_state,
345 event_recorder,
346 verification_results,
347 revision_rounds_used,
348 max_revision_rounds,
349 )
350 .await?
351 {
352 EvaluatorGateOutcome::Accept => {
353 session_state.is_completed = true;
354 session_state.outcome = TaskOutcome::Success;
355 Ok(true)
356 }
357 EvaluatorGateOutcome::Continue { prompt } => {
358 session_state.add_user_message(prompt);
359 Ok(false)
360 }
361 EvaluatorGateOutcome::Exhausted { reason } => {
362 session_state.outcome = TaskOutcome::Failed { reason };
363 *should_write_blocked_handoff = true;
364 Ok(true)
365 }
366 }
367 }
368
369 async fn run_verification_commands(
370 &self,
371 commands: &[String],
372 event_recorder: &mut ExecEventRecorder,
373 ) -> Result<Vec<VerificationResult>> {
374 let mut results = Vec::with_capacity(commands.len());
375 for command in commands {
376 let command_event = event_recorder.command_started(command);
377 let payload = json!({
378 "action": "run",
379 "command": command,
380 "workdir": self._workspace.display().to_string(),
381 "yield_time_ms": 1000,
382 });
383 let result = self
384 .tool_registry
385 .execute_harness_unified_exec(payload)
386 .await?;
387 let exit_code = result
388 .get("exit_code")
389 .and_then(serde_json::Value::as_i64)
390 .map(|value| value as i32);
391 let success = exit_code.unwrap_or(0) == 0;
392 let output = summarize_verification_output(&result);
393 event_recorder.command_finished(
394 &command_event,
395 if success {
396 crate::exec::events::CommandExecutionStatus::Completed
397 } else {
398 crate::exec::events::CommandExecutionStatus::Failed
399 },
400 exit_code,
401 &output,
402 );
403 results.push(VerificationResult {
404 command: command.clone(),
405 success,
406 exit_code,
407 output,
408 });
409 if !success {
410 break;
411 }
412 }
413 Ok(results)
414 }
415
416 pub async fn execute_task(
418 &mut self,
419 task: &Task,
420 contexts: &[ContextItem],
421 ) -> Result<TaskResults> {
422 self.tool_registry
424 .set_harness_session(self.session_id.clone());
425 self.tool_registry.set_harness_task(Some(task.id.clone()));
426
427 let steering_receiver = self.steering_receiver.lock().take();
428 let runtime_setup = {
429 let agent_prefix = format!("[{}]", self.agent_type);
431 let mut event_recorder = ExecEventRecorder::new(
433 self.session_id.clone(),
434 self.event_sink.clone(),
435 Some(self.thread_handle.clone()),
436 );
437 event_recorder.turn_started();
438 self.runner_println(format_args!(
439 "{agent_prefix} Analyzing request and planning approach..."
440 ));
441
442 self.runner_println(format_args!(
443 "{} Executing {} task: {}",
444 style("[AGENT]").magenta().bold().on_black(),
445 self.agent_type,
446 task.title
447 ));
448
449 let run_started_at = std::time::Instant::now();
450 let is_simple_task = Self::is_simple_task(task, contexts);
451 let prompt_bundle = self
452 .build_validated_runtime_prompt_bundle(is_simple_task)
453 .await?;
454
455 let review_like = is_review_like_task(task);
456 let full_auto_active = self
457 .tool_registry
458 .current_full_auto_allowlist()
459 .await
460 .is_some();
461
462 let mut conversation = conversation_from_messages(&self.bootstrap_messages);
463 conversation.extend(build_conversation(task, contexts));
464
465 let conversation_messages = build_messages_from_conversation(&conversation);
468
469 let max_tool_loops = self.config().tools.max_tool_loops;
472 let preserve_recent_turns = self.config().context.preserve_recent_turns;
473 let max_context_tokens = self.config().context.max_context_tokens;
474
475 let mut session_state = AgentSessionState::new(
476 self.session_id.clone(),
477 self.max_turns,
478 max_tool_loops,
479 max_context_tokens,
480 );
481 session_state.conversation = conversation;
482 session_state.messages = conversation_messages;
483 session_state.last_processed_message_idx = session_state.conversation.len();
484
485 let mut runtime = AgentRuntime::new(session_state, None, steering_receiver);
486
487 if let Err(err) = self.tool_registry.initialize_async().await {
488 warn!(
489 error = %err,
490 "Tool registry initialization failed at task start"
491 );
492 runtime
493 .state
494 .warnings
495 .push(format!("Tool registry init failed: {err}"));
496 }
497 let orchestration_enabled =
498 self.harness_plan_build_evaluate_enabled(full_auto_active, review_like);
499 let planner_artifacts = if orchestration_enabled {
500 Some(self.run_planner_phase(task, &mut event_recorder).await?)
501 } else {
502 None
503 };
504 let effective_task = planner_artifacts
505 .as_ref()
506 .map(|artifacts| self.augment_generator_task(task, artifacts))
507 .unwrap_or_else(|| task.clone());
508
509 let mut continuation_controller = ContinuationController::new(
510 self._workspace.clone(),
511 self.tool_registry.planning_workflow_state(),
512 self.config().agent.harness.continuation_policy.clone(),
513 full_auto_active,
514 self.tool_registry.is_planning_active(),
515 review_like,
516 );
517 continuation_controller.prepare(&effective_task).await?;
518
519 (
520 agent_prefix,
521 event_recorder,
522 run_started_at,
523 is_simple_task,
524 prompt_bundle,
525 preserve_recent_turns,
526 max_tool_loops,
527 max_context_tokens,
528 runtime,
529 continuation_controller,
530 effective_task,
531 orchestration_enabled,
532 )
533 };
534
535 let (
536 agent_prefix,
537 mut event_recorder,
538 run_started_at,
539 is_simple_task,
540 mut prompt_bundle,
541 preserve_recent_turns,
542 max_tool_loops,
543 max_context_tokens,
544 mut runtime,
545 mut continuation_controller,
546 effective_task,
547 orchestration_enabled,
548 ) = runtime_setup;
549 let mut cost_warning_emitted = false;
550 let max_budget_usd = self.config().agent.harness.max_budget_usd;
551 let max_revision_rounds = self.config().agent.harness.max_revision_rounds.max(1);
552 let mut revision_rounds_used = 0usize;
553 let mut should_write_blocked_handoff = false;
554
555 let result = {
556 for turn in 0..self.max_turns {
557 if matches!(
558 runtime.poll_turn_control().await,
559 RuntimeControl::StopRequested
560 ) {
561 self.runner_println(format_args!(
562 "{} {}",
563 agent_prefix,
564 style("Stopped by steering signal.").red().bold()
565 ));
566 runtime.state.outcome = TaskOutcome::Cancelled;
567 break;
568 }
569
570 if let Some(input) = runtime.run_until_idle() {
571 self.runner_println(format_args!(
572 "{} {}: {}",
573 agent_prefix,
574 style("Follow-up Input").cyan().bold(),
575 input
576 ));
577 }
578
579 let utilization = runtime.state.utilization();
580 if utilization > 0.90 {
581 warn!("Context at {:.1}% - approaching limit", utilization * 100.0);
582 runtime.state.warnings.push(format!(
583 "Token budget at {}% - approaching context limit",
584 (utilization * 100.0) as u32
585 ));
586 }
587
588 if runtime.state.is_completed {
589 runtime.state.outcome = TaskOutcome::Success;
590 break;
591 }
592
593 self.runner_println(format_args!(
594 "{} {} is processing turn {}...",
595 agent_prefix,
596 style("(PROC)").cyan().bold(),
597 turn + 1
598 ));
599
600 let turn_model = self.get_selected_model();
601 let provider_name = self.provider_client.name().to_string();
602 if std::env::var_os("VTCODE_DEBUG_PROVIDER").is_some() {
603 tracing::debug!(
604 provider_client = self.provider_client.name(),
605 turn_model,
606 "Provider debug turn selection"
607 );
608 }
609 let turn_reasoning = if is_simple_task {
610 Some(ReasoningEffortLevel::Minimal)
611 } else {
612 self.reasoning_effort
613 };
614 let turn_verbosity = if is_simple_task {
615 Some(VerbosityLevel::Low)
616 } else {
617 self.verbosity
618 };
619 let max_tokens = if is_simple_task {
620 Some(800)
621 } else {
622 Some(2000)
623 };
624
625 self.summarize_conversation_if_needed(
626 &mut runtime.state,
627 preserve_recent_turns,
628 utilization,
629 );
630
631 let parallel_tool_config = if self.model.len() < 20 {
632 None
633 } else if self
634 .provider_client
635 .supports_parallel_tool_config(&turn_model)
636 {
637 Some(Box::new(
638 crate::llm::provider::ParallelToolConfig::anthropic_optimized(),
639 ))
640 } else {
641 None
642 };
643
644 let provider_kind = turn_model
645 .parse::<ModelId>()
646 .map(|model| model.provider())
647 .unwrap_or(ModelProvider::Gemini);
648
649 if matches!(provider_kind, ModelProvider::Gemini)
650 && runtime.state.conversation.len() > runtime.state.last_processed_message_idx
651 {
652 for content in
653 &runtime.state.conversation[runtime.state.last_processed_message_idx..]
654 {
655 let mut text = String::new();
656 for part in &content.parts {
657 if let Part::Text {
658 text: part_text, ..
659 } = part
660 {
661 if !text.is_empty() {
662 text.push('\n');
663 }
664 text.push_str(part_text);
665 }
666 }
667 let message = match content.role.as_str() {
668 "model" => Message::assistant(text),
669 _ => Message::user(text),
670 };
671 runtime.state.messages.push(message);
672 }
673 runtime.state.last_processed_message_idx = runtime.state.conversation.len();
674 }
675
676 let reasoning_effort =
677 if self.provider_client.supports_reasoning_effort(&turn_model) {
678 turn_reasoning
679 } else {
680 None
681 };
682 let temperature = if reasoning_effort.is_some()
683 && matches!(
684 provider_kind,
685 ModelProvider::Anthropic | ModelProvider::Minimax
686 ) {
687 None
688 } else {
689 Some(0.7)
690 };
691
692 let (request_messages, previous_response_id) = prepare_responses_request_messages(
693 &mut runtime.state.previous_response_chains,
694 &provider_name,
695 self.provider_client
696 .supports_responses_compaction(&turn_model),
697 &turn_model,
698 &runtime.state.messages,
699 );
700
701 let request = build_harness_request_plan(HarnessRequestPlanInput {
702 messages: request_messages.into_owned(),
703 system_prompt: prompt_bundle.system_instruction.as_ref().clone(),
704 tools: prompt_bundle.request_tools.clone(),
705 model: turn_model.clone(),
706 max_tokens,
707 temperature,
708 stream: self.provider_client.supports_streaming(),
709 tool_choice: None,
710 parallel_tool_config,
711 reasoning_effort,
712 verbosity: turn_verbosity,
713 metadata: None,
714 context_management: None,
715 previous_response_id,
716 prompt_cache_key: build_openai_prompt_cache_key(
717 provider_name.eq_ignore_ascii_case("openai")
718 && self.config().prompt_cache.enabled
719 && self.config().prompt_cache.providers.openai.enabled,
720 &self
721 .config()
722 .prompt_cache
723 .providers
724 .openai
725 .prompt_cache_key_mode,
726 Some(&self.session_id),
727 ),
728 prompt_cache_profile: None,
729 tool_catalog_hash: prompt_bundle.tool_snapshot.tool_catalog_hash,
730 })
731 .request;
732 self.validate_llm_request(&request)?;
736 let previous_response_chain_present = request.previous_response_id.is_some();
737 let sent_messages = request.messages.clone();
741 let streaming_timeout = self
744 .config()
745 .timeouts
746 .ceiling_duration(self.config().timeouts.streaming_ceiling_seconds);
747
748 let turn_output = runtime
749 .run_turn_once(&mut self.provider_client, request, streaming_timeout)
750 .await?;
751 event_recorder.record_thread_events(runtime.take_emitted_events());
752 let response = turn_output.response;
753 runtime.state.stop_reason =
754 Some(stop_reason_from_finish_reason(&response.finish_reason));
755 if supports_responses_chaining(
756 &provider_name,
757 self.provider_client
758 .supports_responses_compaction(&turn_model),
759 ) {
760 runtime.state.set_previous_response_chain(
761 &provider_name,
762 &turn_model,
763 response.request_id.as_deref(),
764 sent_messages,
765 );
766 }
767 match estimate_session_cost_usd(
768 self.config().agent.provider.as_str(),
769 &turn_model,
770 &runtime.state.stats.total_usage,
771 ) {
772 Some(total_cost_usd) => {
773 runtime.state.total_cost_usd = Some(total_cost_usd);
774 if let Some(max_budget_usd) = max_budget_usd
775 && total_cost_usd > max_budget_usd
776 {
777 runtime.state.outcome =
778 TaskOutcome::budget_limit_reached(max_budget_usd, total_cost_usd);
779 break;
780 }
781 }
782 None => {
783 runtime.state.total_cost_usd = None;
784 if max_budget_usd.is_some() && !cost_warning_emitted {
785 cost_warning_emitted = true;
786 let warning_message = format!(
787 "Budget enforcement disabled for model `{turn_model}` because pricing metadata is unavailable"
788 );
789 warn!(
790 provider = %self.config().agent.provider,
791 model = %turn_model,
792 "Budget enforcement disabled because pricing metadata is unavailable"
793 );
794 runtime.state.warnings.push(warning_message);
795 }
796 }
797 }
798 self.runner_println(format_args!(
799 "{} {} {} received response, processing...",
800 agent_prefix,
801 self.agent_type,
802 style("(RECV)").green().bold()
803 ));
804
805 self.warn_on_empty_response(
806 &agent_prefix,
807 response.content.as_deref().unwrap_or(""),
808 response
809 .tool_calls
810 .as_ref()
811 .is_some_and(|tool_calls| !tool_calls.is_empty()),
812 );
813
814 let response_text = response.content_string();
815 if !response_text.trim().is_empty() {
816 self.emit_final_assistant_message(&self.agent_type, &response_text);
817 }
818
819 let mut effective_tool_calls = response.tool_calls.clone();
820 let mut forced_continuation = false;
821
822 if effective_tool_calls.is_none()
823 && response.content_text().len() < 150
824 && let Some(args_value) = detect_textual_exec_tool_call(response.content_text())
825 {
826 effective_tool_calls = Some(vec![ToolCall::function(
827 format!("call_text_{}", turn),
828 tools::UNIFIED_EXEC.to_string(),
829 args_value.to_string(),
830 )]);
831 }
832
833 let is_gemini = matches!(provider_kind, ModelProvider::Gemini);
834
835 if !runtime.state.is_completed
836 && effective_tool_calls
837 .as_ref()
838 .is_none_or(|tool_calls| tool_calls.is_empty())
839 && !response.content_text().is_empty()
840 {
841 if check_for_response_loop(response.content_text(), &mut runtime.state) {
842 self.runner_println(format_args!(
843 "[{}] {}",
844 self.agent_type,
845 style(
846 "Repetitive assistant response detected. Breaking potential loop."
847 )
848 .red()
849 .bold()
850 ));
851 runtime.state.outcome = TaskOutcome::LoopDetected;
852 break;
853 }
854
855 if check_completion_candidate(response.content_text()) {
856 self.runner_println(format_args!(
857 "[{}] {}",
858 self.agent_type,
859 style("Completion candidate detected; checking tracker and verification state.")
860 .green()
861 .bold()
862 ));
863 match continuation_controller
864 .assess_completion(&effective_task, &runtime.state)
865 .await?
866 {
867 CompletionAssessment::Accept => {
868 if self
869 .resolve_completion_acceptance(
870 &effective_task,
871 &mut runtime.state,
872 &mut event_recorder,
873 orchestration_enabled,
874 &[],
875 &mut revision_rounds_used,
876 max_revision_rounds,
877 &mut should_write_blocked_handoff,
878 )
879 .await?
880 {
881 break;
882 }
883 forced_continuation = true;
884 }
885 CompletionAssessment::SkipAccept { reason } => {
886 event_recorder.harness_event(
887 HarnessEventKind::ContinuationSkipped,
888 Some(reason),
889 None,
890 None,
891 None,
892 );
893 runtime.state.is_completed = true;
894 runtime.state.outcome = TaskOutcome::Success;
895 break;
896 }
897 CompletionAssessment::Continue { reason, prompt } => {
898 event_recorder.harness_event(
899 HarnessEventKind::ContinuationStarted,
900 Some(reason),
901 None,
902 None,
903 None,
904 );
905 runtime.state.add_user_message(prompt);
906 forced_continuation = true;
907 }
908 CompletionAssessment::Verify { commands } => {
909 event_recorder.harness_event(
910 HarnessEventKind::VerificationStarted,
911 Some(format!("Running verification: {}", commands.join(", "))),
912 commands.first().cloned(),
913 None,
914 None,
915 );
916 let verification_results = self
917 .run_verification_commands(&commands, &mut event_recorder)
918 .await?;
919 if let Some(failure) =
920 verification_results.iter().find(|result| !result.success)
921 {
922 event_recorder.harness_event(
923 HarnessEventKind::VerificationFailed,
924 Some(format!(
925 "{}{}",
926 match failure.exit_code {
927 Some(code) => format!(
928 "Verification failed: {} (exit code {}).",
929 failure.command, code
930 ),
931 None => format!(
932 "Verification failed: {}.",
933 failure.command
934 ),
935 },
936 if failure.output.trim().is_empty() {
937 String::new()
938 } else {
939 format!("\n{}", failure.output.trim())
940 }
941 )),
942 Some(failure.command.clone()),
943 None,
944 failure.exit_code,
945 );
946 } else {
947 event_recorder.harness_event(
948 HarnessEventKind::VerificationPassed,
949 Some(format!(
950 "Verification passed: {}",
951 commands.join(", ")
952 )),
953 commands.last().cloned(),
954 None,
955 Some(0),
956 );
957 }
958
959 match continuation_controller
960 .after_verification(&verification_results)
961 .await?
962 {
963 CompletionAssessment::Accept
964 | CompletionAssessment::SkipAccept { .. } => {
965 if self
966 .resolve_completion_acceptance(
967 &effective_task,
968 &mut runtime.state,
969 &mut event_recorder,
970 orchestration_enabled,
971 &verification_results,
972 &mut revision_rounds_used,
973 max_revision_rounds,
974 &mut should_write_blocked_handoff,
975 )
976 .await?
977 {
978 break;
979 }
980 forced_continuation = true;
981 }
982 CompletionAssessment::Continue { reason, prompt } => {
983 event_recorder.harness_event(
984 HarnessEventKind::ContinuationStarted,
985 Some(reason),
986 None,
987 None,
988 None,
989 );
990 runtime.state.add_user_message(prompt);
991 forced_continuation = true;
992 }
993 CompletionAssessment::Verify { .. } => {}
994 }
995 }
996 }
997 }
998 }
999
1000 if let Some(tool_calls) = effective_tool_calls
1001 .as_ref()
1002 .filter(|tool_calls| !tool_calls.is_empty())
1003 .cloned()
1004 {
1005 self.execute_tool_call_batches(
1006 tool_calls,
1007 &mut runtime,
1008 &mut event_recorder,
1009 &agent_prefix,
1010 is_gemini,
1011 previous_response_chain_present,
1012 )
1013 .await?;
1014 event_recorder.record_thread_events(runtime.take_emitted_events());
1015 }
1016
1017 let _ = self
1020 .refresh_runtime_prompt_bundle_if_catalog_changed(
1021 &mut prompt_bundle,
1022 is_simple_task,
1023 )
1024 .await?;
1025
1026 let had_effective_shell_tool_call =
1027 effective_tool_calls.as_ref().is_some_and(|calls| {
1028 calls.iter().any(|call| {
1029 call.function
1030 .as_ref()
1031 .map(|function| function.name.as_str())
1032 == Some(tools::UNIFIED_EXEC)
1033 })
1034 });
1035 let had_tool_call = response
1036 .tool_calls
1037 .as_ref()
1038 .is_some_and(|tool_calls| !tool_calls.is_empty())
1039 || had_effective_shell_tool_call;
1040
1041 if had_tool_call {
1042 let loops = runtime.state.register_tool_loop();
1043 if tool_loop_limit_reached(loops, runtime.state.constraints.max_tool_loops) {
1044 let warning_message = format!(
1045 "Reached tool-call limit of {} iterations; pausing autonomous loop",
1046 runtime.state.constraints.max_tool_loops
1047 );
1048 self.record_warning(
1049 &agent_prefix,
1050 &mut runtime.state,
1051 &mut event_recorder,
1052 warning_message,
1053 );
1054 runtime.state.mark_tool_loop_limit_hit();
1055 break;
1056 }
1057 runtime.state.consecutive_idle_turns = 0;
1058 } else {
1059 runtime.state.reset_tool_loop_guard();
1060 if forced_continuation {
1061 runtime.state.consecutive_idle_turns = 0;
1062 } else if !runtime.state.is_completed {
1063 runtime.state.consecutive_idle_turns =
1064 runtime.state.consecutive_idle_turns.saturating_add(1);
1065 let idle_turn_limit = self.config().agent.idle_turn_limit;
1066 if runtime.state.consecutive_idle_turns >= idle_turn_limit {
1067 let warning_message = format!(
1068 "No tool calls or completion for {} consecutive turns; halting to avoid idle loop",
1069 runtime.state.consecutive_idle_turns
1070 );
1071 self.record_warning(
1072 &agent_prefix,
1073 &mut runtime.state,
1074 &mut event_recorder,
1075 warning_message,
1076 );
1077 runtime.state.outcome = TaskOutcome::StoppedNoAction;
1078 break;
1079 }
1080 }
1081 }
1082
1083 let should_continue = forced_continuation
1084 || had_tool_call
1085 || runtime.has_pending_follow_up_inputs()
1086 || (!runtime.state.is_completed && (turn + 1) < self.max_turns);
1087
1088 if !should_continue {
1089 if runtime.state.is_completed {
1090 runtime.state.outcome = TaskOutcome::Success;
1091 } else if (turn + 1) >= self.max_turns {
1092 runtime.state.outcome =
1093 TaskOutcome::turn_limit_reached(self.max_turns, turn + 1);
1094 } else {
1095 runtime.state.outcome = TaskOutcome::StoppedNoAction;
1096 }
1097 break;
1098 }
1099 }
1100
1101 runtime.state.finalize_outcome(self.max_turns);
1102
1103 let total_duration_ms = run_started_at.elapsed().as_millis();
1104
1105 self.runner_println(format_args!("{} Done", agent_prefix));
1107
1108 let average_turn_duration_ms = if runtime.state.turn_count > 0 {
1110 Some(runtime.state.turn_total_ms as f64 / runtime.state.turn_count as f64)
1111 } else {
1112 None
1113 };
1114
1115 let max_turn_duration_ms = if runtime.state.turn_count > 0 {
1116 Some(runtime.state.turn_max_ms)
1117 } else {
1118 None
1119 };
1120
1121 let outcome = runtime.state.outcome.clone();
1122 self.thread_handle
1123 .replace_messages(runtime.state.messages.clone());
1124 let summary = self.generate_task_summary(
1125 &effective_task,
1126 &runtime.state.modified_files,
1127 &runtime.state.executed_commands,
1128 &runtime.state.warnings,
1129 &runtime.state.messages,
1130 runtime.state.stats.turns_executed,
1131 runtime.state.max_tool_loop_streak,
1132 max_tool_loops,
1133 outcome,
1134 total_duration_ms,
1135 average_turn_duration_ms,
1136 max_turn_duration_ms,
1137 );
1138
1139 if !summary.trim().is_empty() {
1140 event_recorder.agent_message(&summary);
1142 self.runner_println(format_args!(
1144 "\n{} Agent Task Summary\n{}",
1145 style("[Task]").cyan().bold(),
1146 summary
1147 ));
1148 }
1149
1150 let runtime_agent_config = self.core_agent_config();
1151 if let Err(err) = crate::persistent_memory::finalize_persistent_memory(
1152 &runtime_agent_config,
1153 Some(self.config()),
1154 &runtime.state.messages,
1155 )
1156 .await
1157 {
1158 warn!(
1159 error = %err,
1160 session_id = %self.session_id,
1161 "Failed to update persistent memory"
1162 );
1163 }
1164
1165 if runtime.state.outcome.is_hard_block() || should_write_blocked_handoff {
1166 let relevant_paths = existing_harness_artifact_paths(&self._workspace);
1167 match write_blocked_handoff(
1168 &self._workspace,
1169 &self.session_id,
1170 runtime.state.outcome.code(),
1171 &runtime.state.outcome.description(),
1172 &relevant_paths,
1173 ) {
1174 Ok(artifacts) => emit_blocked_handoff_events(
1175 &mut event_recorder,
1176 &artifacts.current_path,
1177 &artifacts.archive_path,
1178 ),
1179 Err(err) => warn!(
1180 error = %err,
1181 session_id = %self.session_id,
1182 "Failed to persist blocked handoff"
1183 ),
1184 }
1185 }
1186
1187 let total_usage = runtime.state.stats.total_usage.clone();
1188 record_terminal_turn_event(
1189 &mut event_recorder,
1190 &runtime.state.outcome,
1191 total_usage.clone(),
1192 );
1193 event_recorder.thread_completed(
1194 &self.session_id,
1195 runtime.state.outcome.thread_completion_subtype(),
1196 runtime.state.outcome.code(),
1197 runtime
1198 .state
1199 .outcome
1200 .is_success()
1201 .then_some(summary.as_str()),
1202 runtime.state.stop_reason.as_deref(),
1203 total_usage,
1204 runtime
1205 .state
1206 .total_cost_usd
1207 .and_then(serde_json::Number::from_f64),
1208 runtime.state.stats.turns_executed,
1209 );
1210 let thread_events = event_recorder.into_events();
1211 let steering_receiver = runtime.take_steering_receiver();
1212 let state = std::mem::replace(
1213 &mut runtime.state,
1214 AgentSessionState::new(
1215 self.session_id.clone(),
1216 self.max_turns,
1217 max_tool_loops,
1218 max_context_tokens,
1219 ),
1220 );
1221
1222 Ok((
1223 state.into_results(summary, thread_events, total_duration_ms),
1224 steering_receiver,
1225 ))
1226 };
1227
1228 let result = match result {
1229 Ok((task_results, steering_receiver)) => {
1230 *self.steering_receiver.lock() = steering_receiver;
1231 Ok(task_results)
1232 }
1233 Err(err) => {
1234 *self.steering_receiver.lock() = runtime.take_steering_receiver();
1235 Err(err)
1236 }
1237 };
1238
1239 self.tool_registry.set_harness_task(None);
1240 result
1241 }
1242}
1243
#[cfg(test)]
mod tests {
    //! Unit tests for terminal turn-event recording, the tool-loop limit guard,
    //! and the preparation of request messages for response-chained providers.

    use super::{
        prepare_responses_request_messages, record_terminal_turn_event, tool_loop_limit_reached,
    };
    use crate::core::agent::events::ExecEventRecorder;
    use crate::core::agent::session::AgentSessionState;
    use crate::core::agent::task::TaskOutcome;
    use crate::exec::events::ThreadEvent;
    use crate::llm::provider::Message;

    /// Builds the shared conversation fixture used by the request-preparation tests.
    ///
    /// Returns `(prior, current)` where `prior` is the history already recorded on
    /// the response chain (`"hello"`) and `current` is that same history followed by
    /// one new user message (`"continue"`).
    fn chained_history() -> (Vec<Message>, Vec<Message>) {
        let prior = vec![Message::user("hello".to_string())];
        let current = vec![
            Message::user("hello".to_string()),
            Message::user("continue".to_string()),
        ];
        (prior, current)
    }

    /// A `Failed` outcome must record exactly one `TurnFailed` event and no
    /// `TurnCompleted` event.
    #[test]
    fn failed_outcome_emits_only_turn_failed() {
        let mut recorder = ExecEventRecorder::new("thread", None, None);
        recorder.turn_started();

        record_terminal_turn_event(
            &mut recorder,
            &TaskOutcome::Failed {
                reason: "boom".to_string(),
            },
            Default::default(),
        );

        let events = recorder.into_events();
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnFailed(_)))
                .count(),
            1
        );
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnCompleted(_)))
                .count(),
            0
        );
    }

    /// A `Success` outcome must record exactly one `TurnCompleted` event and no
    /// `TurnFailed` event.
    #[test]
    fn successful_outcome_emits_only_turn_completed() {
        let mut recorder = ExecEventRecorder::new("thread", None, None);
        recorder.turn_started();

        record_terminal_turn_event(&mut recorder, &TaskOutcome::Success, Default::default());

        let events = recorder.into_events();
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnCompleted(_)))
                .count(),
            1
        );
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnFailed(_)))
                .count(),
            0
        );
    }

    /// With a configured limit of `0`, the guard must not report the limit as
    /// reached for any loop count exercised here.
    #[test]
    fn disabled_tool_loop_limit_never_trips() {
        assert!(!tool_loop_limit_reached(1, 0));
        assert!(!tool_loop_limit_reached(32, 0));
    }

    /// For the `openai` provider, only the messages added after the recorded
    /// chain are sent, together with the stored previous response id.
    #[test]
    fn openai_prepare_responses_request_messages_uses_incremental_suffix() {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        let (prior_messages, current_messages) = chained_history();
        state.set_previous_response_chain("openai", "gpt-5.4", Some("resp_123"), prior_messages);

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "openai",
            false,
            "gpt-5.4",
            &current_messages,
        );

        assert_eq!(previous_response_id.as_deref(), Some("resp_123"));
        assert_eq!(
            request_messages.as_ref(),
            [Message::user("continue".to_string())].as_slice()
        );
    }

    /// For the `gemini` provider, the complete message history is sent even
    /// though a previous response id is still returned.
    #[test]
    fn gemini_prepare_responses_request_messages_keeps_full_history() {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        let (prior_messages, current_messages) = chained_history();
        state.set_previous_response_chain(
            "gemini",
            "gemini-2.5-pro",
            Some("resp_123"),
            prior_messages,
        );

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "gemini",
            false,
            "gemini-2.5-pro",
            &current_messages,
        );

        assert_eq!(previous_response_id.as_deref(), Some("resp_123"));
        assert_eq!(request_messages.as_ref(), current_messages.as_slice());
    }

    /// For a custom provider name with the boolean flag set to `true`, only the
    /// incremental suffix is sent, matching the `openai` case.
    ///
    /// NOTE(review): the flag presumably marks an OpenAI-compatible provider —
    /// confirm against the signature of `prepare_responses_request_messages`.
    #[test]
    fn compatible_prepare_responses_request_messages_uses_incremental_suffix() {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        let (prior_messages, current_messages) = chained_history();
        state.set_previous_response_chain("mycorp", "gpt-5.4", Some("resp_123"), prior_messages);

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "mycorp",
            true,
            "gpt-5.4",
            &current_messages,
        );

        assert_eq!(previous_response_id.as_deref(), Some("resp_123"));
        assert_eq!(
            request_messages.as_ref(),
            [Message::user("continue".to_string())].as_slice()
        );
    }
}