1use super::AgentRunner;
2use super::continuation::{
3 CompletionAssessment, ContinuationController, VerificationResult, is_review_like_task,
4};
5use super::helpers::detect_textual_exec_tool_call;
6use super::orchestration::EvaluatorGateOutcome;
7use super::prompt_alignment;
8use crate::config::build_openai_prompt_cache_key;
9use crate::config::constants::tools;
10use crate::config::models::{ModelId, Provider as ModelProvider};
11use crate::config::tool_loop_limit_reached;
12use crate::config::types::{ReasoningEffortLevel, SystemPromptMode, VerbosityLevel};
13use crate::core::agent::blocked_handoff::write_blocked_handoff;
14use crate::core::agent::completion::{check_completion_candidate, check_for_response_loop};
15use crate::core::agent::conversation::{
16 build_conversation, build_messages_from_conversation, conversation_from_messages,
17};
18use crate::core::agent::events::ExecEventRecorder;
19use crate::core::agent::harness_artifacts::existing_harness_artifact_paths;
20use crate::core::agent::harness_kernel::{
21 HarnessRequestPlanInput, SessionToolCatalogSnapshot, build_harness_request_plan,
22};
23use crate::core::agent::runtime::{AgentRuntime, RuntimeControl};
24use crate::core::agent::session::AgentSessionState;
25use crate::core::agent::task::{ContextItem, Task, TaskOutcome, TaskResults};
26use crate::exec::events::HarnessEventKind;
27use crate::llm::model_resolver::ModelResolver;
28use crate::llm::provider::{
29 FinishReason, Message, ResponsesContinuationState, ToolCall, ToolDefinition,
30 prepare_responses_continuation_request, responses_continuation_key,
31 supports_responses_chaining,
32};
33use crate::llm::providers::gemini::wire::Part;
34use crate::prompts::{
35 PromptContext, RuntimePromptContract, append_runtime_mode_sections,
36 append_runtime_tool_prompt_sections, upsert_harness_limits_section,
37};
38use crate::utils::colors::style;
39use anyhow::Result;
40use serde_json::json;
41use std::sync::Arc;
42use tracing::{debug, warn};
43
44fn record_terminal_turn_event(
45 event_recorder: &mut ExecEventRecorder,
46 outcome: &TaskOutcome,
47 usage: vtcode_exec_events::Usage,
48) {
49 if outcome.is_success() {
50 event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnCompleted(
51 vtcode_exec_events::TurnCompletedEvent { usage },
52 ));
53 } else {
54 event_recorder.record_thread_event(vtcode_exec_events::ThreadEvent::TurnFailed(
55 vtcode_exec_events::TurnFailedEvent {
56 message: outcome.description(),
57 usage: Some(usage),
58 },
59 ));
60 }
61}
62
63fn emit_blocked_handoff_events(
64 event_recorder: &mut ExecEventRecorder,
65 current_path: &std::path::Path,
66 archive_path: &std::path::Path,
67) {
68 for path in [current_path, archive_path] {
69 event_recorder.harness_event(
70 HarnessEventKind::BlockedHandoffWritten,
71 Some("Blocked handoff written".to_string()),
72 None,
73 Some(path.display().to_string()),
74 None,
75 );
76 }
77}
78
79fn summarize_verification_output(result: &serde_json::Value) -> String {
80 result
81 .get("output")
82 .and_then(serde_json::Value::as_str)
83 .or_else(|| result.get("stderr").and_then(serde_json::Value::as_str))
84 .or_else(|| result.get("stdout").and_then(serde_json::Value::as_str))
85 .map(str::trim)
86 .filter(|text| !text.is_empty())
87 .map(|text| {
88 let truncated = text
89 .lines()
90 .take(20)
91 .collect::<Vec<_>>()
92 .join("\n")
93 .trim()
94 .to_string();
95 if truncated.len() < text.len() {
96 format!("{truncated}\n...")
97 } else {
98 truncated
99 }
100 })
101 .unwrap_or_default()
102}
103
104fn prepare_responses_request_messages<'a>(
105 previous_chains: &mut hashbrown::HashMap<(String, String), ResponsesContinuationState>,
106 provider_name: &str,
107 provider_supports_responses_compaction: bool,
108 model: &str,
109 messages: &'a [Message],
110) -> (std::borrow::Cow<'a, [Message]>, Option<String>) {
111 let key = responses_continuation_key(provider_name, model);
112 let continuation = key.as_ref().and_then(|k| previous_chains.get(k));
113 let prepared = prepare_responses_continuation_request(
114 provider_name,
115 provider_supports_responses_compaction,
116 messages,
117 continuation,
118 );
119 if prepared.clear_stale_chain
120 && let Some(key) = key
121 {
122 previous_chains.remove(&key);
123 }
124
125 (prepared.messages, prepared.previous_response_id)
126}
127
128fn stop_reason_from_finish_reason(finish_reason: &FinishReason) -> String {
129 match finish_reason {
130 FinishReason::Stop => "end_turn".to_string(),
131 FinishReason::Length => "max_tokens".to_string(),
132 FinishReason::ToolCalls => "tool_calls".to_string(),
133 FinishReason::ContentFilter => "content_filter".to_string(),
134 FinishReason::Pause => "pause_turn".to_string(),
135 FinishReason::Refusal => "refusal".to_string(),
136 FinishReason::Error(message) => message.clone(),
137 }
138}
139
140fn estimate_session_cost_usd(
141 provider: &str,
142 model: &str,
143 usage: &vtcode_exec_events::Usage,
144) -> Option<f64> {
145 let usage = crate::llm::provider::Usage {
146 prompt_tokens: u32::try_from(usage.input_tokens).unwrap_or(u32::MAX),
147 completion_tokens: u32::try_from(usage.output_tokens).unwrap_or(u32::MAX),
148 total_tokens: u32::try_from(usage.input_tokens.saturating_add(usage.output_tokens))
149 .unwrap_or(u32::MAX),
150 cached_prompt_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
151 cache_creation_tokens: Some(u32::try_from(usage.cache_creation_tokens).unwrap_or(u32::MAX)),
152 cache_read_tokens: Some(u32::try_from(usage.cached_input_tokens).unwrap_or(u32::MAX)),
153 };
154 let resolved = ModelResolver::resolve(Some(provider), model, &[], None)?;
155 let pricing = resolved.pricing()?;
156 ModelResolver::estimate_cost(pricing, &usage)
157}
158
/// System prompt and tool catalog data that are built together for a task and
/// reused when constructing each turn's LLM request.
struct RuntimePromptBundle {
    /// Fully composed system prompt text, shared behind an `Arc`.
    system_instruction: Arc<String>,
    /// Tool catalog snapshot the prompt was built against; its `version` is
    /// compared with the live catalog version to detect mid-task changes.
    tool_snapshot: SessionToolCatalogSnapshot,
    /// Tool definitions cloned from the snapshot and passed as the request's
    /// `tools`; `None` when the snapshot holds no tool list.
    request_tools: Option<Arc<Vec<ToolDefinition>>>,
}
164
165impl AgentRunner {
166 async fn compose_task_system_prompt(
167 &self,
168 prompt_tools: Arc<Vec<ToolDefinition>>,
169 is_simple_task: bool,
170 ) -> Result<String> {
171 if !is_simple_task {
172 return Ok(self.system_prompt.clone());
173 }
174
175 let mut config = self.config().clone();
176 config.agent.system_prompt_mode = SystemPromptMode::Minimal;
177 let mut prompt_context = PromptContext::from_workspace_tools(
178 self._workspace.as_path(),
179 prompt_tools
180 .iter()
181 .map(|tool| tool.function_name().to_string()),
182 );
183 prompt_context.load_available_skills();
184
185 let prompt = super::helpers::compose_system_prompt_with_appendix(
186 self._workspace.as_path(),
187 &config,
188 &prompt_context,
189 )
190 .await?;
191
192 Ok(prompt)
193 }
194
195 async fn build_runtime_prompt_bundle(
196 &self,
197 is_simple_task: bool,
198 ) -> Result<RuntimePromptBundle> {
199 let tool_snapshot = self.build_universal_tool_snapshot().await?;
200 let request_tools = tool_snapshot.snapshot.clone();
201 let prompt_tools = request_tools
202 .clone()
203 .unwrap_or_else(|| Arc::new(Vec::new()));
204 let mut system_prompt = self
205 .compose_task_system_prompt(prompt_tools, is_simple_task)
206 .await?;
207
208 let plan_mode = self.tool_registry.is_plan_mode();
209 let request_user_input_enabled =
210 self.features().request_user_input_enabled(plan_mode, false);
211 let full_auto_active = self
212 .tool_registry
213 .current_full_auto_allowlist()
214 .await
215 .is_some();
216
217 append_runtime_mode_sections(
218 &mut system_prompt,
219 RuntimePromptContract {
220 full_auto: full_auto_active,
221 plan_mode,
222 request_user_input_enabled,
223 },
224 );
225 upsert_harness_limits_section(
226 &mut system_prompt,
227 self.config().agent.harness.max_tool_calls_per_turn,
228 self.config().agent.harness.max_tool_wall_clock_secs,
229 self.config().agent.harness.max_tool_retries,
230 );
231 append_runtime_tool_prompt_sections(&mut system_prompt, &tool_snapshot, true);
232
233 Ok(RuntimePromptBundle {
234 system_instruction: Arc::new(system_prompt),
235 tool_snapshot,
236 request_tools,
237 })
238 }
239
/// Builds a runtime prompt bundle and checks it against the tool catalog.
///
/// An initial bundle is built, then handed to
/// `prompt_alignment::rebuild_once_on_alignment_mismatch` together with a
/// rebuild closure, a validation closure and two diagnostic messages.
/// NOTE(review): judging by the helper's name and messages it presumably
/// rebuilds the bundle a single time when validation fails and reports an
/// error if the mismatch persists — confirm against the helper.
///
/// # Errors
///
/// Propagates failures from the initial build and from the alignment helper.
async fn build_validated_runtime_prompt_bundle(
    &self,
    is_simple_task: bool,
) -> Result<RuntimePromptBundle> {
    // The helper takes its context by mutable reference, so rebind `self`.
    let mut runner = self;
    let bundle = runner.build_runtime_prompt_bundle(is_simple_task).await?;
    prompt_alignment::rebuild_once_on_alignment_mismatch(
        &mut runner,
        bundle,
        // Rebuild step: produce a fresh bundle with the same task flag.
        |runner| Box::pin((*runner).build_runtime_prompt_bundle(is_simple_task)),
        // Validation step: re-read the runtime flags and check the prompt
        // text against the bundle's tool snapshot.
        |runner, bundle| {
            let plan_mode = runner.tool_registry.is_plan_mode();
            let request_user_input_enabled = runner
                .features()
                .request_user_input_enabled(plan_mode, false);
            prompt_alignment::validate_prompt_catalog_alignment(
                &bundle.system_instruction,
                &bundle.tool_snapshot,
                plan_mode,
                request_user_input_enabled,
            )
        },
        "prompt/catalog alignment mismatch; rebuilding runtime prompt bundle",
        "prompt/catalog alignment mismatch persisted after rebuild",
    )
    .await
}
267
268 async fn refresh_runtime_prompt_bundle_if_catalog_changed(
269 &self,
270 bundle: &mut RuntimePromptBundle,
271 is_simple_task: bool,
272 ) -> Result<bool> {
273 let current_version = self.tool_registry.tool_catalog_state().current_version();
274 if current_version == bundle.tool_snapshot.version {
275 return Ok(false);
276 }
277
278 debug!(
279 old_version = bundle.tool_snapshot.version,
280 new_version = current_version,
281 "Tool catalog changed mid-task; refreshing runtime prompt bundle"
282 );
283 *bundle = self
284 .build_validated_runtime_prompt_bundle(is_simple_task)
285 .await?;
286 Ok(true)
287 }
288
289 async fn resolve_completion_acceptance(
290 &mut self,
291 effective_task: &Task,
292 session_state: &mut AgentSessionState,
293 event_recorder: &mut ExecEventRecorder,
294 orchestration_enabled: bool,
295 verification_results: &[VerificationResult],
296 revision_rounds_used: &mut usize,
297 max_revision_rounds: usize,
298 should_write_blocked_handoff: &mut bool,
299 ) -> Result<bool> {
300 if !orchestration_enabled {
301 session_state.is_completed = true;
302 session_state.outcome = TaskOutcome::Success;
303 return Ok(true);
304 }
305
306 match self
307 .apply_evaluator_gate(
308 effective_task,
309 session_state,
310 event_recorder,
311 verification_results,
312 revision_rounds_used,
313 max_revision_rounds,
314 )
315 .await?
316 {
317 EvaluatorGateOutcome::Accept => {
318 session_state.is_completed = true;
319 session_state.outcome = TaskOutcome::Success;
320 Ok(true)
321 }
322 EvaluatorGateOutcome::Continue { prompt } => {
323 session_state.add_user_message(prompt);
324 Ok(false)
325 }
326 EvaluatorGateOutcome::Exhausted { reason } => {
327 session_state.outcome = TaskOutcome::Failed { reason };
328 *should_write_blocked_handoff = true;
329 Ok(true)
330 }
331 }
332 }
333
334 async fn run_verification_commands(
335 &self,
336 commands: &[String],
337 event_recorder: &mut ExecEventRecorder,
338 ) -> Result<Vec<VerificationResult>> {
339 let mut results = Vec::with_capacity(commands.len());
340 for command in commands {
341 let command_event = event_recorder.command_started(command);
342 let payload = json!({
343 "action": "run",
344 "command": command,
345 "workdir": self._workspace.display().to_string(),
346 "yield_time_ms": 1000,
347 });
348 let result = self
349 .tool_registry
350 .execute_harness_unified_exec(payload)
351 .await?;
352 let exit_code = result
353 .get("exit_code")
354 .and_then(serde_json::Value::as_i64)
355 .map(|value| value as i32);
356 let success = exit_code.unwrap_or(0) == 0;
357 let output = summarize_verification_output(&result);
358 event_recorder.command_finished(
359 &command_event,
360 if success {
361 crate::exec::events::CommandExecutionStatus::Completed
362 } else {
363 crate::exec::events::CommandExecutionStatus::Failed
364 },
365 exit_code,
366 &output,
367 );
368 results.push(VerificationResult {
369 command: command.clone(),
370 success,
371 exit_code,
372 output,
373 });
374 if !success {
375 break;
376 }
377 }
378 Ok(results)
379 }
380
381 pub async fn execute_task(
383 &mut self,
384 task: &Task,
385 contexts: &[ContextItem],
386 ) -> Result<TaskResults> {
387 self.tool_registry
389 .set_harness_session(self.session_id.clone());
390 self.tool_registry.set_harness_task(Some(task.id.clone()));
391
392 if let Err(err) = self.tool_registry.initialize_async().await {
394 warn!(
395 error = %err,
396 "Tool registry initialization failed at task start"
397 );
398 }
399
400 let steering_receiver = self.steering_receiver.lock().take();
401 let runtime_setup = {
402 let agent_prefix = format!("[{}]", self.agent_type);
404 let mut event_recorder = ExecEventRecorder::new(
406 self.session_id.clone(),
407 self.event_sink.clone(),
408 Some(self.thread_handle.clone()),
409 );
410 event_recorder.turn_started();
411 self.runner_println(format_args!(
412 "{agent_prefix} Analyzing request and planning approach..."
413 ));
414
415 self.runner_println(format_args!(
416 "{} Executing {} task: {}",
417 style("[AGENT]").magenta().bold().on_black(),
418 self.agent_type,
419 task.title
420 ));
421
422 let run_started_at = std::time::Instant::now();
423 let is_simple_task = Self::is_simple_task(task, contexts);
424 let prompt_bundle = self
425 .build_validated_runtime_prompt_bundle(is_simple_task)
426 .await?;
427
428 let review_like = is_review_like_task(task);
429 let full_auto_active = self
430 .tool_registry
431 .current_full_auto_allowlist()
432 .await
433 .is_some();
434
435 let mut conversation = conversation_from_messages(&self.bootstrap_messages);
436 conversation.extend(build_conversation(task, contexts));
437
438 let conversation_messages = build_messages_from_conversation(&conversation);
441
442 let max_tool_loops = self.config().tools.max_tool_loops;
445 let preserve_recent_turns = self.config().context.preserve_recent_turns;
446 let max_context_tokens = self.config().context.max_context_tokens;
447
448 let mut session_state = AgentSessionState::new(
449 self.session_id.clone(),
450 self.max_turns,
451 max_tool_loops,
452 max_context_tokens,
453 );
454 session_state.conversation = conversation;
455 session_state.messages = conversation_messages;
456 session_state.last_processed_message_idx = session_state.conversation.len();
457
458 let mut runtime = AgentRuntime::new(session_state, None, steering_receiver);
459
460 if let Err(err) = self.tool_registry.initialize_async().await {
461 warn!(
462 error = %err,
463 "Tool registry initialization failed at task start"
464 );
465 runtime
466 .state
467 .warnings
468 .push(format!("Tool registry init failed: {err}"));
469 }
470 let orchestration_enabled =
471 self.harness_plan_build_evaluate_enabled(full_auto_active, review_like);
472 let planner_artifacts = if orchestration_enabled {
473 Some(self.run_planner_phase(task, &mut event_recorder).await?)
474 } else {
475 None
476 };
477 let effective_task = planner_artifacts
478 .as_ref()
479 .map(|artifacts| self.augment_generator_task(task, artifacts))
480 .unwrap_or_else(|| task.clone());
481
482 let mut continuation_controller = ContinuationController::new(
483 self._workspace.clone(),
484 self.tool_registry.plan_mode_state(),
485 self.config().agent.harness.continuation_policy.clone(),
486 full_auto_active,
487 self.tool_registry.is_plan_mode(),
488 review_like,
489 );
490 continuation_controller.prepare(&effective_task).await?;
491
492 (
493 agent_prefix,
494 event_recorder,
495 run_started_at,
496 is_simple_task,
497 prompt_bundle,
498 preserve_recent_turns,
499 max_tool_loops,
500 max_context_tokens,
501 runtime,
502 continuation_controller,
503 effective_task,
504 orchestration_enabled,
505 )
506 };
507
508 let (
509 agent_prefix,
510 mut event_recorder,
511 run_started_at,
512 is_simple_task,
513 mut prompt_bundle,
514 preserve_recent_turns,
515 max_tool_loops,
516 max_context_tokens,
517 mut runtime,
518 mut continuation_controller,
519 effective_task,
520 orchestration_enabled,
521 ) = runtime_setup;
522 let mut cost_warning_emitted = false;
523 let max_budget_usd = self.config().agent.harness.max_budget_usd;
524 let max_revision_rounds = self.config().agent.harness.max_revision_rounds.max(1);
525 let mut revision_rounds_used = 0usize;
526 let mut should_write_blocked_handoff = false;
527
528 let result = {
529 for turn in 0..self.max_turns {
530 if matches!(
531 runtime.poll_turn_control().await,
532 RuntimeControl::StopRequested
533 ) {
534 self.runner_println(format_args!(
535 "{} {}",
536 agent_prefix,
537 style("Stopped by steering signal.").red().bold()
538 ));
539 runtime.state.outcome = TaskOutcome::Cancelled;
540 break;
541 }
542
543 if let Some(input) = runtime.run_until_idle() {
544 self.runner_println(format_args!(
545 "{} {}: {}",
546 agent_prefix,
547 style("Follow-up Input").cyan().bold(),
548 input
549 ));
550 }
551
552 let utilization = runtime.state.utilization();
553 if utilization > 0.90 {
554 warn!("Context at {:.1}% - approaching limit", utilization * 100.0);
555 runtime.state.warnings.push(format!(
556 "Token budget at {}% - approaching context limit",
557 (utilization * 100.0) as u32
558 ));
559 }
560
561 if runtime.state.is_completed {
562 runtime.state.outcome = TaskOutcome::Success;
563 break;
564 }
565
566 self.runner_println(format_args!(
567 "{} {} is processing turn {}...",
568 agent_prefix,
569 style("(PROC)").cyan().bold(),
570 turn + 1
571 ));
572
573 let turn_model = self.get_selected_model();
574 let provider_name = self.provider_client.name().to_string();
575 if std::env::var_os("VTCODE_DEBUG_PROVIDER").is_some() {
576 tracing::debug!(
577 provider_client = self.provider_client.name(),
578 turn_model,
579 "Provider debug turn selection"
580 );
581 }
582 let turn_reasoning = if is_simple_task {
583 Some(ReasoningEffortLevel::Minimal)
584 } else {
585 self.reasoning_effort
586 };
587 let turn_verbosity = if is_simple_task {
588 Some(VerbosityLevel::Low)
589 } else {
590 self.verbosity
591 };
592 let max_tokens = if is_simple_task {
593 Some(800)
594 } else {
595 Some(2000)
596 };
597
598 self.summarize_conversation_if_needed(
599 &mut runtime.state,
600 preserve_recent_turns,
601 utilization,
602 );
603
604 let parallel_tool_config = if self.model.len() < 20 {
605 None
606 } else if self
607 .provider_client
608 .supports_parallel_tool_config(&turn_model)
609 {
610 Some(Box::new(
611 crate::llm::provider::ParallelToolConfig::anthropic_optimized(),
612 ))
613 } else {
614 None
615 };
616
617 let provider_kind = turn_model
618 .parse::<ModelId>()
619 .map(|model| model.provider())
620 .unwrap_or(ModelProvider::Gemini);
621
622 if matches!(provider_kind, ModelProvider::Gemini)
623 && runtime.state.conversation.len() > runtime.state.last_processed_message_idx
624 {
625 for content in
626 &runtime.state.conversation[runtime.state.last_processed_message_idx..]
627 {
628 let mut text = String::new();
629 for part in &content.parts {
630 if let Part::Text {
631 text: part_text, ..
632 } = part
633 {
634 if !text.is_empty() {
635 text.push('\n');
636 }
637 text.push_str(part_text);
638 }
639 }
640 let message = match content.role.as_str() {
641 "model" => Message::assistant(text),
642 _ => Message::user(text),
643 };
644 runtime.state.messages.push(message);
645 }
646 runtime.state.last_processed_message_idx = runtime.state.conversation.len();
647 }
648
649 let reasoning_effort =
650 if self.provider_client.supports_reasoning_effort(&turn_model) {
651 turn_reasoning
652 } else {
653 None
654 };
655 let temperature = if reasoning_effort.is_some()
656 && matches!(
657 provider_kind,
658 ModelProvider::Anthropic | ModelProvider::Minimax
659 ) {
660 None
661 } else {
662 Some(0.7)
663 };
664
665 let (request_messages, previous_response_id) = prepare_responses_request_messages(
666 &mut runtime.state.previous_response_chains,
667 &provider_name,
668 self.provider_client
669 .supports_responses_compaction(&turn_model),
670 &turn_model,
671 &runtime.state.messages,
672 );
673
674 let request = build_harness_request_plan(HarnessRequestPlanInput {
675 messages: request_messages.into_owned(),
676 system_prompt: prompt_bundle.system_instruction.as_ref().clone(),
677 tools: prompt_bundle.request_tools.clone(),
678 model: turn_model.clone(),
679 max_tokens,
680 temperature,
681 stream: self.provider_client.supports_streaming(),
682 tool_choice: None,
683 parallel_tool_config,
684 reasoning_effort,
685 verbosity: turn_verbosity,
686 metadata: None,
687 context_management: None,
688 previous_response_id,
689 prompt_cache_key: build_openai_prompt_cache_key(
690 provider_name.eq_ignore_ascii_case("openai")
691 && self.config().prompt_cache.enabled
692 && self.config().prompt_cache.providers.openai.enabled,
693 &self
694 .config()
695 .prompt_cache
696 .providers
697 .openai
698 .prompt_cache_key_mode,
699 Some(&self.session_id),
700 ),
701 prompt_cache_profile: None,
702 tool_catalog_hash: prompt_bundle.tool_snapshot.tool_catalog_hash,
703 })
704 .request;
705 self.validate_llm_request(&request)?;
709 let previous_response_chain_present = request.previous_response_id.is_some();
710 let sent_messages = request.messages.clone();
714 let streaming_timeout = self
717 .config()
718 .timeouts
719 .ceiling_duration(self.config().timeouts.streaming_ceiling_seconds);
720
721 let turn_output = runtime
722 .run_turn_once(&mut self.provider_client, request, streaming_timeout)
723 .await?;
724 event_recorder.record_thread_events(runtime.take_emitted_events());
725 let response = turn_output.response;
726 runtime.state.stop_reason =
727 Some(stop_reason_from_finish_reason(&response.finish_reason));
728 if supports_responses_chaining(
729 &provider_name,
730 self.provider_client
731 .supports_responses_compaction(&turn_model),
732 ) {
733 runtime.state.set_previous_response_chain(
734 &provider_name,
735 &turn_model,
736 response.request_id.as_deref(),
737 sent_messages,
738 );
739 }
740 match estimate_session_cost_usd(
741 self.config().agent.provider.as_str(),
742 &turn_model,
743 &runtime.state.stats.total_usage,
744 ) {
745 Some(total_cost_usd) => {
746 runtime.state.total_cost_usd = Some(total_cost_usd);
747 if let Some(max_budget_usd) = max_budget_usd
748 && total_cost_usd > max_budget_usd
749 {
750 runtime.state.outcome =
751 TaskOutcome::budget_limit_reached(max_budget_usd, total_cost_usd);
752 break;
753 }
754 }
755 None => {
756 runtime.state.total_cost_usd = None;
757 if max_budget_usd.is_some() && !cost_warning_emitted {
758 cost_warning_emitted = true;
759 let warning_message = format!(
760 "Budget enforcement disabled for model `{turn_model}` because pricing metadata is unavailable"
761 );
762 warn!(
763 provider = %self.config().agent.provider,
764 model = %turn_model,
765 "Budget enforcement disabled because pricing metadata is unavailable"
766 );
767 runtime.state.warnings.push(warning_message);
768 }
769 }
770 }
771 self.runner_println(format_args!(
772 "{} {} {} received response, processing...",
773 agent_prefix,
774 self.agent_type,
775 style("(RECV)").green().bold()
776 ));
777
778 self.warn_on_empty_response(
779 &agent_prefix,
780 response.content.as_deref().unwrap_or(""),
781 response
782 .tool_calls
783 .as_ref()
784 .is_some_and(|tool_calls| !tool_calls.is_empty()),
785 );
786
787 let response_text = response.content_string();
788 if !response_text.trim().is_empty() {
789 self.emit_final_assistant_message(&self.agent_type, &response_text);
790 }
791
792 let mut effective_tool_calls = response.tool_calls.clone();
793 let mut forced_continuation = false;
794
795 if effective_tool_calls.is_none()
796 && response.content_text().len() < 150
797 && let Some(args_value) = detect_textual_exec_tool_call(response.content_text())
798 {
799 effective_tool_calls = Some(vec![ToolCall::function(
800 format!("call_text_{}", turn),
801 tools::UNIFIED_EXEC.to_string(),
802 args_value.to_string(),
803 )]);
804 }
805
806 let is_gemini = matches!(provider_kind, ModelProvider::Gemini);
807
808 if !runtime.state.is_completed
809 && effective_tool_calls
810 .as_ref()
811 .is_none_or(|tool_calls| tool_calls.is_empty())
812 && !response.content_text().is_empty()
813 {
814 if check_for_response_loop(response.content_text(), &mut runtime.state) {
815 self.runner_println(format_args!(
816 "[{}] {}",
817 self.agent_type,
818 style(
819 "Repetitive assistant response detected. Breaking potential loop."
820 )
821 .red()
822 .bold()
823 ));
824 runtime.state.outcome = TaskOutcome::LoopDetected;
825 break;
826 }
827
828 if check_completion_candidate(response.content_text()) {
829 self.runner_println(format_args!(
830 "[{}] {}",
831 self.agent_type,
832 style("Completion candidate detected; checking tracker and verification state.")
833 .green()
834 .bold()
835 ));
836 match continuation_controller
837 .assess_completion(&effective_task, &runtime.state)
838 .await?
839 {
840 CompletionAssessment::Accept => {
841 if self
842 .resolve_completion_acceptance(
843 &effective_task,
844 &mut runtime.state,
845 &mut event_recorder,
846 orchestration_enabled,
847 &[],
848 &mut revision_rounds_used,
849 max_revision_rounds,
850 &mut should_write_blocked_handoff,
851 )
852 .await?
853 {
854 break;
855 }
856 forced_continuation = true;
857 }
858 CompletionAssessment::SkipAccept { reason } => {
859 event_recorder.harness_event(
860 HarnessEventKind::ContinuationSkipped,
861 Some(reason),
862 None,
863 None,
864 None,
865 );
866 runtime.state.is_completed = true;
867 runtime.state.outcome = TaskOutcome::Success;
868 break;
869 }
870 CompletionAssessment::Continue { reason, prompt } => {
871 event_recorder.harness_event(
872 HarnessEventKind::ContinuationStarted,
873 Some(reason),
874 None,
875 None,
876 None,
877 );
878 runtime.state.add_user_message(prompt);
879 forced_continuation = true;
880 }
881 CompletionAssessment::Verify { commands } => {
882 event_recorder.harness_event(
883 HarnessEventKind::VerificationStarted,
884 Some(format!("Running verification: {}", commands.join(", "))),
885 commands.first().cloned(),
886 None,
887 None,
888 );
889 let verification_results = self
890 .run_verification_commands(&commands, &mut event_recorder)
891 .await?;
892 if let Some(failure) =
893 verification_results.iter().find(|result| !result.success)
894 {
895 event_recorder.harness_event(
896 HarnessEventKind::VerificationFailed,
897 Some(format!(
898 "{}{}",
899 match failure.exit_code {
900 Some(code) => format!(
901 "Verification failed: {} (exit code {}).",
902 failure.command, code
903 ),
904 None => format!(
905 "Verification failed: {}.",
906 failure.command
907 ),
908 },
909 if failure.output.trim().is_empty() {
910 String::new()
911 } else {
912 format!("\n{}", failure.output.trim())
913 }
914 )),
915 Some(failure.command.clone()),
916 None,
917 failure.exit_code,
918 );
919 } else {
920 event_recorder.harness_event(
921 HarnessEventKind::VerificationPassed,
922 Some(format!(
923 "Verification passed: {}",
924 commands.join(", ")
925 )),
926 commands.last().cloned(),
927 None,
928 Some(0),
929 );
930 }
931
932 match continuation_controller
933 .after_verification(&verification_results)
934 .await?
935 {
936 CompletionAssessment::Accept
937 | CompletionAssessment::SkipAccept { .. } => {
938 if self
939 .resolve_completion_acceptance(
940 &effective_task,
941 &mut runtime.state,
942 &mut event_recorder,
943 orchestration_enabled,
944 &verification_results,
945 &mut revision_rounds_used,
946 max_revision_rounds,
947 &mut should_write_blocked_handoff,
948 )
949 .await?
950 {
951 break;
952 }
953 forced_continuation = true;
954 }
955 CompletionAssessment::Continue { reason, prompt } => {
956 event_recorder.harness_event(
957 HarnessEventKind::ContinuationStarted,
958 Some(reason),
959 None,
960 None,
961 None,
962 );
963 runtime.state.add_user_message(prompt);
964 forced_continuation = true;
965 }
966 CompletionAssessment::Verify { .. } => {}
967 }
968 }
969 }
970 }
971 }
972
973 if let Some(tool_calls) = effective_tool_calls
974 .as_ref()
975 .filter(|tool_calls| !tool_calls.is_empty())
976 .cloned()
977 {
978 self.execute_tool_call_batches(
979 tool_calls,
980 &mut runtime,
981 &mut event_recorder,
982 &agent_prefix,
983 is_gemini,
984 previous_response_chain_present,
985 )
986 .await?;
987 event_recorder.record_thread_events(runtime.take_emitted_events());
988 }
989
990 let _ = self
993 .refresh_runtime_prompt_bundle_if_catalog_changed(
994 &mut prompt_bundle,
995 is_simple_task,
996 )
997 .await?;
998
999 let had_effective_shell_tool_call =
1000 effective_tool_calls.as_ref().is_some_and(|calls| {
1001 calls.iter().any(|call| {
1002 call.function
1003 .as_ref()
1004 .map(|function| function.name.as_str())
1005 == Some(tools::UNIFIED_EXEC)
1006 })
1007 });
1008 let had_tool_call = response
1009 .tool_calls
1010 .as_ref()
1011 .is_some_and(|tool_calls| !tool_calls.is_empty())
1012 || had_effective_shell_tool_call;
1013
1014 if had_tool_call {
1015 let loops = runtime.state.register_tool_loop();
1016 if tool_loop_limit_reached(loops, runtime.state.constraints.max_tool_loops) {
1017 let warning_message = format!(
1018 "Reached tool-call limit of {} iterations; pausing autonomous loop",
1019 runtime.state.constraints.max_tool_loops
1020 );
1021 self.record_warning(
1022 &agent_prefix,
1023 &mut runtime.state,
1024 &mut event_recorder,
1025 warning_message,
1026 );
1027 runtime.state.mark_tool_loop_limit_hit();
1028 break;
1029 }
1030 runtime.state.consecutive_idle_turns = 0;
1031 } else {
1032 runtime.state.reset_tool_loop_guard();
1033 if forced_continuation {
1034 runtime.state.consecutive_idle_turns = 0;
1035 } else if !runtime.state.is_completed {
1036 runtime.state.consecutive_idle_turns =
1037 runtime.state.consecutive_idle_turns.saturating_add(1);
1038 let idle_turn_limit = self.config().agent.idle_turn_limit;
1039 if runtime.state.consecutive_idle_turns >= idle_turn_limit {
1040 let warning_message = format!(
1041 "No tool calls or completion for {} consecutive turns; halting to avoid idle loop",
1042 runtime.state.consecutive_idle_turns
1043 );
1044 self.record_warning(
1045 &agent_prefix,
1046 &mut runtime.state,
1047 &mut event_recorder,
1048 warning_message,
1049 );
1050 runtime.state.outcome = TaskOutcome::StoppedNoAction;
1051 break;
1052 }
1053 }
1054 }
1055
1056 let should_continue = forced_continuation
1057 || had_tool_call
1058 || runtime.has_pending_follow_up_inputs()
1059 || (!runtime.state.is_completed && (turn + 1) < self.max_turns);
1060
1061 if !should_continue {
1062 if runtime.state.is_completed {
1063 runtime.state.outcome = TaskOutcome::Success;
1064 } else if (turn + 1) >= self.max_turns {
1065 runtime.state.outcome =
1066 TaskOutcome::turn_limit_reached(self.max_turns, turn + 1);
1067 } else {
1068 runtime.state.outcome = TaskOutcome::StoppedNoAction;
1069 }
1070 break;
1071 }
1072 }
1073
1074 runtime.state.finalize_outcome(self.max_turns);
1075
1076 let total_duration_ms = run_started_at.elapsed().as_millis();
1077
1078 self.runner_println(format_args!("{} Done", agent_prefix));
1080
1081 let average_turn_duration_ms = if runtime.state.turn_count > 0 {
1083 Some(runtime.state.turn_total_ms as f64 / runtime.state.turn_count as f64)
1084 } else {
1085 None
1086 };
1087
1088 let max_turn_duration_ms = if runtime.state.turn_count > 0 {
1089 Some(runtime.state.turn_max_ms)
1090 } else {
1091 None
1092 };
1093
1094 let outcome = runtime.state.outcome.clone();
1095 self.thread_handle
1096 .replace_messages(runtime.state.messages.clone());
1097 let summary = self.generate_task_summary(
1098 &effective_task,
1099 &runtime.state.modified_files,
1100 &runtime.state.executed_commands,
1101 &runtime.state.warnings,
1102 &runtime.state.messages,
1103 runtime.state.stats.turns_executed,
1104 runtime.state.max_tool_loop_streak,
1105 max_tool_loops,
1106 outcome,
1107 total_duration_ms,
1108 average_turn_duration_ms,
1109 max_turn_duration_ms,
1110 );
1111
1112 if !summary.trim().is_empty() {
1113 event_recorder.agent_message(&summary);
1115 self.runner_println(format_args!(
1117 "\n{} Agent Task Summary\n{}",
1118 style("[Task]").cyan().bold(),
1119 summary
1120 ));
1121 }
1122
1123 let runtime_agent_config = self.core_agent_config();
1124 if let Err(err) = crate::persistent_memory::finalize_persistent_memory(
1125 &runtime_agent_config,
1126 Some(self.config()),
1127 &runtime.state.messages,
1128 )
1129 .await
1130 {
1131 warn!(
1132 error = %err,
1133 session_id = %self.session_id,
1134 "Failed to update persistent memory"
1135 );
1136 }
1137
1138 if runtime.state.outcome.is_hard_block() || should_write_blocked_handoff {
1139 let relevant_paths = existing_harness_artifact_paths(&self._workspace);
1140 match write_blocked_handoff(
1141 &self._workspace,
1142 &self.session_id,
1143 runtime.state.outcome.code(),
1144 &runtime.state.outcome.description(),
1145 &relevant_paths,
1146 ) {
1147 Ok(artifacts) => emit_blocked_handoff_events(
1148 &mut event_recorder,
1149 &artifacts.current_path,
1150 &artifacts.archive_path,
1151 ),
1152 Err(err) => warn!(
1153 error = %err,
1154 session_id = %self.session_id,
1155 "Failed to persist blocked handoff"
1156 ),
1157 }
1158 }
1159
1160 let total_usage = runtime.state.stats.total_usage.clone();
1161 record_terminal_turn_event(
1162 &mut event_recorder,
1163 &runtime.state.outcome,
1164 total_usage.clone(),
1165 );
1166 event_recorder.thread_completed(
1167 &self.session_id,
1168 runtime.state.outcome.thread_completion_subtype(),
1169 runtime.state.outcome.code(),
1170 runtime
1171 .state
1172 .outcome
1173 .is_success()
1174 .then_some(summary.as_str()),
1175 runtime.state.stop_reason.as_deref(),
1176 total_usage,
1177 runtime
1178 .state
1179 .total_cost_usd
1180 .and_then(serde_json::Number::from_f64),
1181 runtime.state.stats.turns_executed,
1182 );
1183 let thread_events = event_recorder.into_events();
1184 let steering_receiver = runtime.take_steering_receiver();
1185 let state = std::mem::replace(
1186 &mut runtime.state,
1187 AgentSessionState::new(
1188 self.session_id.clone(),
1189 self.max_turns,
1190 max_tool_loops,
1191 max_context_tokens,
1192 ),
1193 );
1194
1195 Ok((
1196 state.into_results(summary, thread_events, total_duration_ms),
1197 steering_receiver,
1198 ))
1199 };
1200
1201 let result = match result {
1202 Ok((task_results, steering_receiver)) => {
1203 *self.steering_receiver.lock() = steering_receiver;
1204 Ok(task_results)
1205 }
1206 Err(err) => {
1207 *self.steering_receiver.lock() = runtime.take_steering_receiver();
1208 Err(err)
1209 }
1210 };
1211
1212 self.tool_registry.set_harness_task(None);
1213 result
1214 }
1215}
1216
/// Unit tests for the terminal turn-event recording, the tool-loop limit check, and the
/// request-message preparation used for chained responses.
#[cfg(test)]
mod tests {
    use super::{
        prepare_responses_request_messages, record_terminal_turn_event, tool_loop_limit_reached,
    };
    use crate::core::agent::events::ExecEventRecorder;
    use crate::core::agent::session::AgentSessionState;
    use crate::core::agent::task::TaskOutcome;
    use crate::exec::events::ThreadEvent;
    use crate::llm::provider::Message;

    /// A `Failed` outcome must record exactly one `TurnFailed` event and no `TurnCompleted`.
    #[test]
    fn failed_outcome_emits_only_turn_failed() {
        let mut recorder = ExecEventRecorder::new("thread", None, None);
        recorder.turn_started();

        record_terminal_turn_event(
            &mut recorder,
            &TaskOutcome::Failed {
                reason: "boom".to_string(),
            },
            Default::default(),
        );

        let events = recorder.into_events();
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnFailed(_)))
                .count(),
            1
        );
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnCompleted(_)))
                .count(),
            0
        );
    }

    /// A `Success` outcome must record exactly one `TurnCompleted` event and no `TurnFailed`.
    #[test]
    fn successful_outcome_emits_only_turn_completed() {
        let mut recorder = ExecEventRecorder::new("thread", None, None);
        recorder.turn_started();

        record_terminal_turn_event(&mut recorder, &TaskOutcome::Success, Default::default());

        let events = recorder.into_events();
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnCompleted(_)))
                .count(),
            1
        );
        assert_eq!(
            events
                .iter()
                .filter(|event| matches!(event, ThreadEvent::TurnFailed(_)))
                .count(),
            0
        );
    }

    /// With a second argument of `0` the limit check must never report the limit as reached.
    // NOTE(review): `0` presumably means "limit disabled" (per the test name) -- confirm
    // against `tool_loop_limit_reached`.
    #[test]
    fn disabled_tool_loop_limit_never_trips() {
        assert!(!tool_loop_limit_reached(1, 0));
        assert!(!tool_loop_limit_reached(32, 0));
    }

    /// For the "openai" provider with a stored chain, only the messages after the stored
    /// prefix are sent, together with the stored previous response id.
    #[test]
    fn openai_prepare_responses_request_messages_uses_incremental_suffix() {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        let prior_messages = vec![Message::user("hello".to_string())];
        let current_messages = vec![
            Message::user("hello".to_string()),
            Message::user("continue".to_string()),
        ];
        state.set_previous_response_chain("openai", "gpt-5.4", Some("resp_123"), prior_messages);

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "openai",
            false,
            "gpt-5.4",
            &current_messages,
        );

        assert_eq!(previous_response_id.as_deref(), Some("resp_123"));
        assert_eq!(
            request_messages.as_ref(),
            [Message::user("continue".to_string())].as_slice()
        );
    }

    /// For the "gemini" provider the full message history is sent even though a previous
    /// response id is stored and returned.
    #[test]
    fn gemini_prepare_responses_request_messages_keeps_full_history() {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        let prior_messages = vec![Message::user("hello".to_string())];
        let current_messages = vec![
            Message::user("hello".to_string()),
            Message::user("continue".to_string()),
        ];
        state.set_previous_response_chain(
            "gemini",
            "gemini-2.5-pro",
            Some("resp_123"),
            prior_messages,
        );

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "gemini",
            false,
            "gemini-2.5-pro",
            &current_messages,
        );

        assert_eq!(previous_response_id.as_deref(), Some("resp_123"));
        assert_eq!(request_messages.as_ref(), current_messages.as_slice());
    }

    /// For a custom provider name with the boolean flag set, only the messages after the
    /// stored prefix are sent, as in the "openai" case.
    // NOTE(review): the `true` flag presumably marks an OpenAI-compatible provider (per the
    // test name) -- confirm against `prepare_responses_request_messages`.
    #[test]
    fn compatible_prepare_responses_request_messages_uses_incremental_suffix() {
        let mut state = AgentSessionState::new("session".to_string(), 4, 4, 16_000);
        let prior_messages = vec![Message::user("hello".to_string())];
        let current_messages = vec![
            Message::user("hello".to_string()),
            Message::user("continue".to_string()),
        ];
        state.set_previous_response_chain("mycorp", "gpt-5.4", Some("resp_123"), prior_messages);

        let (request_messages, previous_response_id) = prepare_responses_request_messages(
            &mut state.previous_response_chains,
            "mycorp",
            true,
            "gpt-5.4",
            &current_messages,
        );

        assert_eq!(previous_response_id.as_deref(), Some("resp_123"));
        assert_eq!(
            request_messages.as_ref(),
            [Message::user("continue".to_string())].as_slice()
        );
    }
}