Skip to main content

vtcode_core/core/agent/
task.rs

1//! Task-related data structures shared across the agent runner modules.
2
3use crate::exec::events::ThreadCompletionSubtype;
4use crate::exec::events::ThreadEvent;
5use serde::{Deserialize, Serialize};
6use std::fmt;
7
8/// Task specification consumed by the benchmark/autonomous runner.
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Task {
11    /// Stable identifier for reporting.
12    pub id: String,
13    /// Human-readable task title displayed in progress messages.
14    pub title: String,
15    /// High-level description of the task objective.
16    pub description: String,
17    /// Optional explicit instructions appended to the conversation.
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub instructions: Option<String>,
20}
21
22impl Task {
23    /// Construct a task with the provided metadata.
24    pub fn new(id: String, title: String, description: String) -> Self {
25        Self {
26            id,
27            title,
28            description,
29            instructions: None,
30        }
31    }
32}
33
34/// Context entry supplied alongside the benchmark task.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct ContextItem {
37    /// Identifier used when referencing the context in prompts.
38    pub id: String,
39    /// Raw textual content exposed to the agent.
40    pub content: String,
41}
42
43#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
44#[serde(rename_all = "snake_case")]
45pub enum TaskOutcome {
46    Success,
47    StoppedNoAction,
48    TurnLimitReached {
49        max_turns: usize,
50        actual_turns: usize,
51    },
52    BudgetLimitReached {
53        max_budget_usd: f64,
54        actual_cost_usd: f64,
55    },
56    ToolLoopLimitReached {
57        max_tool_loops: usize,
58        actual_tool_loops: usize,
59    },
60    LoopDetected,
61    Cancelled,
62    Failed {
63        reason: String,
64    },
65    Unknown,
66}
67
68impl TaskOutcome {
69    pub fn is_success(&self) -> bool {
70        matches!(self, Self::Success | Self::StoppedNoAction)
71    }
72
73    pub fn is_hard_block(&self) -> bool {
74        matches!(self, Self::ToolLoopLimitReached { .. } | Self::LoopDetected)
75    }
76
77    pub fn description(&self) -> String {
78        match self {
79            Self::Success => "Task completed successfully".into(),
80            Self::StoppedNoAction => "Stopped after agent signaled no further actions".into(),
81            Self::TurnLimitReached {
82                max_turns,
83                actual_turns,
84            } => format!(
85                "Stopped after reaching turn limit (max: {}, reached: {})",
86                max_turns, actual_turns
87            ),
88            Self::BudgetLimitReached {
89                max_budget_usd,
90                actual_cost_usd,
91            } => format!(
92                "Stopped after reaching budget limit (max: ${max_budget_usd:.4}, spent: ${actual_cost_usd:.4})"
93            ),
94            Self::ToolLoopLimitReached {
95                max_tool_loops,
96                actual_tool_loops,
97            } => {
98                if *max_tool_loops == 0 {
99                    format!(
100                        "Stopped after a tool-loop safeguard halted execution (reached: {})",
101                        actual_tool_loops
102                    )
103                } else {
104                    format!(
105                        "Stopped after reaching tool loop limit (max: {}, reached: {})",
106                        max_tool_loops, actual_tool_loops
107                    )
108                }
109            }
110            Self::LoopDetected => "Stopped due to infinite loop detection".into(),
111            Self::Cancelled => "Task cancelled by user".into(),
112            Self::Failed { reason } => format!("Task failed: {}", reason),
113            Self::Unknown => "Task outcome could not be determined".into(),
114        }
115    }
116
117    pub fn code(&self) -> &'static str {
118        match self {
119            Self::Success => "success",
120            Self::StoppedNoAction => "stopped_no_action",
121            Self::TurnLimitReached { .. } => "turn_limit_reached",
122            Self::BudgetLimitReached { .. } => "budget_limit_reached",
123            Self::ToolLoopLimitReached { .. } => "tool_loop_limit_reached",
124            Self::LoopDetected => "loop_detected",
125            Self::Cancelled => "cancelled",
126            Self::Failed { .. } => "failed",
127            Self::Unknown => "unknown",
128        }
129    }
130
131    pub fn thread_completion_subtype(&self) -> ThreadCompletionSubtype {
132        match self {
133            Self::Success | Self::StoppedNoAction => ThreadCompletionSubtype::Success,
134            Self::TurnLimitReached { .. } => ThreadCompletionSubtype::ErrorMaxTurns,
135            Self::BudgetLimitReached { .. } => ThreadCompletionSubtype::ErrorMaxBudgetUsd,
136            Self::Cancelled => ThreadCompletionSubtype::Cancelled,
137            Self::ToolLoopLimitReached { .. }
138            | Self::LoopDetected
139            | Self::Failed { .. }
140            | Self::Unknown => ThreadCompletionSubtype::ErrorDuringExecution,
141        }
142    }
143
144    pub fn success() -> Self {
145        Self::Success
146    }
147
148    pub fn turn_limit_reached(max_turns: usize, actual_turns: usize) -> Self {
149        Self::TurnLimitReached {
150            max_turns,
151            actual_turns,
152        }
153    }
154
155    pub fn budget_limit_reached(max_budget_usd: f64, actual_cost_usd: f64) -> Self {
156        Self::BudgetLimitReached {
157            max_budget_usd,
158            actual_cost_usd,
159        }
160    }
161
162    pub fn tool_loop_limit_reached(max_tool_loops: usize, actual_tool_loops: usize) -> Self {
163        Self::ToolLoopLimitReached {
164            max_tool_loops,
165            actual_tool_loops,
166        }
167    }
168}
169
170impl fmt::Display for TaskOutcome {
171    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172        f.write_str(self.code())
173    }
174}
175
#[cfg(test)]
mod tests {
    use super::{TaskOutcome, ThreadCompletionSubtype};

    /// A zero limit must be worded as a safeguard halt, not as "max: 0".
    #[test]
    fn tool_loop_limit_description_handles_disabled_limit() {
        let description = TaskOutcome::tool_loop_limit_reached(0, 4).description();

        assert!(description.contains("tool-loop safeguard halted execution"));
        assert!(description.contains("reached: 4"));
    }

    /// A non-zero limit reports both the limit and the reached count.
    #[test]
    fn tool_loop_limit_description_reports_configured_limit() {
        let description = TaskOutcome::tool_loop_limit_reached(3, 3).description();

        assert_eq!(
            description,
            "Stopped after reaching tool loop limit (max: 3, reached: 3)"
        );
    }

    /// Every outcome variant maps onto the expected completion subtype.
    #[test]
    fn thread_completion_subtype_matches_public_result_states() {
        assert_eq!(
            TaskOutcome::Success.thread_completion_subtype(),
            ThreadCompletionSubtype::Success
        );
        assert_eq!(
            TaskOutcome::StoppedNoAction.thread_completion_subtype(),
            ThreadCompletionSubtype::Success
        );
        assert_eq!(
            TaskOutcome::turn_limit_reached(3, 3).thread_completion_subtype(),
            ThreadCompletionSubtype::ErrorMaxTurns
        );
        assert_eq!(
            TaskOutcome::budget_limit_reached(1.0, 1.2).thread_completion_subtype(),
            ThreadCompletionSubtype::ErrorMaxBudgetUsd
        );
        assert_eq!(
            TaskOutcome::Cancelled.thread_completion_subtype(),
            ThreadCompletionSubtype::Cancelled
        );
        assert_eq!(
            (TaskOutcome::Failed {
                reason: "boom".to_string()
            })
            .thread_completion_subtype(),
            ThreadCompletionSubtype::ErrorDuringExecution
        );
        // The remaining variants all map to the generic execution error.
        assert_eq!(
            TaskOutcome::tool_loop_limit_reached(2, 2).thread_completion_subtype(),
            ThreadCompletionSubtype::ErrorDuringExecution
        );
        assert_eq!(
            TaskOutcome::LoopDetected.thread_completion_subtype(),
            ThreadCompletionSubtype::ErrorDuringExecution
        );
        assert_eq!(
            TaskOutcome::Unknown.thread_completion_subtype(),
            ThreadCompletionSubtype::ErrorDuringExecution
        );
    }
}
219
220/// Aggregated results returned by the autonomous agent runner.
221#[derive(Debug, Clone, Serialize, Deserialize)]
222pub struct TaskResults {
223    /// Identifiers of any contexts created during execution.
224    #[serde(default)]
225    pub created_contexts: Vec<String>,
226    /// File paths modified during the task.
227    #[serde(default)]
228    pub modified_files: Vec<String>,
229    /// Terminal commands executed while solving the task.
230    #[serde(default)]
231    pub executed_commands: Vec<String>,
232    /// Natural-language summary of the run assembled by the agent.
233    pub summary: String,
234    /// Provider stop reason associated with the last model turn, when available.
235    #[serde(default, skip_serializing_if = "Option::is_none")]
236    pub stop_reason: Option<String>,
237    /// Estimated total API cost in USD, when pricing metadata is available.
238    #[serde(default, skip_serializing_if = "Option::is_none")]
239    pub total_cost_usd: Option<f64>,
240    /// Collected warnings emitted while processing the task.
241    #[serde(default)]
242    pub warnings: Vec<String>,
243    /// Structured execution timeline for headless modes.
244    #[serde(default)]
245    pub thread_events: Vec<ThreadEvent>,
246    /// Finalized outcome of the task.
247    pub outcome: TaskOutcome,
248    /// Number of autonomous turns executed.
249    pub turns_executed: usize,
250    /// Total runtime in milliseconds.
251    pub total_duration_ms: u128,
252    /// Average turn duration in milliseconds (if turns executed).
253    #[serde(default)]
254    pub average_turn_duration_ms: Option<f64>,
255    /// Longest individual turn duration in milliseconds.
256    #[serde(default)]
257    pub max_turn_duration_ms: Option<u128>,
258    /// Per-turn duration metrics in milliseconds.
259    #[serde(default)]
260    pub turn_durations_ms: Vec<u128>,
261}