Skip to main content

car_ir/
outcome.rs

1//! Agent execution outcome semantics.
2//!
3//! Provides typed outcomes for agent execution loops, replacing ad-hoc
4//! success/failure heuristics with structured completion semantics.
5
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9
/// The outcome of an agent execution loop.
///
/// Bundles the final [`OutcomeStatus`] with a human-readable summary, the
/// [`Evidence`] backing the classification, the [`OutcomeMetrics`] gathered
/// during the run, and the time at which the outcome was recorded.
///
/// The type derives `Serialize` and `Deserialize`, so it can be round-tripped
/// through serde formats such as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentOutcome {
    /// Classification of how the execution ended.
    pub status: OutcomeStatus,
    /// Human-readable summary of what happened.
    pub summary: String,
    /// Evidence supporting the outcome classification.
    ///
    /// May be empty: the `success` and `failure` constructors start with no
    /// evidence, and more can be appended with `with_evidence`.
    pub evidence: Vec<Evidence>,
    /// Metrics from the execution.
    pub metrics: OutcomeMetrics,
    /// When the outcome was determined, in UTC.
    ///
    /// The constructors in this module stamp it with `Utc::now()` at the
    /// moment the outcome value is built.
    pub timestamp: DateTime<Utc>,
}
24
/// Outcome classification for an agent execution.
///
/// Serialized by serde in `snake_case`; for example `PartialSuccess` becomes
/// `"partial_success"` and `GiveUp` becomes `"give_up"`.
///
/// [`OutcomeStatus::is_completed`] treats `Success`, `PartialSuccess`, and
/// `Done` as completed; `GiveUp`, `Timeout`, and `Failure` are not.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OutcomeStatus {
    /// Task completed successfully with all goals met.
    Success,
    /// Task completed but only some goals were met.
    PartialSuccess,
    /// Agent explicitly determined it cannot complete the task.
    ///
    /// Not counted as completed by `is_completed`.
    GiveUp,
    /// Execution exceeded time or step limits.
    Timeout,
    /// Execution failed due to errors.
    Failure,
    /// Agent explicitly signaled it is done (neutral -- may or may not have succeeded).
    ///
    /// Counted as completed by `is_completed` even though success is unknown.
    Done,
}
42
43impl OutcomeStatus {
44    /// Whether this outcome represents any form of completion (not failure/timeout).
45    pub fn is_completed(&self) -> bool {
46        matches!(self, Self::Success | Self::PartialSuccess | Self::Done)
47    }
48
49    /// Whether this outcome represents a terminal state (no more work possible).
50    pub fn is_terminal(&self) -> bool {
51        true // All outcome statuses are terminal
52    }
53}
54
/// Evidence supporting an outcome classification.
///
/// Each entry pairs a category ([`EvidenceKind`]) with a human-readable
/// description and, optionally, a structured JSON payload. Entries are
/// collected in `AgentOutcome::evidence`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    /// What kind of evidence this is.
    pub kind: EvidenceKind,
    /// Human-readable description.
    pub description: String,
    /// Optional structured data as an arbitrary JSON value.
    ///
    /// `None` when the description alone is sufficient; for example, the
    /// `timeout` constructor stores `{"turns": .., "max_turns": ..}` here,
    /// while `give_up` leaves it empty.
    pub data: Option<Value>,
}
65
66/// Types of evidence that can support an outcome.
67#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
68#[serde(rename_all = "snake_case")]
69pub enum EvidenceKind {
70    /// Agent's own assessment of task completion.
71    SelfAssessment,
72    /// Tool call results that demonstrate completion.
73    ToolResult,
74    /// State changes that demonstrate completion.
75    StateChange,
76    /// External verification (e.g., test passed).
77    ExternalVerification,
78    /// The reason execution stopped (timeout, max steps, etc.).
79    StopReason,
80    /// Product-level evaluator result.
81    Evaluator,
82}
83
/// Execution metrics associated with an outcome.
///
/// Derives `Default`, so every counter starts at zero and `duration_ms` at
/// `0.0`; callers typically fill in only the fields they track using struct
/// update syntax (`OutcomeMetrics { turns, ..Default::default() }`).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OutcomeMetrics {
    /// Total turns/steps executed.
    pub turns: u32,
    /// Total tool calls made.
    pub tool_calls: u32,
    /// Wall-clock duration in milliseconds (floating point, so fractional
    /// milliseconds can be represented).
    pub duration_ms: f64,
    /// Number of retries/replans attempted.
    pub retries: u32,
    /// Number of actions that succeeded.
    pub actions_succeeded: u32,
    /// Number of actions that failed.
    pub actions_failed: u32,
}
100
101impl AgentOutcome {
102    /// Create a successful outcome.
103    pub fn success(summary: &str) -> Self {
104        Self {
105            status: OutcomeStatus::Success,
106            summary: summary.to_string(),
107            evidence: Vec::new(),
108            metrics: OutcomeMetrics::default(),
109            timestamp: Utc::now(),
110        }
111    }
112
113    /// Create a failure outcome.
114    pub fn failure(summary: &str) -> Self {
115        Self {
116            status: OutcomeStatus::Failure,
117            summary: summary.to_string(),
118            evidence: Vec::new(),
119            metrics: OutcomeMetrics::default(),
120            timestamp: Utc::now(),
121        }
122    }
123
124    /// Create a timeout outcome.
125    pub fn timeout(summary: &str, turns: u32, max_turns: u32) -> Self {
126        Self {
127            status: OutcomeStatus::Timeout,
128            summary: summary.to_string(),
129            evidence: vec![Evidence {
130                kind: EvidenceKind::StopReason,
131                description: format!("Reached {} of {} max turns", turns, max_turns),
132                data: Some(serde_json::json!({
133                    "turns": turns,
134                    "max_turns": max_turns,
135                })),
136            }],
137            metrics: OutcomeMetrics {
138                turns,
139                ..Default::default()
140            },
141            timestamp: Utc::now(),
142        }
143    }
144
145    /// Create a give-up outcome.
146    pub fn give_up(reason: &str) -> Self {
147        Self {
148            status: OutcomeStatus::GiveUp,
149            summary: reason.to_string(),
150            evidence: vec![Evidence {
151                kind: EvidenceKind::SelfAssessment,
152                description: reason.to_string(),
153                data: None,
154            }],
155            metrics: OutcomeMetrics::default(),
156            timestamp: Utc::now(),
157        }
158    }
159
160    /// Add evidence to this outcome.
161    pub fn with_evidence(mut self, evidence: Evidence) -> Self {
162        self.evidence.push(evidence);
163        self
164    }
165
166    /// Set metrics on this outcome.
167    pub fn with_metrics(mut self, metrics: OutcomeMetrics) -> Self {
168        self.metrics = metrics;
169        self
170    }
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_completed` is true only for Success, PartialSuccess, and Done.
    #[test]
    fn test_outcome_status_classification() {
        let expectations = [
            (OutcomeStatus::Success, true),
            (OutcomeStatus::PartialSuccess, true),
            (OutcomeStatus::Done, true),
            (OutcomeStatus::Failure, false),
            (OutcomeStatus::Timeout, false),
            (OutcomeStatus::GiveUp, false),
        ];

        for &(status, completed) in expectations.iter() {
            assert_eq!(status.is_completed(), completed, "status: {:?}", status);
        }
    }

    /// Every status variant reports itself as terminal.
    #[test]
    fn test_all_statuses_are_terminal() {
        let every_status = [
            OutcomeStatus::Success,
            OutcomeStatus::PartialSuccess,
            OutcomeStatus::Done,
            OutcomeStatus::Failure,
            OutcomeStatus::Timeout,
            OutcomeStatus::GiveUp,
        ];

        for status in every_status.iter() {
            assert!(status.is_terminal(), "status: {:?}", status);
        }
    }

    /// `timeout` records the turn count and a single stop-reason evidence entry.
    #[test]
    fn test_timeout_outcome() {
        let timed_out = AgentOutcome::timeout("Exceeded step limit", 10, 10);

        assert_eq!(timed_out.status, OutcomeStatus::Timeout);
        assert_eq!(timed_out.metrics.turns, 10);

        let kinds: Vec<EvidenceKind> = timed_out.evidence.iter().map(|e| e.kind).collect();
        assert_eq!(kinds, vec![EvidenceKind::StopReason]);
    }

    /// Chained `with_evidence` calls accumulate entries.
    #[test]
    fn test_outcome_with_evidence() {
        let file_created = Evidence {
            kind: EvidenceKind::ToolResult,
            description: "File created".to_string(),
            data: Some(serde_json::json!({"path": "/tmp/out.txt"})),
        };
        let tests_passed = Evidence {
            kind: EvidenceKind::ExternalVerification,
            description: "Tests passed".to_string(),
            data: None,
        };

        let outcome = AgentOutcome::success("Task done")
            .with_evidence(file_created)
            .with_evidence(tests_passed);

        assert_eq!(outcome.evidence.len(), 2);
    }

    /// `give_up` carries exactly one self-assessment evidence entry.
    #[test]
    fn test_give_up_outcome() {
        let gave_up = AgentOutcome::give_up("Cannot access required API");

        assert_eq!(gave_up.status, OutcomeStatus::GiveUp);

        let kinds: Vec<EvidenceKind> = gave_up.evidence.iter().map(|e| e.kind).collect();
        assert_eq!(kinds, vec![EvidenceKind::SelfAssessment]);
    }

    /// `failure` yields a Failure status, which is not a completed state.
    #[test]
    fn test_failure_outcome() {
        let failed = AgentOutcome::failure("Connection refused");

        assert_eq!(failed.status, OutcomeStatus::Failure);
        assert!(!failed.status.is_completed());
    }

    /// An outcome survives a JSON serialize/deserialize round trip.
    #[test]
    fn test_outcome_serde_roundtrip() {
        let evidence = Evidence {
            kind: EvidenceKind::ToolResult,
            description: "ok".to_string(),
            data: Some(serde_json::json!(42)),
        };
        let metrics = OutcomeMetrics {
            turns: 5,
            tool_calls: 3,
            duration_ms: 1234.5,
            retries: 1,
            actions_succeeded: 4,
            actions_failed: 1,
        };
        let original = AgentOutcome::success("Done")
            .with_evidence(evidence)
            .with_metrics(metrics);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: AgentOutcome = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.status, OutcomeStatus::Success);
        assert_eq!(decoded.summary, "Done");
        assert_eq!(decoded.evidence.len(), 1);
        assert_eq!(decoded.metrics.turns, 5);
        assert_eq!(decoded.metrics.tool_calls, 3);
    }

    /// Multi-word status variants serialize as snake_case strings.
    #[test]
    fn test_outcome_status_snake_case_serde() {
        let cases = [
            (OutcomeStatus::PartialSuccess, "\"partial_success\""),
            (OutcomeStatus::GiveUp, "\"give_up\""),
        ];

        for (status, expected_json) in cases.iter() {
            assert_eq!(serde_json::to_string(status).unwrap(), *expected_json);
        }
    }
}