// car_ir/outcome.rs

//! Agent execution outcome semantics.
//!
//! Provides typed outcomes for agent execution loops, replacing ad-hoc
//! success/failure heuristics with structured completion semantics.

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;

10/// Default timestamp for outcomes deserialized from caller-built JSON that
11/// omits `timestamp` (e.g. the canonical harness, which sends only
12/// `{status, summary, evidence, metrics, tools_called}`). `DateTime` has no
13/// `Default`, so we provide one explicitly.
14fn default_outcome_timestamp() -> DateTime<Utc> {
15    Utc::now()
16}
17
18/// The outcome of an agent execution loop.
19///
20/// Deserialization is tolerant of the caller-built shape: `timestamp` is
21/// optional (defaults to now), `summary`/`evidence`/`metrics` are optional
22/// (default to empty), and unknown fields (e.g. the harness's extra
23/// `tools_called`) are ignored (no `deny_unknown_fields`). Serialization is
24/// unchanged and always emits all fields.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct AgentOutcome {
27    /// The outcome type.
28    pub status: OutcomeStatus,
29    /// Human-readable summary of what happened.
30    #[serde(default)]
31    pub summary: String,
32    /// Evidence supporting the outcome classification.
33    #[serde(default)]
34    pub evidence: Vec<Evidence>,
35    /// Metrics from the execution.
36    #[serde(default)]
37    pub metrics: OutcomeMetrics,
38    /// When the outcome was determined.
39    #[serde(default = "default_outcome_timestamp")]
40    pub timestamp: DateTime<Utc>,
41}
42
43/// Outcome classification for an agent execution.
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
45#[serde(rename_all = "snake_case")]
46pub enum OutcomeStatus {
47    /// Task completed successfully with all goals met.
48    Success,
49    /// Task completed but only some goals were met.
50    PartialSuccess,
51    /// Agent explicitly determined it cannot complete the task.
52    GiveUp,
53    /// Execution exceeded time or step limits.
54    Timeout,
55    /// Execution failed due to errors.
56    Failure,
57    /// Agent explicitly signaled it is done (neutral -- may or may not have succeeded).
58    Done,
59}
60
61impl OutcomeStatus {
62    /// Whether this outcome represents any form of completion (not failure/timeout).
63    pub fn is_completed(&self) -> bool {
64        matches!(self, Self::Success | Self::PartialSuccess | Self::Done)
65    }
66
67    /// Whether this outcome represents a terminal state (no more work possible).
68    pub fn is_terminal(&self) -> bool {
69        true // All outcome statuses are terminal
70    }
71}
72
73/// Evidence supporting an outcome classification.
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct Evidence {
76    /// What kind of evidence this is.
77    pub kind: EvidenceKind,
78    /// Human-readable description.
79    pub description: String,
80    /// Optional structured data.
81    pub data: Option<Value>,
82}
83
84/// Types of evidence that can support an outcome.
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
86#[serde(rename_all = "snake_case")]
87pub enum EvidenceKind {
88    /// Agent's own assessment of task completion.
89    SelfAssessment,
90    /// Tool call results that demonstrate completion.
91    ToolResult,
92    /// State changes that demonstrate completion.
93    StateChange,
94    /// External verification (e.g., test passed).
95    ExternalVerification,
96    /// The reason execution stopped (timeout, max steps, etc.).
97    StopReason,
98    /// Product-level evaluator result.
99    Evaluator,
100}
101
102/// Execution metrics associated with an outcome.
103///
104/// All fields are `#[serde(default)]` so a partial metrics object (e.g. the
105/// harness's `{turns, tool_calls, actions_succeeded, actions_failed}`, which
106/// omits `duration_ms` and `retries`) deserializes cleanly.
107#[derive(Debug, Clone, Default, Serialize, Deserialize)]
108pub struct OutcomeMetrics {
109    /// Total turns/steps executed.
110    #[serde(default)]
111    pub turns: u32,
112    /// Total tool calls made.
113    #[serde(default)]
114    pub tool_calls: u32,
115    /// Wall-clock duration in milliseconds.
116    #[serde(default)]
117    pub duration_ms: f64,
118    /// Number of retries/replans attempted.
119    #[serde(default)]
120    pub retries: u32,
121    /// Number of actions that succeeded.
122    #[serde(default)]
123    pub actions_succeeded: u32,
124    /// Number of actions that failed.
125    #[serde(default)]
126    pub actions_failed: u32,
127}
128
129impl AgentOutcome {
130    /// Create a successful outcome.
131    pub fn success(summary: &str) -> Self {
132        Self {
133            status: OutcomeStatus::Success,
134            summary: summary.to_string(),
135            evidence: Vec::new(),
136            metrics: OutcomeMetrics::default(),
137            timestamp: Utc::now(),
138        }
139    }
140
141    /// Create a failure outcome.
142    pub fn failure(summary: &str) -> Self {
143        Self {
144            status: OutcomeStatus::Failure,
145            summary: summary.to_string(),
146            evidence: Vec::new(),
147            metrics: OutcomeMetrics::default(),
148            timestamp: Utc::now(),
149        }
150    }
151
152    /// Create a timeout outcome.
153    pub fn timeout(summary: &str, turns: u32, max_turns: u32) -> Self {
154        Self {
155            status: OutcomeStatus::Timeout,
156            summary: summary.to_string(),
157            evidence: vec![Evidence {
158                kind: EvidenceKind::StopReason,
159                description: format!("Reached {} of {} max turns", turns, max_turns),
160                data: Some(serde_json::json!({
161                    "turns": turns,
162                    "max_turns": max_turns,
163                })),
164            }],
165            metrics: OutcomeMetrics {
166                turns,
167                ..Default::default()
168            },
169            timestamp: Utc::now(),
170        }
171    }
172
173    /// Create a give-up outcome.
174    pub fn give_up(reason: &str) -> Self {
175        Self {
176            status: OutcomeStatus::GiveUp,
177            summary: reason.to_string(),
178            evidence: vec![Evidence {
179                kind: EvidenceKind::SelfAssessment,
180                description: reason.to_string(),
181                data: None,
182            }],
183            metrics: OutcomeMetrics::default(),
184            timestamp: Utc::now(),
185        }
186    }
187
188    /// Add evidence to this outcome.
189    pub fn with_evidence(mut self, evidence: Evidence) -> Self {
190        self.evidence.push(evidence);
191        self
192    }
193
194    /// Set metrics on this outcome.
195    pub fn with_metrics(mut self, metrics: OutcomeMetrics) -> Self {
196        self.metrics = metrics;
197        self
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// Only `Success`, `PartialSuccess` and `Done` count as completed.
    #[test]
    fn test_outcome_status_classification() {
        assert!(OutcomeStatus::Success.is_completed());
        assert!(OutcomeStatus::PartialSuccess.is_completed());
        assert!(OutcomeStatus::Done.is_completed());
        assert!(!OutcomeStatus::Failure.is_completed());
        assert!(!OutcomeStatus::Timeout.is_completed());
        assert!(!OutcomeStatus::GiveUp.is_completed());
    }

    /// Every status is terminal.
    #[test]
    fn test_all_statuses_are_terminal() {
        assert!(OutcomeStatus::Success.is_terminal());
        assert!(OutcomeStatus::PartialSuccess.is_terminal());
        assert!(OutcomeStatus::Done.is_terminal());
        assert!(OutcomeStatus::Failure.is_terminal());
        assert!(OutcomeStatus::Timeout.is_terminal());
        assert!(OutcomeStatus::GiveUp.is_terminal());
    }

    /// `timeout` records the turn count in metrics and as stop-reason evidence.
    #[test]
    fn test_timeout_outcome() {
        let outcome = AgentOutcome::timeout("Exceeded step limit", 10, 10);
        assert_eq!(outcome.status, OutcomeStatus::Timeout);
        assert_eq!(outcome.summary, "Exceeded step limit");
        assert_eq!(outcome.metrics.turns, 10);
        assert_eq!(outcome.evidence.len(), 1);
        assert_eq!(outcome.evidence[0].kind, EvidenceKind::StopReason);
        assert_eq!(outcome.evidence[0].description, "Reached 10 of 10 max turns");
    }

    /// `with_evidence` appends rather than replaces.
    #[test]
    fn test_outcome_with_evidence() {
        let outcome = AgentOutcome::success("Task done")
            .with_evidence(Evidence {
                kind: EvidenceKind::ToolResult,
                description: "File created".to_string(),
                data: Some(serde_json::json!({"path": "/tmp/out.txt"})),
            })
            .with_evidence(Evidence {
                kind: EvidenceKind::ExternalVerification,
                description: "Tests passed".to_string(),
                data: None,
            });
        assert_eq!(outcome.evidence.len(), 2);
        assert_eq!(outcome.evidence[0].kind, EvidenceKind::ToolResult);
        assert_eq!(outcome.evidence[1].kind, EvidenceKind::ExternalVerification);
    }

    /// `give_up` uses the reason as summary and as self-assessment evidence.
    #[test]
    fn test_give_up_outcome() {
        let outcome = AgentOutcome::give_up("Cannot access required API");
        assert_eq!(outcome.status, OutcomeStatus::GiveUp);
        assert_eq!(outcome.summary, "Cannot access required API");
        assert_eq!(outcome.evidence.len(), 1);
        assert_eq!(outcome.evidence[0].kind, EvidenceKind::SelfAssessment);
        assert_eq!(outcome.evidence[0].description, "Cannot access required API");
    }

    #[test]
    fn test_failure_outcome() {
        let outcome = AgentOutcome::failure("Connection refused");
        assert_eq!(outcome.status, OutcomeStatus::Failure);
        assert!(!outcome.status.is_completed());
    }

    /// A fully populated outcome survives a JSON roundtrip field by field.
    #[test]
    fn test_outcome_serde_roundtrip() {
        let outcome = AgentOutcome::success("Done")
            .with_evidence(Evidence {
                kind: EvidenceKind::ToolResult,
                description: "ok".to_string(),
                data: Some(serde_json::json!(42)),
            })
            .with_metrics(OutcomeMetrics {
                turns: 5,
                tool_calls: 3,
                duration_ms: 1234.5,
                retries: 1,
                actions_succeeded: 4,
                actions_failed: 1,
            });

        let json = serde_json::to_string(&outcome).unwrap();
        let roundtripped: AgentOutcome = serde_json::from_str(&json).unwrap();

        assert_eq!(roundtripped.status, OutcomeStatus::Success);
        assert_eq!(roundtripped.summary, "Done");

        assert_eq!(roundtripped.evidence.len(), 1);
        assert_eq!(roundtripped.evidence[0].kind, EvidenceKind::ToolResult);
        assert_eq!(roundtripped.evidence[0].description, "ok");
        assert_eq!(roundtripped.evidence[0].data, Some(serde_json::json!(42)));

        assert_eq!(roundtripped.metrics.turns, 5);
        assert_eq!(roundtripped.metrics.tool_calls, 3);
        // 1234.5 is exactly representable, so an exact comparison is safe.
        assert_eq!(roundtripped.metrics.duration_ms, 1234.5);
        assert_eq!(roundtripped.metrics.retries, 1);
        assert_eq!(roundtripped.metrics.actions_succeeded, 4);
        assert_eq!(roundtripped.metrics.actions_failed, 1);
    }

    /// The canonical harness builds the outcome itself and sends
    /// `{status, summary, evidence, metrics, tools_called}` — NO `timestamp`,
    /// an EXTRA `tools_called`, and a PARTIAL `metrics` object (no
    /// `duration_ms`/`retries`). All of that must deserialize: timestamp
    /// defaults to now, the extra field is ignored, and the missing metrics
    /// fields default to zero.
    #[test]
    fn test_outcome_deserializes_harness_shape_without_timestamp() {
        let harness = r#"{"status":"success","summary":"Created file","evidence":[],"metrics":{"turns":3,"tool_calls":3,"actions_succeeded":3,"actions_failed":0},"tools_called":["drive_cli","check_outcome","finish"]}"#;
        let outcome: AgentOutcome =
            serde_json::from_str(harness).expect("harness shape must deserialize");
        assert_eq!(outcome.status, OutcomeStatus::Success);
        assert_eq!(outcome.summary, "Created file");
        assert!(outcome.evidence.is_empty());
        assert_eq!(outcome.metrics.turns, 3);
        assert_eq!(outcome.metrics.tool_calls, 3);
        assert_eq!(outcome.metrics.actions_succeeded, 3);
        assert_eq!(outcome.metrics.actions_failed, 0);
        // Omitted metrics fields default to zero.
        assert_eq!(outcome.metrics.duration_ms, 0.0);
        assert_eq!(outcome.metrics.retries, 0);
        // Serialize still emits a timestamp (default-now), unchanged shape.
        let json = serde_json::to_string(&outcome).unwrap();
        assert!(json.contains("\"timestamp\""));
    }

    /// `status` is the only required field: everything else falls back to its
    /// empty/zero default.
    #[test]
    fn test_outcome_deserializes_status_only() {
        let outcome: AgentOutcome = serde_json::from_str(r#"{"status":"done"}"#)
            .expect("status-only payload must deserialize");
        assert_eq!(outcome.status, OutcomeStatus::Done);
        assert!(outcome.summary.is_empty());
        assert!(outcome.evidence.is_empty());
        assert_eq!(outcome.metrics.turns, 0);
        assert_eq!(outcome.metrics.tool_calls, 0);
        assert_eq!(outcome.metrics.retries, 0);
        assert_eq!(outcome.metrics.actions_succeeded, 0);
        assert_eq!(outcome.metrics.actions_failed, 0);
    }

    /// Status names use `snake_case` on the wire in both directions.
    #[test]
    fn test_outcome_status_snake_case_serde() {
        assert_eq!(
            serde_json::to_string(&OutcomeStatus::PartialSuccess).unwrap(),
            "\"partial_success\""
        );
        assert_eq!(
            serde_json::to_string(&OutcomeStatus::GiveUp).unwrap(),
            "\"give_up\""
        );
        assert_eq!(
            serde_json::from_str::<OutcomeStatus>("\"partial_success\"").unwrap(),
            OutcomeStatus::PartialSuccess
        );
        assert_eq!(
            serde_json::from_str::<OutcomeStatus>("\"give_up\"").unwrap(),
            OutcomeStatus::GiveUp
        );
    }
}