Skip to main content

brainwires_agents/
execution_graph.rs

1//! Execution DAG and telemetry for TaskAgent runs
2//!
3//! Provides [`ExecutionGraph`] (one node per provider-call iteration with tool
4//! call records) and [`RunTelemetry`] (aggregate summary derived from the
5//! graph at run completion).
6
7use chrono::{DateTime, Utc};
8
9/// One tool call within a single iteration step.
10#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
11pub struct ToolCallRecord {
12    /// Unique identifier for this tool use invocation.
13    pub tool_use_id: String,
14    /// Name of the tool that was called.
15    pub tool_name: String,
16    /// Whether the tool call resulted in an error.
17    pub is_error: bool,
18    /// When the tool call was executed.
19    pub executed_at: DateTime<Utc>,
20}
21
22/// One provider-call iteration in the `execute()` loop.
23#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
24pub struct StepNode {
25    /// Iteration number within the execution loop.
26    pub iteration: u32,
27    /// When this step started.
28    pub started_at: DateTime<Utc>,
29    /// When this step ended.
30    pub ended_at: DateTime<Utc>,
31    /// Prompt tokens for this call (from `Usage::prompt_tokens`).
32    pub prompt_tokens: u32,
33    /// Completion tokens for this call (from `Usage::completion_tokens`).
34    pub completion_tokens: u32,
35    /// Tool calls made during this step.
36    pub tool_calls: Vec<ToolCallRecord>,
37    /// Reason the provider stopped generating.
38    pub finish_reason: Option<String>,
39}
40
41/// Full execution trace for one `TaskAgent` run.
42///
43/// Contains one [`StepNode`] per provider call and a flat ordered
44/// [`tool_sequence`][ExecutionGraph::tool_sequence] for easy comparison
45/// against expected sequences in behavioral tests (Phase 2 recorder).
46#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
47pub struct ExecutionGraph {
48    /// SHA-256 of (system prompt bytes + sorted tool name bytes), hex-encoded.
49    /// Changes whenever the prompt or tool registry changes.
50    pub prompt_hash: String,
51    /// When the run started.
52    pub run_started_at: DateTime<Utc>,
53    /// One [`StepNode`] per provider call iteration.
54    pub steps: Vec<StepNode>,
55    /// Flat ordered list of tool names across all steps (Phase 2 recorder).
56    pub tool_sequence: Vec<String>,
57}
58
59impl ExecutionGraph {
60    /// Create a new, empty graph with the given prompt hash and start time.
61    pub fn new(prompt_hash: String, run_started_at: DateTime<Utc>) -> Self {
62        Self {
63            prompt_hash,
64            run_started_at,
65            steps: Vec::new(),
66            tool_sequence: Vec::new(),
67        }
68    }
69
70    /// Start a new step; returns its index for later finalization.
71    pub fn push_step(&mut self, iteration: u32, started_at: DateTime<Utc>) -> usize {
72        let idx = self.steps.len();
73        self.steps.push(StepNode {
74            iteration,
75            started_at,
76            ended_at: started_at,
77            prompt_tokens: 0,
78            completion_tokens: 0,
79            tool_calls: Vec::new(),
80            finish_reason: None,
81        });
82        idx
83    }
84
85    /// Fill in token counts and finish_reason after the provider call returns.
86    pub fn finalize_step(
87        &mut self,
88        step_idx: usize,
89        ended_at: DateTime<Utc>,
90        prompt_tokens: u32,
91        completion_tokens: u32,
92        finish_reason: Option<String>,
93    ) {
94        if let Some(s) = self.steps.get_mut(step_idx) {
95            s.ended_at = ended_at;
96            s.prompt_tokens = prompt_tokens;
97            s.completion_tokens = completion_tokens;
98            s.finish_reason = finish_reason;
99        }
100    }
101
102    /// Record a tool call and append its name to the flat sequence.
103    pub fn record_tool_call(&mut self, step_idx: usize, record: ToolCallRecord) {
104        self.tool_sequence.push(record.tool_name.clone());
105        if let Some(s) = self.steps.get_mut(step_idx) {
106            s.tool_calls.push(record);
107        }
108    }
109}
110
111/// Structured telemetry summary for a completed run.
112///
113/// Derived from an [`ExecutionGraph`] via [`RunTelemetry::from_graph`].
114#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
115pub struct RunTelemetry {
116    /// Hash of the system prompt and tool registry.
117    pub prompt_hash: String,
118    /// When the run started.
119    pub run_started_at: DateTime<Utc>,
120    /// When the run ended.
121    pub run_ended_at: DateTime<Utc>,
122    /// Total run duration in milliseconds.
123    pub duration_ms: u64,
124    /// Number of provider call iterations.
125    pub total_iterations: u32,
126    /// Total number of tool calls across all iterations.
127    pub total_tool_calls: u32,
128    /// Number of tool calls that returned errors.
129    pub tool_error_count: u32,
130    /// Unique tool names, deduped in first-use order.
131    pub tools_used: Vec<String>,
132    /// Total prompt tokens consumed.
133    pub total_prompt_tokens: u32,
134    /// Total completion tokens consumed.
135    pub total_completion_tokens: u32,
136    /// Total estimated cost in USD.
137    pub total_cost_usd: f64,
138    /// Whether the run completed successfully.
139    pub success: bool,
140}
141
142impl RunTelemetry {
143    /// Build a telemetry record from a completed [`ExecutionGraph`].
144    pub fn from_graph(
145        graph: &ExecutionGraph,
146        run_ended_at: DateTime<Utc>,
147        success: bool,
148        total_cost_usd: f64,
149    ) -> Self {
150        let duration_ms = (run_ended_at - graph.run_started_at)
151            .num_milliseconds()
152            .max(0) as u64;
153        let total_tool_calls: u32 = graph.steps.iter().map(|s| s.tool_calls.len() as u32).sum();
154        let tool_error_count: u32 = graph
155            .steps
156            .iter()
157            .flat_map(|s| s.tool_calls.iter())
158            .filter(|tc| tc.is_error)
159            .count() as u32;
160        let total_prompt_tokens: u32 = graph.steps.iter().map(|s| s.prompt_tokens).sum();
161        let total_completion_tokens: u32 = graph.steps.iter().map(|s| s.completion_tokens).sum();
162        let mut seen = std::collections::HashSet::new();
163        let tools_used: Vec<String> = graph
164            .tool_sequence
165            .iter()
166            .filter(|n| seen.insert((*n).clone()))
167            .cloned()
168            .collect();
169        Self {
170            prompt_hash: graph.prompt_hash.clone(),
171            run_started_at: graph.run_started_at,
172            run_ended_at,
173            duration_ms,
174            total_iterations: graph.steps.len() as u32,
175            total_tool_calls,
176            tool_error_count,
177            tools_used,
178            total_prompt_tokens,
179            total_completion_tokens,
180            total_cost_usd,
181            success,
182        }
183    }
184}
185
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    /// Empty graph with a fixed hash, started "now".
    fn make_graph() -> ExecutionGraph {
        ExecutionGraph::new("abc123".to_string(), Utc::now())
    }

    /// Tool call record stamped with the current time.
    fn call(tool_use_id: &str, tool_name: &str, is_error: bool) -> ToolCallRecord {
        ToolCallRecord {
            tool_use_id: tool_use_id.to_string(),
            tool_name: tool_name.to_string(),
            is_error,
            executed_at: Utc::now(),
        }
    }

    // `push_step` hands back consecutive indices starting at zero.
    #[test]
    fn test_push_step_returns_index() {
        let mut graph = make_graph();
        let first = graph.push_step(1, Utc::now());
        let second = graph.push_step(2, Utc::now());
        assert_eq!(first, 0);
        assert_eq!(second, 1);
        assert_eq!(graph.steps.len(), 2);
    }

    // `finalize_step` overwrites the placeholder token counts and reason.
    #[test]
    fn test_finalize_step_sets_tokens() {
        let mut graph = make_graph();
        let step = graph.push_step(1, Utc::now());
        let finished = Utc::now();
        graph.finalize_step(step, finished, 100, 50, Some("stop".to_string()));

        let node = &graph.steps[step];
        assert_eq!(node.prompt_tokens, 100);
        assert_eq!(node.completion_tokens, 50);
        assert_eq!(node.finish_reason, Some("stop".to_string()));
    }

    // Recording a call updates both the step and the flat sequence.
    #[test]
    fn test_record_tool_call_appends_sequence() {
        let mut graph = make_graph();
        let step = graph.push_step(1, Utc::now());
        graph.record_tool_call(step, call("u1", "read_file", false));
        graph.record_tool_call(step, call("u2", "write_file", false));

        assert_eq!(graph.tool_sequence, vec!["read_file", "write_file"]);
        assert_eq!(graph.steps[step].tool_calls.len(), 2);
    }

    // Aggregates derived by `RunTelemetry::from_graph`.
    #[test]
    fn test_telemetry_from_graph() {
        let started = Utc::now();
        let mut graph = ExecutionGraph::new("hash".to_string(), started);
        let step = graph.push_step(1, started);
        graph.finalize_step(step, Utc::now(), 100, 50, None);
        graph.record_tool_call(step, call("u1", "bash", false));
        graph.record_tool_call(step, call("u2", "bash", true));

        let telemetry = RunTelemetry::from_graph(&graph, Utc::now(), true, 0.01);
        assert_eq!(telemetry.total_iterations, 1);
        assert_eq!(telemetry.total_tool_calls, 2);
        assert_eq!(telemetry.tool_error_count, 1);
        // "bash" was called twice but must be listed only once.
        assert_eq!(telemetry.tools_used, vec!["bash"]);
        assert_eq!(telemetry.total_prompt_tokens, 100);
        assert_eq!(telemetry.total_completion_tokens, 50);
        assert!(telemetry.success);
    }

    // The flat sequence keeps duplicates and call order.
    #[test]
    fn test_tool_sequence_preserves_order() {
        let mut graph = make_graph();
        let step = graph.push_step(1, Utc::now());
        let names = ["a", "b", "c", "b", "a"];
        names
            .iter()
            .for_each(|name| graph.record_tool_call(step, call("x", name, false)));

        assert_eq!(graph.tool_sequence, vec!["a", "b", "c", "b", "a"]);
    }
}