Skip to main content

adk_eval/
trace_analyzer.rs

1//! Execution trace analysis for detecting inefficiencies.
2//!
3//! The [`TraceAnalyzer`] inspects agent event streams to identify redundant tool calls,
4//! execution loops, and other patterns that waste tokens or time. It produces a
5//! [`TraceAnalysis`] summary with an efficiency score and per-pattern diagnostics.
6//!
7//! # Example
8//!
9//! ```rust,ignore
10//! use adk_eval::trace_analyzer::{TraceAnalyzer, ToolCallRecord};
11//! use serde_json::json;
12//!
13//! let analyzer = TraceAnalyzer::new();
14//! let calls = vec![
15//!     ToolCallRecord { name: "read_file".into(), args: json!({"path": "a.txt"}) },
16//!     ToolCallRecord { name: "read_file".into(), args: json!({"path": "a.txt"}) },
17//!     ToolCallRecord { name: "write_file".into(), args: json!({"path": "b.txt"}) },
18//! ];
19//! let analysis = analyzer.analyze_tool_calls(&calls);
20//! assert!(analysis.efficiency_score < 1.0);
21//! ```
22
23use adk_core::Event;
24use serde::{Deserialize, Serialize};
25use std::collections::HashSet;
26
27/// A single tool call record for direct analysis without full Events.
28#[derive(Debug, Clone, PartialEq)]
29pub struct ToolCallRecord {
30    /// Name of the tool that was called.
31    pub name: String,
32    /// Arguments passed to the tool as JSON.
33    pub args: serde_json::Value,
34}
35
36/// A detected trace inefficiency.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct TraceDiagnostic {
39    /// Type of inefficiency pattern detected.
40    pub pattern_type: TracePattern,
41    /// Tool names involved in the pattern.
42    pub tool_names: Vec<String>,
43    /// Number of times the pattern occurred.
44    pub occurrence_count: usize,
45    /// Human-readable description of the issue.
46    pub description: String,
47}
48
49/// Types of trace inefficiency patterns.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51#[serde(rename_all = "snake_case")]
52pub enum TracePattern {
53    /// Same tool called consecutively with identical arguments.
54    RedundantCall,
55    /// Repeated sequence of tool calls forming a loop.
56    ExecutionLoop,
57    /// Tool called many times suggesting retry issues.
58    ExcessiveRetries,
59}
60
61/// Summary of trace analysis results.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct TraceAnalysis {
64    /// Total number of tool calls in the event stream.
65    pub total_tool_calls: usize,
66    /// Number of unique tools used.
67    pub unique_tools: usize,
68    /// Number of useful (non-redundant, non-loop) tool calls.
69    pub useful_tool_calls: usize,
70    /// Efficiency score in [0.0, 1.0]: useful_calls / total_calls (1.0 when total is 0).
71    pub efficiency_score: f64,
72    /// Detected inefficiency patterns.
73    pub diagnostics: Vec<TraceDiagnostic>,
74}
75
/// Analyzes agent execution traces for inefficiencies.
///
/// The analyzer inspects tool call sequences to detect:
/// - **Redundant calls**: consecutive calls with the same tool name AND same arguments
/// - **Execution loops**: sequences of 3+ repeated tool-call patterns
///
/// The analyzer holds no state, so it is cheap to create and to clone.
///
/// # Example
///
/// ```rust,ignore
/// use adk_eval::TraceAnalyzer;
///
/// let analyzer = TraceAnalyzer::new();
/// let analysis = analyzer.analyze(&events);
/// println!("Efficiency: {:.1}%", analysis.efficiency_score * 100.0);
/// ```
#[derive(Debug, Clone)]
pub struct TraceAnalyzer;
92
93impl TraceAnalyzer {
94    /// Creates a new `TraceAnalyzer`.
95    pub fn new() -> Self {
96        Self
97    }
98
99    /// Analyze an event stream for trace inefficiencies.
100    ///
101    /// Extracts tool calls from events and delegates to [`Self::analyze_tool_calls`].
102    pub fn analyze(&self, events: &[Event]) -> TraceAnalysis {
103        let calls = Self::extract_tool_calls(events);
104        self.analyze_tool_calls(&calls)
105    }
106
107    /// Analyze a sequence of tool call records directly.
108    ///
109    /// This is useful for testing without constructing full Event objects.
110    pub fn analyze_tool_calls(&self, calls: &[ToolCallRecord]) -> TraceAnalysis {
111        let total_tool_calls = calls.len();
112
113        if total_tool_calls == 0 {
114            return TraceAnalysis {
115                total_tool_calls: 0,
116                unique_tools: 0,
117                useful_tool_calls: 0,
118                efficiency_score: 1.0,
119                diagnostics: Vec::new(),
120            };
121        }
122
123        let unique_tools = {
124            let mut set = HashSet::new();
125            for call in calls {
126                set.insert(call.name.as_str());
127            }
128            set.len()
129        };
130
131        let redundant_diagnostics = Self::detect_redundant_calls(calls);
132        let loop_diagnostics = Self::detect_loops(calls);
133
134        let redundant_count: usize = redundant_diagnostics.iter().map(|d| d.occurrence_count).sum();
135        let loop_count: usize = loop_diagnostics.iter().map(|d| d.occurrence_count).sum();
136
137        let wasted = redundant_count + loop_count;
138        let useful_tool_calls = total_tool_calls.saturating_sub(wasted);
139
140        let efficiency_score = useful_tool_calls as f64 / total_tool_calls as f64;
141
142        let mut diagnostics = Vec::new();
143        diagnostics.extend(redundant_diagnostics);
144        diagnostics.extend(loop_diagnostics);
145
146        TraceAnalysis {
147            total_tool_calls,
148            unique_tools,
149            useful_tool_calls,
150            efficiency_score,
151            diagnostics,
152        }
153    }
154
155    /// Extract tool calls from events by scanning for `FunctionCall` parts.
156    fn extract_tool_calls(events: &[Event]) -> Vec<ToolCallRecord> {
157        let mut calls = Vec::new();
158        for event in events {
159            if let Some(content) = &event.llm_response.content {
160                for part in &content.parts {
161                    if let adk_core::Part::FunctionCall { name, args, .. } = part {
162                        calls.push(ToolCallRecord { name: name.clone(), args: args.clone() });
163                    }
164                }
165            }
166        }
167        calls
168    }
169
170    /// Detect redundant consecutive calls — same tool name AND same arguments.
171    ///
172    /// Two consecutive tool calls are redundant if they have the same tool name
173    /// and their arguments are equal (JSON equality).
174    fn detect_redundant_calls(calls: &[ToolCallRecord]) -> Vec<TraceDiagnostic> {
175        if calls.len() < 2 {
176            return Vec::new();
177        }
178
179        let mut diagnostics: Vec<TraceDiagnostic> = Vec::new();
180
181        let mut i = 0;
182        while i < calls.len() - 1 {
183            if calls[i].name == calls[i + 1].name && calls[i].args == calls[i + 1].args {
184                // Count how many consecutive duplicates follow
185                let tool_name = calls[i].name.clone();
186                let mut count = 0;
187                let mut j = i + 1;
188                while j < calls.len()
189                    && calls[j].name == calls[i].name
190                    && calls[j].args == calls[i].args
191                {
192                    count += 1;
193                    j += 1;
194                }
195
196                diagnostics.push(TraceDiagnostic {
197                    pattern_type: TracePattern::RedundantCall,
198                    tool_names: vec![tool_name.clone()],
199                    occurrence_count: count,
200                    description: format!(
201                        "Tool '{}' called {} consecutive time(s) with identical arguments",
202                        tool_name, count
203                    ),
204                });
205
206                i = j;
207            } else {
208                i += 1;
209            }
210        }
211
212        diagnostics
213    }
214
215    /// Detect execution loops — sequences of 3+ repeated tool-call patterns.
216    ///
217    /// Uses a sliding window approach: for each possible pattern length (1..=n/3),
218    /// checks if a sequence of tool call names repeats 3+ times consecutively.
219    fn detect_loops(calls: &[ToolCallRecord]) -> Vec<TraceDiagnostic> {
220        if calls.len() < 3 {
221            return Vec::new();
222        }
223
224        let names: Vec<&str> = calls.iter().map(|c| c.name.as_str()).collect();
225        let n = names.len();
226        let mut diagnostics: Vec<TraceDiagnostic> = Vec::new();
227        let mut covered: Vec<bool> = vec![false; n];
228
229        // Try pattern lengths from 1 up to n/3 (need at least 3 repetitions)
230        for pattern_len in 1..=(n / 3) {
231            let mut i = 0;
232            while i + pattern_len * 3 <= n {
233                if covered[i] {
234                    i += 1;
235                    continue;
236                }
237
238                let pattern = &names[i..i + pattern_len];
239                let mut repetitions = 1;
240                let mut j = i + pattern_len;
241
242                while j + pattern_len <= n && &names[j..j + pattern_len] == pattern {
243                    repetitions += 1;
244                    j += pattern_len;
245                }
246
247                if repetitions >= 3 {
248                    let loop_tool_names: Vec<String> =
249                        pattern.iter().map(|s| (*s).to_string()).collect();
250
251                    // Mark covered indices to avoid double-counting
252                    // The wasted iterations are repetitions - 1 (first occurrence is useful)
253                    let wasted_iterations = (repetitions - 1) * pattern_len;
254                    for item in
255                        covered.iter_mut().take(i + repetitions * pattern_len).skip(i + pattern_len)
256                    {
257                        *item = true;
258                    }
259
260                    diagnostics.push(TraceDiagnostic {
261                        pattern_type: TracePattern::ExecutionLoop,
262                        tool_names: loop_tool_names.clone(),
263                        occurrence_count: wasted_iterations,
264                        description: format!(
265                            "Pattern {:?} repeated {} times ({} wasted iterations)",
266                            loop_tool_names, repetitions, wasted_iterations
267                        ),
268                    });
269
270                    i = j;
271                } else {
272                    i += 1;
273                }
274            }
275        }
276
277        diagnostics
278    }
279}
280
281impl Default for TraceAnalyzer {
282    fn default() -> Self {
283        Self::new()
284    }
285}
286
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a [`ToolCallRecord`] from a tool name and its JSON arguments.
    fn record(name: &str, args: serde_json::Value) -> ToolCallRecord {
        ToolCallRecord { name: name.into(), args }
    }

    /// Runs a fresh analyzer over the given records.
    fn run(calls: &[ToolCallRecord]) -> TraceAnalysis {
        TraceAnalyzer::new().analyze_tool_calls(calls)
    }

    #[test]
    fn test_empty_calls() {
        let analysis = run(&[]);
        assert_eq!(analysis.total_tool_calls, 0);
        assert_eq!(analysis.unique_tools, 0);
        assert_eq!(analysis.useful_tool_calls, 0);
        assert_eq!(analysis.efficiency_score, 1.0);
        assert!(analysis.diagnostics.is_empty());
    }

    #[test]
    fn test_no_redundancy() {
        let analysis = run(&[
            record("read_file", json!({"path": "a.txt"})),
            record("write_file", json!({"path": "b.txt"})),
            record("read_file", json!({"path": "c.txt"})),
        ]);
        assert_eq!(analysis.total_tool_calls, 3);
        assert_eq!(analysis.unique_tools, 2);
        assert_eq!(analysis.useful_tool_calls, 3);
        assert_eq!(analysis.efficiency_score, 1.0);
        assert!(analysis.diagnostics.is_empty());
    }

    #[test]
    fn test_redundant_calls_detected() {
        let analysis = run(&[
            record("read_file", json!({"path": "a.txt"})),
            record("read_file", json!({"path": "a.txt"})),
            record("write_file", json!({"path": "b.txt"})),
        ]);
        assert_eq!(analysis.total_tool_calls, 3);
        assert_eq!(analysis.useful_tool_calls, 2);
        assert!(analysis.efficiency_score < 1.0);
        assert!(!analysis.diagnostics.is_empty());
    }

    #[test]
    fn test_same_tool_different_args_not_redundant() {
        let analysis = run(&[
            record("read_file", json!({"path": "a.txt"})),
            record("read_file", json!({"path": "b.txt"})),
        ]);
        assert_eq!(analysis.useful_tool_calls, 2);
        assert_eq!(analysis.efficiency_score, 1.0);
        assert!(analysis.diagnostics.is_empty());
    }

    #[test]
    fn test_loop_detection() {
        // The single-tool pattern ["check"] occurs four times in a row.
        let calls = vec![record("check", json!({})); 4];
        let analysis = run(&calls);
        assert_eq!(analysis.total_tool_calls, 4);
        // Redundancy and/or loop detection must flag some of these calls.
        assert!(analysis.useful_tool_calls < 4);
        assert!(analysis.efficiency_score < 1.0);
    }

    #[test]
    fn test_multi_tool_loop_detection() {
        // The two-tool pattern ["read", "write"] occurs three times in a row.
        let pair = [record("read", json!({"x": 1})), record("write", json!({"y": 2}))];
        let calls: Vec<ToolCallRecord> = pair.iter().cloned().cycle().take(6).collect();
        let analysis = run(&calls);
        assert_eq!(analysis.total_tool_calls, 6);
        // A loop is present, so not every call can be useful.
        assert!(analysis.useful_tool_calls < 6);
        assert!(analysis.efficiency_score < 1.0);
    }

    #[test]
    fn test_analyze_events() {
        use adk_core::{Content, Event, Part};

        // Each event carries one identical `get_weather` function call.
        let weather_event = || {
            let mut event = Event::new("inv-1");
            event.llm_response.content = Some(Content {
                role: "model".to_string(),
                parts: vec![Part::FunctionCall {
                    name: "get_weather".to_string(),
                    args: json!({"city": "NYC"}),
                    id: None,
                    thought_signature: None,
                }],
            });
            event
        };

        let analysis = TraceAnalyzer::new().analyze(&[weather_event(), weather_event()]);
        assert_eq!(analysis.total_tool_calls, 2);
        assert_eq!(analysis.unique_tools, 1);
        // The second call repeats the first, so only one call is useful.
        assert_eq!(analysis.useful_tool_calls, 1);
        assert_eq!(analysis.efficiency_score, 0.5);
    }

    #[test]
    fn test_single_call() {
        let analysis = run(&[record("search", json!({"query": "hello"}))]);
        assert_eq!(analysis.total_tool_calls, 1);
        assert_eq!(analysis.unique_tools, 1);
        assert_eq!(analysis.useful_tool_calls, 1);
        assert_eq!(analysis.efficiency_score, 1.0);
    }

    #[test]
    fn test_efficiency_score_bounds() {
        // Worst case: the very same call issued five times.
        let calls = vec![record("ping", json!({})); 5];
        let analysis = run(&calls);
        assert!(analysis.efficiency_score >= 0.0);
        assert!(analysis.efficiency_score <= 1.0);
        assert!(analysis.useful_tool_calls <= analysis.total_tool_calls);
    }
}