1use crate::runner::AgentRunResult;
9use serde::{Deserialize, Serialize};
10
/// Category of problem detected in an agent run.
///
/// The per-variant notes describe the conditions under which `diagnose_run`
/// in this file emits each kind.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum FailureKind {
    /// The run was unsuccessful and its error text contains "timeout"
    /// (compared case-insensitively).
    Timeout,
    /// The run was unsuccessful for any other reason.
    ProcessError,
    /// The run output contains a `raw` key.
    InvalidJson,
    /// The `answer` string in the output starts with `Echo:`.
    EchoFallback,
    /// The numeric `confidence` in the output is below 0.5.
    LowConfidence,
    /// The `reasoning` string in the output is missing or blank.
    MissingReasoning,
    /// Not emitted by any code visible in this file.
    Unknown,
}
21
/// A single detected problem, with supporting evidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FailureSignal {
    /// What kind of problem was detected.
    pub kind: FailureKind,
    /// Numeric weight of the problem. `diagnose_run` assigns 3 to failed
    /// runs, 2 to invalid JSON and echo fallbacks, and 1 to low confidence
    /// and missing reasoning.
    pub severity: u8,
    /// Short human-readable text backing the signal (an error message, an
    /// answer excerpt, a formatted value, ...).
    pub evidence: String,
    /// Span id associated with the signal, if any; falls back to `None`
    /// when absent from deserialized input.
    #[serde(default)]
    pub span_id: Option<String>,
}
30
/// Result of diagnosing one agent run: the detected signals plus the span
/// ids they point at.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TraceDiagnosis {
    /// Every failure signal found; empty when nothing looked wrong.
    pub signals: Vec<FailureSignal>,
    /// Span ids referenced by `signals`. As built by `diagnose_run` the list
    /// is sorted and deduplicated; it defaults to empty when absent from
    /// deserialized input.
    #[serde(default)]
    pub ranked_span_ids: Vec<String>,
}
37
38impl TraceDiagnosis {
39 pub fn has_failures(&self) -> bool {
40 !self.signals.is_empty()
41 }
42
43 pub fn compact_summary(&self) -> String {
44 if self.signals.is_empty() {
45 return "no obvious trace failures".to_string();
46 }
47
48 self.signals
49 .iter()
50 .map(|signal| format!("{:?}:{}", signal.kind, signal.severity))
51 .collect::<Vec<_>>()
52 .join(", ")
53 }
54}
55
56pub fn diagnose_run(result: &AgentRunResult) -> TraceDiagnosis {
57 let mut signals = Vec::new();
58
59 if !result.success {
60 let error = result
61 .error
62 .clone()
63 .unwrap_or_else(|| "unknown".to_string());
64 let kind = if error.to_lowercase().contains("timeout") {
65 FailureKind::Timeout
66 } else {
67 FailureKind::ProcessError
68 };
69 signals.push(FailureSignal {
70 kind,
71 severity: 3,
72 evidence: truncate(&error, 240),
73 span_id: failing_span_id(result),
74 });
75 }
76
77 if result.output.get("raw").is_some() {
78 signals.push(FailureSignal {
79 kind: FailureKind::InvalidJson,
80 severity: 2,
81 evidence: "agent stdout was not valid JSON".to_string(),
82 span_id: failing_span_id(result),
83 });
84 }
85
86 if let Some(answer) = result.output.get("answer").and_then(|value| value.as_str()) {
87 if answer.starts_with("Echo:") {
88 signals.push(FailureSignal {
89 kind: FailureKind::EchoFallback,
90 severity: 2,
91 evidence: truncate(answer, 160),
92 span_id: failing_span_id(result),
93 });
94 }
95 }
96
97 if let Some(confidence) = result
98 .output
99 .get("confidence")
100 .and_then(|value| value.as_f64())
101 {
102 if confidence < 0.5 {
103 signals.push(FailureSignal {
104 kind: FailureKind::LowConfidence,
105 severity: 1,
106 evidence: format!("confidence={confidence:.2}"),
107 span_id: failing_span_id(result),
108 });
109 }
110 }
111
112 let reasoning = result
113 .output
114 .get("reasoning")
115 .and_then(|value| value.as_str())
116 .unwrap_or("");
117 if reasoning.trim().is_empty() {
118 signals.push(FailureSignal {
119 kind: FailureKind::MissingReasoning,
120 severity: 1,
121 evidence: "reasoning field missing or empty".to_string(),
122 span_id: failing_span_id(result),
123 });
124 }
125
126 let mut ranked_span_ids: Vec<String> = signals
127 .iter()
128 .filter_map(|signal| signal.span_id.clone())
129 .collect();
130 ranked_span_ids.sort();
131 ranked_span_ids.dedup();
132
133 TraceDiagnosis {
134 signals,
135 ranked_span_ids,
136 }
137}
138
139fn failing_span_id(result: &AgentRunResult) -> Option<String> {
140 result
141 .traces
142 .iter()
143 .rev()
144 .find_map(|event| event.span_id.clone())
145}
146
/// Shortens `value` to at most `limit` characters, appending `...` only when
/// something was actually cut off.
///
/// The limit is counted in `char`s, not bytes, so multi-byte text is never
/// split inside a character and a string that already fits is returned
/// unchanged regardless of its byte length.
fn truncate(value: &str, limit: usize) -> String {
    match value.char_indices().nth(limit) {
        // A character exists at index `limit`: drop it and everything after
        // it, cutting at its byte offset (always a valid char boundary).
        Some((cut, _)) => format!("{}...", &value[..cut]),
        // Fewer than `limit + 1` characters: nothing to remove.
        None => value.to_string(),
    }
}
155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::runner::AgentRunResult;

    /// A successful run whose answer is an echo and whose confidence is low
    /// must be flagged with both corresponding signal kinds.
    #[test]
    fn diagnose_echo_fallback_and_low_confidence() {
        let output = serde_json::json!({
            "answer": "Echo: hello",
            "confidence": 0.2,
            "reasoning": ""
        });
        let result = AgentRunResult {
            output,
            duration_ms: 1,
            success: true,
            error: None,
            traces: vec![],
        };

        let diagnosis = diagnose_run(&result);
        let has_kind = |wanted: FailureKind| {
            diagnosis
                .signals
                .iter()
                .any(|signal| signal.kind == wanted)
        };

        assert!(diagnosis.has_failures());
        assert!(has_kind(FailureKind::EchoFallback));
        assert!(has_kind(FailureKind::LowConfidence));
    }
}