Skip to main content

mdx_rust_core/
trace.rs

//! Trace diagnosis primitives.
//!
//! This is the bridge between raw runner traces and future targeted fixes.
//! Today it summarizes obvious run-level failures. As trace spans become
//! richer, this module should become the place that maps span failures to
//! candidate edit strategies.
7
8use crate::runner::AgentRunResult;
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
12pub enum FailureKind {
13    Timeout,
14    ProcessError,
15    InvalidJson,
16    EchoFallback,
17    LowConfidence,
18    MissingReasoning,
19    Unknown,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct FailureSignal {
24    pub kind: FailureKind,
25    pub severity: u8,
26    pub evidence: String,
27    #[serde(default)]
28    pub span_id: Option<String>,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize, Default)]
32pub struct TraceDiagnosis {
33    pub signals: Vec<FailureSignal>,
34    #[serde(default)]
35    pub ranked_span_ids: Vec<String>,
36}
37
38impl TraceDiagnosis {
39    pub fn has_failures(&self) -> bool {
40        !self.signals.is_empty()
41    }
42
43    pub fn compact_summary(&self) -> String {
44        if self.signals.is_empty() {
45            return "no obvious trace failures".to_string();
46        }
47
48        self.signals
49            .iter()
50            .map(|signal| format!("{:?}:{}", signal.kind, signal.severity))
51            .collect::<Vec<_>>()
52            .join(", ")
53    }
54}
55
56pub fn diagnose_run(result: &AgentRunResult) -> TraceDiagnosis {
57    let mut signals = Vec::new();
58
59    if !result.success {
60        let error = result
61            .error
62            .clone()
63            .unwrap_or_else(|| "unknown".to_string());
64        let kind = if error.to_lowercase().contains("timeout") {
65            FailureKind::Timeout
66        } else {
67            FailureKind::ProcessError
68        };
69        signals.push(FailureSignal {
70            kind,
71            severity: 3,
72            evidence: truncate(&error, 240),
73            span_id: failing_span_id(result),
74        });
75    }
76
77    if result.output.get("raw").is_some() {
78        signals.push(FailureSignal {
79            kind: FailureKind::InvalidJson,
80            severity: 2,
81            evidence: "agent stdout was not valid JSON".to_string(),
82            span_id: failing_span_id(result),
83        });
84    }
85
86    if let Some(answer) = result.output.get("answer").and_then(|value| value.as_str()) {
87        if answer.starts_with("Echo:") {
88            signals.push(FailureSignal {
89                kind: FailureKind::EchoFallback,
90                severity: 2,
91                evidence: truncate(answer, 160),
92                span_id: failing_span_id(result),
93            });
94        }
95    }
96
97    if let Some(confidence) = result
98        .output
99        .get("confidence")
100        .and_then(|value| value.as_f64())
101    {
102        if confidence < 0.5 {
103            signals.push(FailureSignal {
104                kind: FailureKind::LowConfidence,
105                severity: 1,
106                evidence: format!("confidence={confidence:.2}"),
107                span_id: failing_span_id(result),
108            });
109        }
110    }
111
112    let reasoning = result
113        .output
114        .get("reasoning")
115        .and_then(|value| value.as_str())
116        .unwrap_or("");
117    if reasoning.trim().is_empty() {
118        signals.push(FailureSignal {
119            kind: FailureKind::MissingReasoning,
120            severity: 1,
121            evidence: "reasoning field missing or empty".to_string(),
122            span_id: failing_span_id(result),
123        });
124    }
125
126    let mut ranked_span_ids: Vec<String> = signals
127        .iter()
128        .filter_map(|signal| signal.span_id.clone())
129        .collect();
130    ranked_span_ids.sort();
131    ranked_span_ids.dedup();
132
133    TraceDiagnosis {
134        signals,
135        ranked_span_ids,
136    }
137}
138
139fn failing_span_id(result: &AgentRunResult) -> Option<String> {
140    result
141        .traces
142        .iter()
143        .rev()
144        .find_map(|event| event.span_id.clone())
145}
146
/// Shortens `value` to at most `limit` characters, appending `...` when
/// anything was cut off.
///
/// The limit is measured in `char`s on both sides of the decision: a string
/// with `limit` characters or fewer is returned unchanged, even if its UTF-8
/// encoding is longer than `limit` bytes. The cut always falls on a character
/// boundary.
fn truncate(value: &str, limit: usize) -> String {
    // The byte offset of the character at index `limit` is exactly where the
    // kept prefix ends; if there is no such character, nothing needs cutting.
    match value.char_indices().nth(limit) {
        None => value.to_string(),
        Some((cut, _)) => format!("{}...", &value[..cut]),
    }
}
155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::runner::AgentRunResult;

    /// A successful run whose answer is an echo and whose confidence is low
    /// must be reported with both corresponding failure kinds.
    #[test]
    fn diagnose_echo_fallback_and_low_confidence() {
        let output = serde_json::json!({
            "answer": "Echo: hello",
            "confidence": 0.2,
            "reasoning": ""
        });
        let run = AgentRunResult {
            output,
            duration_ms: 1,
            success: true,
            error: None,
            traces: Vec::new(),
        };

        let report = diagnose_run(&run);
        // True when some signal of the requested kind was reported.
        let reported =
            |wanted: FailureKind| report.signals.iter().any(|signal| signal.kind == wanted);

        assert!(report.has_failures());
        assert!(reported(FailureKind::EchoFallback));
        assert!(reported(FailureKind::LowConfidence));
    }
}