// mdx_rust_core/trace.rs

//! Trace diagnosis primitives.
//!
//! This is the bridge between raw runner traces and future targeted fixes.
//! Today it summarizes obvious run-level failures. As trace spans become
//! richer, this module should become the place that maps span failures to
//! candidate edit strategies.
7
8use crate::runner::AgentRunResult;
9use schemars::JsonSchema;
10use serde::{Deserialize, Serialize};
11
12#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
13pub enum FailureKind {
14    Timeout,
15    ProcessError,
16    InvalidJson,
17    EchoFallback,
18    LowConfidence,
19    MissingReasoning,
20    Unknown,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
24pub struct FailureSignal {
25    pub kind: FailureKind,
26    pub severity: u8,
27    pub evidence: String,
28    #[serde(default)]
29    pub span_id: Option<String>,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
33pub struct TraceDiagnosis {
34    pub signals: Vec<FailureSignal>,
35    #[serde(default)]
36    pub ranked_span_ids: Vec<String>,
37}
38
39impl TraceDiagnosis {
40    pub fn has_failures(&self) -> bool {
41        !self.signals.is_empty()
42    }
43
44    pub fn compact_summary(&self) -> String {
45        if self.signals.is_empty() {
46            return "no obvious trace failures".to_string();
47        }
48
49        self.signals
50            .iter()
51            .map(|signal| format!("{:?}:{}", signal.kind, signal.severity))
52            .collect::<Vec<_>>()
53            .join(", ")
54    }
55}
56
57pub fn diagnose_run(result: &AgentRunResult) -> TraceDiagnosis {
58    let mut signals = Vec::new();
59
60    if !result.success {
61        let error = result
62            .error
63            .clone()
64            .unwrap_or_else(|| "unknown".to_string());
65        let kind = if error.to_lowercase().contains("timeout") {
66            FailureKind::Timeout
67        } else {
68            FailureKind::ProcessError
69        };
70        signals.push(FailureSignal {
71            kind,
72            severity: 3,
73            evidence: truncate(&error, 240),
74            span_id: failing_span_id(result),
75        });
76    }
77
78    if result.output.get("raw").is_some() {
79        signals.push(FailureSignal {
80            kind: FailureKind::InvalidJson,
81            severity: 2,
82            evidence: "agent stdout was not valid JSON".to_string(),
83            span_id: failing_span_id(result),
84        });
85    }
86
87    if let Some(answer) = result.output.get("answer").and_then(|value| value.as_str()) {
88        if answer.starts_with("Echo:") {
89            signals.push(FailureSignal {
90                kind: FailureKind::EchoFallback,
91                severity: 2,
92                evidence: truncate(answer, 160),
93                span_id: failing_span_id(result),
94            });
95        }
96    }
97
98    if let Some(confidence) = result
99        .output
100        .get("confidence")
101        .and_then(|value| value.as_f64())
102    {
103        if confidence < 0.5 {
104            signals.push(FailureSignal {
105                kind: FailureKind::LowConfidence,
106                severity: 1,
107                evidence: format!("confidence={confidence:.2}"),
108                span_id: failing_span_id(result),
109            });
110        }
111    }
112
113    let reasoning = result
114        .output
115        .get("reasoning")
116        .and_then(|value| value.as_str())
117        .unwrap_or("");
118    if reasoning.trim().is_empty() {
119        signals.push(FailureSignal {
120            kind: FailureKind::MissingReasoning,
121            severity: 1,
122            evidence: "reasoning field missing or empty".to_string(),
123            span_id: failing_span_id(result),
124        });
125    }
126
127    let mut ranked_span_ids: Vec<String> = signals
128        .iter()
129        .filter_map(|signal| signal.span_id.clone())
130        .collect();
131    ranked_span_ids.sort();
132    ranked_span_ids.dedup();
133
134    TraceDiagnosis {
135        signals,
136        ranked_span_ids,
137    }
138}
139
140fn failing_span_id(result: &AgentRunResult) -> Option<String> {
141    result
142        .traces
143        .iter()
144        .rev()
145        .find_map(|event| event.span_id.clone())
146}
147
/// Shortens `value` to at most `limit` characters, appending `"..."` only
/// when something was actually cut off.
///
/// The limit is counted in `char`s rather than bytes, so multi-byte text is
/// never split inside a character and never gets an ellipsis unless
/// characters were removed. The returned string can be up to three
/// characters longer than `limit` because of the ellipsis.
fn truncate(value: &str, limit: usize) -> String {
    // Byte offset of the first character beyond the limit, if one exists.
    match value.char_indices().nth(limit) {
        Some((cut, _)) => format!("{}...", &value[..cut]),
        None => value.to_string(),
    }
}
156
#[cfg(test)]
mod tests {
    use super::*;
    use crate::runner::AgentRunResult;

    /// A successful run whose answer is an echo with low confidence must
    /// raise both the echo-fallback and the low-confidence signals.
    #[test]
    fn diagnose_echo_fallback_and_low_confidence() {
        let output = serde_json::json!({
            "answer": "Echo: hello",
            "confidence": 0.2,
            "reasoning": ""
        });
        let run = AgentRunResult {
            output,
            duration_ms: 1,
            success: true,
            error: None,
            traces: Vec::new(),
        };

        let diagnosis = diagnose_run(&run);
        // True when any emitted signal has the given kind.
        let reported = |kind: FailureKind| diagnosis.signals.iter().any(|s| s.kind == kind);

        assert!(diagnosis.has_failures());
        assert!(reported(FailureKind::EchoFallback));
        assert!(reported(FailureKind::LowConfidence));
    }
}