Skip to main content

axon/
replay.rs

1//! Execution Replay — reconstruct and analyze recorded traces.
2//!
3//! Reads `.trace.json` files (produced by `axon run --trace`) and provides:
4//!   - Structured timeline reconstruction
5//!   - Per-step result extraction
6//!   - Anchor pass/breach summary
7//!   - Regression comparison between two traces
8//!
9//! Usage:
10//!   axon replay trace.json                   — replay a single trace
11//!   axon replay trace.json --json            — structured JSON output
12//!   axon replay old.trace.json new.trace.json — regression comparison
13//!
14//! Exit codes:
15//!   0 — replay successful (or traces match for regression)
16//!   1 — regression differences detected
17//!   2 — I/O or parse error
18
19use std::collections::HashMap;
20use std::io::IsTerminal;
21
22// ── Replay structures ────────────────────────────────────────────────────
23
/// A reconstructed execution from a trace file.
///
/// Built by `parse_trace` from the `_meta` header and the `events` array of a
/// trace document.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ReplayTrace {
    /// Header values read from the trace's `_meta` object.
    pub meta: TraceMeta,
    /// Units rebuilt from the event stream, in the order they were encountered.
    pub units: Vec<ReplayUnit>,
    /// Totals aggregated over `units`.
    pub summary: ReplaySummary,
}
31
/// Trace metadata from the _meta header.
///
/// Each field is copied from the `_meta` key of the same name; a key that is
/// missing or not a JSON string becomes an empty string.
#[derive(Debug, Clone, serde::Serialize)]
pub struct TraceMeta {
    /// Value of `_meta.source`.
    pub source: String,
    /// Value of `_meta.backend`.
    pub backend: String,
    /// Value of `_meta.tool_mode`.
    pub tool_mode: String,
    /// Value of `_meta.axon_version`.
    pub axon_version: String,
    /// Value of `_meta.mode`.
    pub mode: String,
}
41
/// A reconstructed execution unit.
///
/// A unit is opened by a `unit_start` event and closed by `unit_complete`
/// (or by the next `unit_start`, or the end of the event list).
#[derive(Debug, Clone, serde::Serialize)]
pub struct ReplayUnit {
    /// The `unit` field of the `unit_start` event that opened this unit.
    pub flow_name: String,
    /// Steps recorded while this unit was open, in event order.
    pub steps: Vec<ReplayStep>,
    /// Parsed from `duration=<n>ms` in a `hook_unit_metrics` detail; 0 if absent or unparsable.
    pub duration_ms: u64,
    /// Parsed from `tokens_in=<n>` in a `hook_unit_metrics` detail; 0 if absent or unparsable.
    pub total_input_tokens: u64,
    /// Parsed from `tokens_out=<n>` in a `hook_unit_metrics` detail; 0 if absent or unparsable.
    pub total_output_tokens: u64,
    /// Number of `anchor_breach` events seen while this unit was open.
    pub anchor_breaches: u32,
}
52
/// A reconstructed step from trace events.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ReplayStep {
    /// The `step` field of the originating event (empty if missing).
    pub name: String,
    /// The raw event type string that produced this step (e.g. `step_complete`).
    pub event_type: String,
    /// The `detail` field of the originating event (empty if missing).
    pub output: String,
    /// `false` for `step_error` events, `true` for every other step-producing event.
    pub success: bool,
    /// Anchor events accumulated since the previous step; empty for `session_*` steps.
    pub anchor_results: Vec<AnchorEvent>,
    /// `true` if a `retry_attempt` event was seen since the previous step;
    /// always `false` for `session_*` steps.
    pub was_retried: bool,
}
63
/// Anchor pass/breach event.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AnchorEvent {
    /// Text before the first `:` of the event detail, trimmed.
    pub anchor_name: String,
    /// `true` for `anchor_pass`, `false` for `anchor_breach`.
    pub passed: bool,
    /// The unmodified `detail` string of the event.
    pub detail: String,
}
71
/// Summary of a replayed trace.
///
/// All values are totals over every unit of the trace.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ReplaySummary {
    /// Number of reconstructed units.
    pub total_units: usize,
    /// Number of steps across all units.
    pub total_steps: usize,
    /// Number of passed anchor results attached to steps.
    pub total_anchor_passes: usize,
    /// Anchor breach tally across all units.
    pub total_anchor_breaches: usize,
    /// Number of steps flagged as retried.
    pub total_retries: usize,
    /// Number of steps whose `success` flag is `false`.
    pub total_errors: usize,
    /// Sum of the units' input token counts.
    pub total_input_tokens: u64,
    /// Sum of the units' output token counts.
    pub total_output_tokens: u64,
}
84
/// Regression diff between two replayed traces.
#[derive(Debug, Clone, serde::Serialize)]
pub struct RegressionDiff {
    /// `true` when no step was changed, added, or removed.
    pub identical: bool,
    /// One entry per `(unit, step)` key found in either trace, sorted by key.
    pub step_diffs: Vec<StepRegression>,
    /// Counts of `step_diffs` entries per status.
    pub summary: RegressionSummary,
}
92
/// Regression diff for a single step.
#[derive(Debug, Clone, serde::Serialize)]
pub struct StepRegression {
    /// Name of the unit the step belongs to.
    pub unit: String,
    /// Name of the step.
    pub step: String,
    /// How the step's output compares between the two traces.
    pub status: RegressionStatus,
    /// Output in the old trace; empty when the step is `Added`.
    pub old_output: String,
    /// Output in the new trace; empty when the step is `Removed`.
    pub new_output: String,
}
102
/// Regression status for a step.
///
/// Serialized as the lowercase variant name (`"match"`, `"changed"`,
/// `"removed"`, `"added"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RegressionStatus {
    /// Output matches.
    Match,
    /// Output differs.
    Changed,
    /// Step only in old trace.
    Removed,
    /// Step only in new trace.
    Added,
}
116
/// Summary of regression comparison.
#[derive(Debug, Clone, serde::Serialize)]
pub struct RegressionSummary {
    /// Number of compared step keys (union of both traces).
    pub total_steps: usize,
    /// Steps whose output is equal in both traces.
    pub matched: usize,
    /// Steps present in both traces with different output.
    pub changed: usize,
    /// Steps present only in the new trace.
    pub added: usize,
    /// Steps present only in the old trace.
    pub removed: usize,
}
126
127// ── Trace parsing ────────────────────────────────────────────────────────
128
129/// Parse a trace JSON value into a structured ReplayTrace.
130pub fn parse_trace(data: &serde_json::Value) -> ReplayTrace {
131    let meta = parse_meta(data);
132    let events = data["events"]
133        .as_array()
134        .cloned()
135        .unwrap_or_default();
136
137    let units = reconstruct_units(&events);
138
139    let mut summary = ReplaySummary {
140        total_units: units.len(),
141        total_steps: 0,
142        total_anchor_passes: 0,
143        total_anchor_breaches: 0,
144        total_retries: 0,
145        total_errors: 0,
146        total_input_tokens: 0,
147        total_output_tokens: 0,
148    };
149
150    for u in &units {
151        summary.total_steps += u.steps.len();
152        summary.total_input_tokens += u.total_input_tokens;
153        summary.total_output_tokens += u.total_output_tokens;
154        summary.total_anchor_breaches += u.anchor_breaches as usize;
155        for s in &u.steps {
156            summary.total_anchor_passes += s.anchor_results.iter().filter(|a| a.passed).count();
157            summary.total_anchor_breaches += s.anchor_results.iter().filter(|a| !a.passed).count();
158            if s.was_retried {
159                summary.total_retries += 1;
160            }
161            if !s.success {
162                summary.total_errors += 1;
163            }
164        }
165    }
166
167    ReplayTrace {
168        meta,
169        units,
170        summary,
171    }
172}
173
174fn parse_meta(data: &serde_json::Value) -> TraceMeta {
175    let meta = &data["_meta"];
176    TraceMeta {
177        source: meta["source"].as_str().unwrap_or("").to_string(),
178        backend: meta["backend"].as_str().unwrap_or("").to_string(),
179        tool_mode: meta["tool_mode"].as_str().unwrap_or("").to_string(),
180        axon_version: meta["axon_version"].as_str().unwrap_or("").to_string(),
181        mode: meta["mode"].as_str().unwrap_or("").to_string(),
182    }
183}
184
185fn reconstruct_units(events: &[serde_json::Value]) -> Vec<ReplayUnit> {
186    let mut units: Vec<ReplayUnit> = Vec::new();
187    let mut current_unit: Option<ReplayUnit> = None;
188    let mut current_step_anchors: Vec<AnchorEvent> = Vec::new();
189    let mut current_step_retried = false;
190
191    for event in events {
192        let etype = event["event"]
193            .as_str()
194            .or_else(|| event["type"].as_str())
195            .unwrap_or("");
196        let unit_name = event["unit"].as_str().unwrap_or("");
197        let step_name = event["step"].as_str().unwrap_or("");
198        let detail = event["detail"].as_str().unwrap_or("");
199
200        match etype {
201            "unit_start" => {
202                if let Some(u) = current_unit.take() {
203                    units.push(u);
204                }
205                current_unit = Some(ReplayUnit {
206                    flow_name: unit_name.to_string(),
207                    steps: Vec::new(),
208                    duration_ms: 0,
209                    total_input_tokens: 0,
210                    total_output_tokens: 0,
211                    anchor_breaches: 0,
212                });
213                current_step_anchors.clear();
214                current_step_retried = false;
215            }
216            "unit_complete" => {
217                if let Some(u) = current_unit.take() {
218                    units.push(u);
219                }
220            }
221            "step_complete" | "step_stub" | "tool_native" | "step_parallel" => {
222                if let Some(ref mut u) = current_unit {
223                    let success = etype != "step_error";
224                    u.steps.push(ReplayStep {
225                        name: step_name.to_string(),
226                        event_type: etype.to_string(),
227                        output: detail.to_string(),
228                        success,
229                        anchor_results: std::mem::take(&mut current_step_anchors),
230                        was_retried: current_step_retried,
231                    });
232                    current_step_retried = false;
233                }
234            }
235            "step_error" => {
236                if let Some(ref mut u) = current_unit {
237                    u.steps.push(ReplayStep {
238                        name: step_name.to_string(),
239                        event_type: etype.to_string(),
240                        output: detail.to_string(),
241                        success: false,
242                        anchor_results: std::mem::take(&mut current_step_anchors),
243                        was_retried: current_step_retried,
244                    });
245                    current_step_retried = false;
246                }
247            }
248            "anchor_pass" => {
249                current_step_anchors.push(AnchorEvent {
250                    anchor_name: extract_anchor_name(detail),
251                    passed: true,
252                    detail: detail.to_string(),
253                });
254            }
255            "anchor_breach" => {
256                current_step_anchors.push(AnchorEvent {
257                    anchor_name: extract_anchor_name(detail),
258                    passed: false,
259                    detail: detail.to_string(),
260                });
261                if let Some(ref mut u) = current_unit {
262                    u.anchor_breaches += 1;
263                }
264            }
265            "retry_attempt" => {
266                current_step_retried = true;
267            }
268            "hook_unit_metrics" => {
269                if let Some(ref mut u) = current_unit.as_mut().or_else(|| units.last_mut()) {
270                    // Parse: "duration=123ms, steps=2, tokens_in=500, tokens_out=200, ..."
271                    for part in detail.split(", ") {
272                        if let Some(val) = part.strip_prefix("duration=").and_then(|s| s.strip_suffix("ms")) {
273                            u.duration_ms = val.parse().unwrap_or(0);
274                        } else if let Some(val) = part.strip_prefix("tokens_in=") {
275                            u.total_input_tokens = val.parse().unwrap_or(0);
276                        } else if let Some(val) = part.strip_prefix("tokens_out=") {
277                            u.total_output_tokens = val.parse().unwrap_or(0);
278                        }
279                    }
280                }
281            }
282            // Session events create synthetic steps
283            e if e.starts_with("session_") => {
284                if let Some(ref mut u) = current_unit {
285                    u.steps.push(ReplayStep {
286                        name: step_name.to_string(),
287                        event_type: etype.to_string(),
288                        output: detail.to_string(),
289                        success: true,
290                        anchor_results: Vec::new(),
291                        was_retried: false,
292                    });
293                }
294            }
295            _ => {} // wave_start, step_deps, schedule, etc. — metadata only
296        }
297    }
298
299    // Push final unit if still open
300    if let Some(u) = current_unit {
301        units.push(u);
302    }
303
304    units
305}
306
/// Return the anchor name from an anchor event detail string.
///
/// The detail looks like `"AnchorName: 0.95"` or
/// `"AnchorName: 0.50, reason=..."`; the name is everything before the first
/// `:` with surrounding whitespace removed. Without a `:` the whole (trimmed)
/// detail is returned.
fn extract_anchor_name(detail: &str) -> String {
    let name = match detail.split_once(':') {
        Some((before, _rest)) => before,
        None => detail,
    };
    name.trim().to_owned()
}
311
312// ── Regression comparison ────────────────────────────────────────────────
313
314/// Compare two replayed traces for regression testing.
315pub fn compare_traces(old: &ReplayTrace, new: &ReplayTrace) -> RegressionDiff {
316    let mut step_diffs = Vec::new();
317
318    // Build step output maps: (unit, step) → output
319    let old_map = build_step_map(old);
320    let new_map = build_step_map(new);
321
322    let mut all_keys: Vec<(String, String)> = old_map
323        .keys()
324        .chain(new_map.keys())
325        .cloned()
326        .collect::<std::collections::HashSet<_>>()
327        .into_iter()
328        .collect();
329    all_keys.sort();
330
331    for key in &all_keys {
332        let old_val = old_map.get(key);
333        let new_val = new_map.get(key);
334
335        let (status, old_output, new_output) = match (old_val, new_val) {
336            (Some(o), Some(n)) => {
337                if o == n {
338                    (RegressionStatus::Match, o.clone(), n.clone())
339                } else {
340                    (RegressionStatus::Changed, o.clone(), n.clone())
341                }
342            }
343            (Some(o), None) => (RegressionStatus::Removed, o.clone(), String::new()),
344            (None, Some(n)) => (RegressionStatus::Added, String::new(), n.clone()),
345            (None, None) => continue,
346        };
347
348        step_diffs.push(StepRegression {
349            unit: key.0.clone(),
350            step: key.1.clone(),
351            status,
352            old_output,
353            new_output,
354        });
355    }
356
357    let matched = step_diffs.iter().filter(|d| d.status == RegressionStatus::Match).count();
358    let changed = step_diffs.iter().filter(|d| d.status == RegressionStatus::Changed).count();
359    let added = step_diffs.iter().filter(|d| d.status == RegressionStatus::Added).count();
360    let removed = step_diffs.iter().filter(|d| d.status == RegressionStatus::Removed).count();
361    let identical = changed == 0 && added == 0 && removed == 0;
362    let total_steps = step_diffs.len();
363
364    RegressionDiff {
365        identical,
366        step_diffs,
367        summary: RegressionSummary {
368            total_steps,
369            matched,
370            changed,
371            added,
372            removed,
373        },
374    }
375}
376
377fn build_step_map(trace: &ReplayTrace) -> HashMap<(String, String), String> {
378    let mut map = HashMap::new();
379    for u in &trace.units {
380        for s in &u.steps {
381            map.insert(
382                (u.flow_name.clone(), s.name.clone()),
383                s.output.clone(),
384            );
385        }
386    }
387    map
388}
389
390// ── CLI entry point ──────────────────────────────────────────────────────
391
392/// Run the replay command. Returns exit code.
393pub fn run_replay(file: &str, compare_file: Option<&str>, json_output: bool) -> i32 {
394    let use_color = !json_output && std::io::stdout().is_terminal();
395
396    // Read primary trace
397    let content = match std::fs::read_to_string(file) {
398        Ok(s) => s,
399        Err(e) => {
400            eprintln!("Cannot read '{}': {e}", file);
401            return 2;
402        }
403    };
404    let data: serde_json::Value = match serde_json::from_str(&content) {
405        Ok(v) => v,
406        Err(e) => {
407            eprintln!("Invalid JSON in '{}': {e}", file);
408            return 2;
409        }
410    };
411
412    let trace = parse_trace(&data);
413
414    // If comparison file provided, do regression
415    if let Some(cmp_file) = compare_file {
416        let cmp_content = match std::fs::read_to_string(cmp_file) {
417            Ok(s) => s,
418            Err(e) => {
419                eprintln!("Cannot read '{}': {e}", cmp_file);
420                return 2;
421            }
422        };
423        let cmp_data: serde_json::Value = match serde_json::from_str(&cmp_content) {
424            Ok(v) => v,
425            Err(e) => {
426                eprintln!("Invalid JSON in '{}': {e}", cmp_file);
427                return 2;
428            }
429        };
430
431        let cmp_trace = parse_trace(&cmp_data);
432        let regression = compare_traces(&trace, &cmp_trace);
433
434        if json_output {
435            println!("{}", serde_json::to_string_pretty(&regression).unwrap());
436        } else {
437            print_regression(&regression, file, cmp_file, use_color);
438        }
439
440        return if regression.identical { 0 } else { 1 };
441    }
442
443    // Single trace replay
444    if json_output {
445        println!("{}", serde_json::to_string_pretty(&trace).unwrap());
446    } else {
447        print_replay(&trace, file, use_color);
448    }
449
450    0
451}
452
453// ── Human-readable output ────────────────────────────────────────────────
454
455fn print_replay(trace: &ReplayTrace, file: &str, use_color: bool) {
456    let bold = |s: &str| if use_color { format!("\x1b[1m{s}\x1b[0m") } else { s.to_string() };
457    let dim = |s: &str| if use_color { format!("\x1b[2m{s}\x1b[0m") } else { s.to_string() };
458    let green = |s: &str| if use_color { format!("\x1b[32m{s}\x1b[0m") } else { s.to_string() };
459    let red = |s: &str| if use_color { format!("\x1b[31m{s}\x1b[0m") } else { s.to_string() };
460    let cyan = |s: &str| if use_color { format!("\x1b[36m{s}\x1b[0m") } else { s.to_string() };
461    let yellow = |s: &str| if use_color { format!("\x1b[33m{s}\x1b[0m") } else { s.to_string() };
462
463    println!("{} {}", bold("Replay:"), dim(file));
464    println!(
465        "  {} source={}, backend={}, mode={}",
466        dim("meta:"),
467        trace.meta.source,
468        trace.meta.backend,
469        trace.meta.mode,
470    );
471
472    for u in &trace.units {
473        println!(
474            "\n  {} {} ({} steps, {}ms)",
475            cyan("▶"),
476            bold(&u.flow_name),
477            u.steps.len(),
478            u.duration_ms,
479        );
480
481        for (i, s) in u.steps.iter().enumerate() {
482            let icon = if s.success { green("✓") } else { red("✗") };
483            let truncated = truncate_line(&s.output, 80);
484            println!(
485                "    {} {}.{} [{}] → {}",
486                icon,
487                i + 1,
488                bold(&s.name),
489                s.event_type,
490                truncated,
491            );
492
493            for a in &s.anchor_results {
494                let a_icon = if a.passed { green("⚓") } else { red("⚓") };
495                println!("      {} {}", a_icon, a.detail);
496            }
497
498            if s.was_retried {
499                println!("      {} retried", yellow("↻"));
500            }
501        }
502    }
503
504    // Summary
505    let s = &trace.summary;
506    println!(
507        "\n  {} {} units, {} steps, {} passes, {} breaches, {} retries, {} errors",
508        bold("Summary:"),
509        s.total_units,
510        s.total_steps,
511        s.total_anchor_passes,
512        s.total_anchor_breaches,
513        s.total_retries,
514        s.total_errors,
515    );
516    if s.total_input_tokens > 0 || s.total_output_tokens > 0 {
517        println!(
518            "  {} {} input + {} output tokens",
519            dim("Tokens:"),
520            s.total_input_tokens,
521            s.total_output_tokens,
522        );
523    }
524}
525
526fn print_regression(diff: &RegressionDiff, file_a: &str, file_b: &str, use_color: bool) {
527    let bold = |s: &str| if use_color { format!("\x1b[1m{s}\x1b[0m") } else { s.to_string() };
528    let dim = |s: &str| if use_color { format!("\x1b[2m{s}\x1b[0m") } else { s.to_string() };
529    let green = |s: &str| if use_color { format!("\x1b[1;32m{s}\x1b[0m") } else { s.to_string() };
530    let red = |s: &str| if use_color { format!("\x1b[1;31m{s}\x1b[0m") } else { s.to_string() };
531    let yellow = |s: &str| if use_color { format!("\x1b[1;33m{s}\x1b[0m") } else { s.to_string() };
532
533    println!(
534        "{} {} → {}",
535        bold("Regression:"),
536        dim(file_a),
537        dim(file_b),
538    );
539
540    if diff.identical {
541        println!("  {} Traces match — no regressions.", green("✓"));
542        return;
543    }
544
545    let s = &diff.summary;
546    println!(
547        "  {} {}/{} steps match, {} changed, {} added, {} removed",
548        yellow("!"),
549        s.matched,
550        s.total_steps,
551        s.changed,
552        s.added,
553        s.removed,
554    );
555
556    for d in &diff.step_diffs {
557        match d.status {
558            RegressionStatus::Match => {} // skip
559            RegressionStatus::Changed => {
560                println!(
561                    "\n  {} {}.{} — output changed",
562                    yellow("~"),
563                    d.unit,
564                    bold(&d.step),
565                );
566                println!("    {} {}", red("-"), truncate_line(&d.old_output, 80));
567                println!("    {} {}", green("+"), truncate_line(&d.new_output, 80));
568            }
569            RegressionStatus::Added => {
570                println!(
571                    "  {} {}.{} — new step",
572                    green("+"),
573                    d.unit,
574                    bold(&d.step),
575                );
576            }
577            RegressionStatus::Removed => {
578                println!(
579                    "  {} {}.{} — step removed",
580                    red("-"),
581                    d.unit,
582                    bold(&d.step),
583                );
584            }
585        }
586    }
587}
588
/// Return the first line of `s`, shortened to at most `max` characters.
///
/// When the first line is longer than `max` characters, the first `max`
/// characters are kept and `"..."` is appended; otherwise the line is
/// returned unchanged. An empty input yields an empty string.
///
/// The cut is made on a character boundary, so text containing multi-byte
/// UTF-8 characters never causes a slicing panic. For ASCII input the result
/// is the same as cutting at byte offset `max`.
fn truncate_line(s: &str, max: usize) -> String {
    let line = s.lines().next().unwrap_or(s);
    // `nth(max)` is the first character that does not fit; its byte offset is
    // exactly where the kept prefix ends.
    match line.char_indices().nth(max) {
        Some((cut, _)) => format!("{}...", &line[..cut]),
        None => line.to_string(),
    }
}
597
598// ── Tests ────────────────────────────────────────────────────────────────
599
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A minimal trace with one unit (`Flow1`) containing two steps:
    /// `S1` preceded by an anchor pass and `S2` preceded by an anchor breach.
    ///
    /// Event indices: 0 unit_start, 1 anchor_pass, 2 step_complete(S1),
    /// 3 anchor_breach, 4 step_complete(S2), 5 unit_complete.
    fn sample_trace() -> serde_json::Value {
        json!({
            "_meta": {
                "source": "test.axon",
                "backend": "anthropic",
                "tool_mode": "stub",
                "axon_version": "1.0.0",
                "mode": "stub",
            },
            "events": [
                { "event": "unit_start", "unit": "Flow1", "step": "", "detail": "persona=P1, context=default" },
                { "event": "anchor_pass", "unit": "Flow1", "step": "S1", "detail": "NoHallucination: 0.95" },
                { "event": "step_complete", "unit": "Flow1", "step": "S1", "detail": "result of S1" },
                { "event": "anchor_breach", "unit": "Flow1", "step": "S2", "detail": "FactualOnly: 0.30, reason=opinion detected" },
                { "event": "step_complete", "unit": "Flow1", "step": "S2", "detail": "result of S2" },
                { "event": "unit_complete", "unit": "Flow1", "step": "", "detail": "2 steps, 4 conversation turns" },
            ]
        })
    }

    #[test]
    fn parse_meta() {
        let data = sample_trace();
        let trace = parse_trace(&data);
        assert_eq!(trace.meta.source, "test.axon");
        assert_eq!(trace.meta.backend, "anthropic");
        assert_eq!(trace.meta.mode, "stub");
    }

    #[test]
    fn parse_units_and_steps() {
        let data = sample_trace();
        let trace = parse_trace(&data);
        assert_eq!(trace.units.len(), 1);
        assert_eq!(trace.units[0].flow_name, "Flow1");
        assert_eq!(trace.units[0].steps.len(), 2);
        assert_eq!(trace.units[0].steps[0].name, "S1");
        assert_eq!(trace.units[0].steps[0].output, "result of S1");
        assert!(trace.units[0].steps[0].success);
        assert_eq!(trace.units[0].steps[1].name, "S2");
    }

    #[test]
    fn parse_anchor_events() {
        let data = sample_trace();
        let trace = parse_trace(&data);

        // S1 has an anchor pass
        assert_eq!(trace.units[0].steps[0].anchor_results.len(), 1);
        assert!(trace.units[0].steps[0].anchor_results[0].passed);
        assert_eq!(trace.units[0].steps[0].anchor_results[0].anchor_name, "NoHallucination");

        // S2 has an anchor breach
        assert_eq!(trace.units[0].steps[1].anchor_results.len(), 1);
        assert!(!trace.units[0].steps[1].anchor_results[0].passed);
        assert_eq!(trace.units[0].steps[1].anchor_results[0].anchor_name, "FactualOnly");
    }

    #[test]
    fn parse_summary() {
        let data = sample_trace();
        let trace = parse_trace(&data);
        assert_eq!(trace.summary.total_units, 1);
        assert_eq!(trace.summary.total_steps, 2);
        assert_eq!(trace.summary.total_anchor_passes, 1);
        // Only a lower bound is asserted, so this test does not depend on how
        // unit-level and step-level breach counts are combined.
        assert!(trace.summary.total_anchor_breaches >= 1);
    }

    #[test]
    fn parse_tool_events() {
        let data = json!({
            "_meta": { "source": "t.axon", "backend": "anthropic", "tool_mode": "stub", "axon_version": "1.0.0", "mode": "stub" },
            "events": [
                { "event": "unit_start", "unit": "F", "step": "", "detail": "" },
                { "event": "tool_native", "unit": "F", "step": "CalcStep", "detail": "tool=Calculator, success=true, output=42" },
                { "event": "unit_complete", "unit": "F", "step": "", "detail": "" },
            ]
        });

        let trace = parse_trace(&data);
        assert_eq!(trace.units[0].steps.len(), 1);
        assert_eq!(trace.units[0].steps[0].name, "CalcStep");
        assert_eq!(trace.units[0].steps[0].event_type, "tool_native");
        assert!(trace.units[0].steps[0].success);
    }

    #[test]
    fn parse_retry_events() {
        let data = json!({
            "_meta": { "source": "t.axon", "backend": "anthropic", "tool_mode": "real", "axon_version": "1.0.0", "mode": "real" },
            "events": [
                { "event": "unit_start", "unit": "F", "step": "", "detail": "" },
                { "event": "retry_attempt", "unit": "F", "step": "S1", "detail": "attempt=1/2" },
                { "event": "step_complete", "unit": "F", "step": "S1", "detail": "retry succeeded" },
                { "event": "unit_complete", "unit": "F", "step": "", "detail": "" },
            ]
        });

        let trace = parse_trace(&data);
        assert!(trace.units[0].steps[0].was_retried);
        assert_eq!(trace.summary.total_retries, 1);
    }

    #[test]
    fn parse_error_step() {
        let data = json!({
            "_meta": { "source": "t.axon", "backend": "anthropic", "tool_mode": "real", "axon_version": "1.0.0", "mode": "real" },
            "events": [
                { "event": "unit_start", "unit": "F", "step": "", "detail": "" },
                { "event": "step_error", "unit": "F", "step": "Bad", "detail": "connection failed" },
                { "event": "unit_complete", "unit": "F", "step": "", "detail": "" },
            ]
        });

        let trace = parse_trace(&data);
        assert!(!trace.units[0].steps[0].success);
        assert_eq!(trace.summary.total_errors, 1);
    }

    #[test]
    fn parse_hook_metrics() {
        // The metrics event arrives after `unit_complete` and must still be
        // applied to the unit that was just closed.
        let data = json!({
            "_meta": { "source": "t.axon", "backend": "anthropic", "tool_mode": "real", "axon_version": "1.0.0", "mode": "real" },
            "events": [
                { "event": "unit_start", "unit": "F", "step": "", "detail": "" },
                { "event": "step_complete", "unit": "F", "step": "S", "detail": "ok" },
                { "event": "unit_complete", "unit": "F", "step": "", "detail": "" },
                { "event": "hook_unit_metrics", "unit": "F", "step": "", "detail": "duration=250ms, steps=1, tokens_in=100, tokens_out=50, breaches=0, chains=0" },
            ]
        });

        let trace = parse_trace(&data);
        assert_eq!(trace.units[0].duration_ms, 250);
        assert_eq!(trace.units[0].total_input_tokens, 100);
        assert_eq!(trace.units[0].total_output_tokens, 50);
    }

    #[test]
    fn regression_identical() {
        let data = sample_trace();
        let trace = parse_trace(&data);
        let diff = compare_traces(&trace, &trace);
        assert!(diff.identical);
        assert_eq!(diff.summary.matched, 2);
        assert_eq!(diff.summary.changed, 0);
    }

    #[test]
    fn regression_changed_output() {
        let data_old = sample_trace();
        let mut data_new = sample_trace();
        // events[2] is step_complete for S1
        data_new["events"][2]["detail"] = json!("different result");

        let old = parse_trace(&data_old);
        let new = parse_trace(&data_new);
        let diff = compare_traces(&old, &new);

        assert!(!diff.identical);
        assert_eq!(diff.summary.changed, 1);
        assert_eq!(diff.summary.matched, 1);
    }

    #[test]
    fn regression_added_step() {
        let data_old = sample_trace();
        let mut data_new = sample_trace();
        // Add a new step
        data_new["events"].as_array_mut().unwrap().insert(3, json!(
            { "event": "step_complete", "unit": "Flow1", "step": "S3", "detail": "new step" }
        ));

        let old = parse_trace(&data_old);
        let new = parse_trace(&data_new);
        let diff = compare_traces(&old, &new);

        assert!(!diff.identical);
        assert_eq!(diff.summary.added, 1);
    }

    #[test]
    fn regression_removed_step() {
        let data_old = sample_trace();
        let mut data_new = sample_trace();
        // events[4] is step_complete for S2
        data_new["events"].as_array_mut().unwrap().remove(4);

        let old = parse_trace(&data_old);
        let new = parse_trace(&data_new);
        let diff = compare_traces(&old, &new);

        assert!(!diff.identical);
        assert_eq!(diff.summary.removed, 1);
        assert_eq!(diff.summary.matched, 1);
    }

    #[test]
    fn run_replay_file_not_found() {
        assert_eq!(run_replay("nonexistent.trace.json", None, false), 2);
    }

    #[test]
    fn run_replay_single_trace() {
        let tmp = std::env::temp_dir().join("axon_replay_test.trace.json");
        let data = sample_trace();
        std::fs::write(&tmp, serde_json::to_string(&data).unwrap()).unwrap();

        assert_eq!(run_replay(tmp.to_str().unwrap(), None, true), 0);
        let _ = std::fs::remove_file(tmp);
    }

    #[test]
    fn run_replay_regression_identical() {
        let tmp = std::env::temp_dir().join("axon_replay_reg.trace.json");
        let data = sample_trace();
        std::fs::write(&tmp, serde_json::to_string(&data).unwrap()).unwrap();

        let path = tmp.to_str().unwrap();
        assert_eq!(run_replay(path, Some(path), true), 0);
        let _ = std::fs::remove_file(tmp);
    }

    #[test]
    fn run_replay_regression_different() {
        let tmp_a = std::env::temp_dir().join("axon_replay_a.trace.json");
        let tmp_b = std::env::temp_dir().join("axon_replay_b.trace.json");

        let data_a = sample_trace();
        let mut data_b = sample_trace();
        // events[2] is step_complete for S1
        data_b["events"][2]["detail"] = json!("changed output");

        std::fs::write(&tmp_a, serde_json::to_string(&data_a).unwrap()).unwrap();
        std::fs::write(&tmp_b, serde_json::to_string(&data_b).unwrap()).unwrap();

        assert_eq!(
            run_replay(tmp_a.to_str().unwrap(), Some(tmp_b.to_str().unwrap()), true),
            1,
        );

        let _ = std::fs::remove_file(tmp_a);
        let _ = std::fs::remove_file(tmp_b);
    }

    #[test]
    fn regression_status_serializes() {
        assert_eq!(serde_json::to_string(&RegressionStatus::Match).unwrap(), "\"match\"");
        assert_eq!(serde_json::to_string(&RegressionStatus::Changed).unwrap(), "\"changed\"");
        assert_eq!(serde_json::to_string(&RegressionStatus::Added).unwrap(), "\"added\"");
        assert_eq!(serde_json::to_string(&RegressionStatus::Removed).unwrap(), "\"removed\"");
    }

    #[test]
    fn empty_trace() {
        let data = json!({ "_meta": {}, "events": [] });
        let trace = parse_trace(&data);
        assert_eq!(trace.units.len(), 0);
        assert_eq!(trace.summary.total_steps, 0);
    }
}