Skip to main content

axon/
trace_stats.rs

//! Trace Analytics — aggregate statistics across multiple execution traces.
//!
//! Loads one or more `.trace.json` files and computes:
//!   - Latency percentiles (p50, p95, p99) plus mean, min, and max
//!   - Token usage (input/output totals and per-unit means)
//!   - Anchor breach rate and top breached anchors
//!   - Error rate and retry rate
//!   - Step frequency distribution
//!
//! Usage:
//!   axon stats trace1.json trace2.json ...   — aggregate stats
//!   axon stats *.trace.json --json           — structured JSON output
//!
//! Exit codes:
//!   0 — stats computed from at least one successfully loaded trace
//!   2 — no trace files given, or none of them could be read and parsed
17
18use std::collections::HashMap;
19use std::io::IsTerminal;
20
21use crate::replay;
22
23// ── Analytics structures ────────────────────────────────────────────────
24
25/// Aggregate analytics across one or more traces.
26#[derive(Debug, Clone, serde::Serialize)]
27pub struct TraceAnalytics {
28    pub trace_count: usize,
29    pub latency: LatencyStats,
30    pub tokens: TokenStats,
31    pub anchors: AnchorStats,
32    pub errors: ErrorStats,
33    pub steps: StepFrequency,
34}
35
36/// Latency statistics with percentiles.
37#[derive(Debug, Clone, serde::Serialize)]
38pub struct LatencyStats {
39    pub unit_count: usize,
40    pub p50_ms: u64,
41    pub p95_ms: u64,
42    pub p99_ms: u64,
43    pub mean_ms: u64,
44    pub min_ms: u64,
45    pub max_ms: u64,
46}
47
48/// Token usage statistics.
49#[derive(Debug, Clone, serde::Serialize)]
50pub struct TokenStats {
51    pub total_input: u64,
52    pub total_output: u64,
53    pub total: u64,
54    pub mean_input_per_unit: u64,
55    pub mean_output_per_unit: u64,
56    pub mean_total_per_unit: u64,
57    pub unit_count: usize,
58}
59
60/// Anchor pass/breach statistics.
61#[derive(Debug, Clone, serde::Serialize)]
62pub struct AnchorStats {
63    pub total_checks: usize,
64    pub total_passes: usize,
65    pub total_breaches: usize,
66    pub pass_rate: f64,
67    pub breach_rate: f64,
68    pub top_breaches: Vec<AnchorBreachEntry>,
69}
70
71/// A single anchor breach frequency entry.
72#[derive(Debug, Clone, serde::Serialize)]
73pub struct AnchorBreachEntry {
74    pub anchor_name: String,
75    pub breach_count: usize,
76}
77
78/// Error and retry statistics.
79#[derive(Debug, Clone, serde::Serialize)]
80pub struct ErrorStats {
81    pub total_steps: usize,
82    pub total_errors: usize,
83    pub total_retries: usize,
84    pub error_rate: f64,
85    pub retry_rate: f64,
86}
87
88/// Step name frequency distribution.
89#[derive(Debug, Clone, serde::Serialize)]
90pub struct StepFrequency {
91    pub unique_steps: usize,
92    pub top_steps: Vec<StepFreqEntry>,
93}
94
95/// A single step frequency entry.
96#[derive(Debug, Clone, serde::Serialize)]
97pub struct StepFreqEntry {
98    pub step_name: String,
99    pub count: usize,
100}
101
102// ── Computation ─────────────────────────────────────────────────────────
103
104/// Compute aggregate analytics from a set of parsed traces.
105pub fn compute_analytics(traces: &[replay::ReplayTrace]) -> TraceAnalytics {
106    let mut durations: Vec<u64> = Vec::new();
107    let mut total_input: u64 = 0;
108    let mut total_output: u64 = 0;
109    let mut total_passes: usize = 0;
110    let mut total_breaches: usize = 0;
111    let mut total_steps: usize = 0;
112    let mut total_errors: usize = 0;
113    let mut total_retries: usize = 0;
114    let mut breach_counts: HashMap<String, usize> = HashMap::new();
115    let mut step_counts: HashMap<String, usize> = HashMap::new();
116
117    for trace in traces {
118        for unit in &trace.units {
119            durations.push(unit.duration_ms);
120            total_input += unit.total_input_tokens;
121            total_output += unit.total_output_tokens;
122
123            for step in &unit.steps {
124                total_steps += 1;
125                *step_counts.entry(step.name.clone()).or_insert(0) += 1;
126
127                if !step.success {
128                    total_errors += 1;
129                }
130                if step.was_retried {
131                    total_retries += 1;
132                }
133
134                for anchor in &step.anchor_results {
135                    if anchor.passed {
136                        total_passes += 1;
137                    } else {
138                        total_breaches += 1;
139                        *breach_counts.entry(anchor.anchor_name.clone()).or_insert(0) += 1;
140                    }
141                }
142            }
143        }
144    }
145
146    let latency = compute_latency(&durations);
147    let unit_count = durations.len();
148
149    let tokens = TokenStats {
150        total_input,
151        total_output,
152        total: total_input + total_output,
153        mean_input_per_unit: if unit_count > 0 { total_input / unit_count as u64 } else { 0 },
154        mean_output_per_unit: if unit_count > 0 { total_output / unit_count as u64 } else { 0 },
155        mean_total_per_unit: if unit_count > 0 { (total_input + total_output) / unit_count as u64 } else { 0 },
156        unit_count,
157    };
158
159    let total_checks = total_passes + total_breaches;
160    let anchors = AnchorStats {
161        total_checks,
162        total_passes,
163        total_breaches,
164        pass_rate: if total_checks > 0 { total_passes as f64 / total_checks as f64 } else { 1.0 },
165        breach_rate: if total_checks > 0 { total_breaches as f64 / total_checks as f64 } else { 0.0 },
166        top_breaches: top_breaches(&breach_counts, 10),
167    };
168
169    let errors = ErrorStats {
170        total_steps,
171        total_errors,
172        total_retries,
173        error_rate: if total_steps > 0 { total_errors as f64 / total_steps as f64 } else { 0.0 },
174        retry_rate: if total_steps > 0 { total_retries as f64 / total_steps as f64 } else { 0.0 },
175    };
176
177    let steps = compute_step_frequency(&step_counts, 10);
178
179    TraceAnalytics {
180        trace_count: traces.len(),
181        latency,
182        tokens,
183        anchors,
184        errors,
185        steps,
186    }
187}
188
189fn compute_latency(durations: &[u64]) -> LatencyStats {
190    if durations.is_empty() {
191        return LatencyStats {
192            unit_count: 0,
193            p50_ms: 0,
194            p95_ms: 0,
195            p99_ms: 0,
196            mean_ms: 0,
197            min_ms: 0,
198            max_ms: 0,
199        };
200    }
201
202    let mut sorted = durations.to_vec();
203    sorted.sort();
204    let n = sorted.len();
205
206    LatencyStats {
207        unit_count: n,
208        p50_ms: percentile(&sorted, 50.0),
209        p95_ms: percentile(&sorted, 95.0),
210        p99_ms: percentile(&sorted, 99.0),
211        mean_ms: sorted.iter().sum::<u64>() / n as u64,
212        min_ms: sorted[0],
213        max_ms: sorted[n - 1],
214    }
215}
216
/// Nearest-rank percentile of an ascending-sorted slice.
///
/// The rank is `ceil(pct / 100 * len)`, clamped into `1..=len`, and the value
/// at that one-based rank is returned. An empty slice yields `0`.
fn percentile(sorted: &[u64], pct: f64) -> u64 {
    let len = sorted.len();
    let fraction = pct / 100.0;
    let rank = (fraction * len as f64).ceil() as usize;
    // Clamp to a valid one-based rank; `len.max(1)` keeps the bounds ordered
    // for the empty slice, where the lookup below falls through to 0.
    let index = rank.clamp(1, len.max(1)) - 1;
    sorted.get(index).copied().unwrap_or(0)
}
226
227fn top_breaches(counts: &HashMap<String, usize>, limit: usize) -> Vec<AnchorBreachEntry> {
228    let mut entries: Vec<AnchorBreachEntry> = counts
229        .iter()
230        .map(|(name, &count)| AnchorBreachEntry {
231            anchor_name: name.clone(),
232            breach_count: count,
233        })
234        .collect();
235    entries.sort_by(|a, b| b.breach_count.cmp(&a.breach_count));
236    entries.truncate(limit);
237    entries
238}
239
240fn compute_step_frequency(counts: &HashMap<String, usize>, limit: usize) -> StepFrequency {
241    let mut entries: Vec<StepFreqEntry> = counts
242        .iter()
243        .map(|(name, &count)| StepFreqEntry {
244            step_name: name.clone(),
245            count,
246        })
247        .collect();
248    entries.sort_by(|a, b| b.count.cmp(&a.count));
249    let unique_steps = entries.len();
250    entries.truncate(limit);
251
252    StepFrequency {
253        unique_steps,
254        top_steps: entries,
255    }
256}
257
258// ── CLI entry point ─────────────────────────────────────────────────────
259
260/// Load trace files and compute aggregate analytics.
261/// Format: "text" (default), "json", "prometheus", "csv".
262/// Returns exit code: 0 = success, 2 = error.
263pub fn run_stats(files: &[String], format: &str) -> i32 {
264    if files.is_empty() {
265        eprintln!("error: no trace files provided");
266        return 2;
267    }
268
269    let mut traces: Vec<replay::ReplayTrace> = Vec::new();
270    let mut errors = 0;
271
272    for path in files {
273        match std::fs::read_to_string(path) {
274            Ok(content) => {
275                match serde_json::from_str::<serde_json::Value>(&content) {
276                    Ok(data) => {
277                        traces.push(replay::parse_trace(&data));
278                    }
279                    Err(e) => {
280                        eprintln!("error: failed to parse {}: {}", path, e);
281                        errors += 1;
282                    }
283                }
284            }
285            Err(e) => {
286                eprintln!("error: failed to read {}: {}", path, e);
287                errors += 1;
288            }
289        }
290    }
291
292    if traces.is_empty() {
293        eprintln!("error: no valid traces loaded ({} errors)", errors);
294        return 2;
295    }
296
297    let analytics = compute_analytics(&traces);
298
299    match format {
300        "json" => println!("{}", serde_json::to_string_pretty(&analytics).unwrap()),
301        "prometheus" => print!("{}", crate::trace_export::to_prometheus(&analytics)),
302        "csv" => print!("{}", crate::trace_export::to_csv(&analytics)),
303        _ => print_analytics(&analytics, errors),
304    }
305
306    0
307}
308
309// ── Human-readable output ───────────────────────────────────────────────
310
311fn print_analytics(a: &TraceAnalytics, load_errors: usize) {
312    let use_color = std::io::stdout().is_terminal();
313
314    let bold = if use_color { "\x1b[1m" } else { "" };
315    let cyan = if use_color { "\x1b[36m" } else { "" };
316    let yellow = if use_color { "\x1b[33m" } else { "" };
317    let red = if use_color { "\x1b[31m" } else { "" };
318    let green = if use_color { "\x1b[32m" } else { "" };
319    let reset = if use_color { "\x1b[0m" } else { "" };
320
321    println!("{}═══ AXON Trace Analytics ═══{}", bold, reset);
322    println!();
323
324    // Overview
325    println!("{}Traces:{} {}", cyan, reset, a.trace_count);
326    if load_errors > 0 {
327        println!("{}Load errors:{} {}", red, reset, load_errors);
328    }
329    println!("{}Units:{} {}", cyan, reset, a.latency.unit_count);
330    println!("{}Steps:{} {}", cyan, reset, a.errors.total_steps);
331    println!();
332
333    // Latency
334    println!("{}── Latency ──{}", bold, reset);
335    if a.latency.unit_count > 0 {
336        println!("  p50:  {} ms", a.latency.p50_ms);
337        println!("  p95:  {} ms", a.latency.p95_ms);
338        println!("  p99:  {} ms", a.latency.p99_ms);
339        println!("  mean: {} ms", a.latency.mean_ms);
340        println!("  min:  {} ms", a.latency.min_ms);
341        println!("  max:  {} ms", a.latency.max_ms);
342    } else {
343        println!("  (no latency data)");
344    }
345    println!();
346
347    // Tokens
348    println!("{}── Tokens ──{}", bold, reset);
349    println!("  total input:    {}", a.tokens.total_input);
350    println!("  total output:   {}", a.tokens.total_output);
351    println!("  total:          {}", a.tokens.total);
352    if a.tokens.unit_count > 0 {
353        println!("  mean/unit:      {} in + {} out", a.tokens.mean_input_per_unit, a.tokens.mean_output_per_unit);
354    }
355    println!();
356
357    // Anchors
358    println!("{}── Anchors ──{}", bold, reset);
359    if a.anchors.total_checks > 0 {
360        println!("  checks:    {}", a.anchors.total_checks);
361        println!("  {}passes:    {}{} ({:.1}%)", green, a.anchors.total_passes, reset, a.anchors.pass_rate * 100.0);
362        println!("  {}breaches:  {}{} ({:.1}%)", red, a.anchors.total_breaches, reset, a.anchors.breach_rate * 100.0);
363        if !a.anchors.top_breaches.is_empty() {
364            println!("  top breaches:");
365            for b in &a.anchors.top_breaches {
366                println!("    {}× {}{}{}", b.breach_count, yellow, b.anchor_name, reset);
367            }
368        }
369    } else {
370        println!("  (no anchor data)");
371    }
372    println!();
373
374    // Errors
375    println!("{}── Errors ──{}", bold, reset);
376    println!("  errors:  {} / {} steps ({:.1}%)", a.errors.total_errors, a.errors.total_steps, a.errors.error_rate * 100.0);
377    println!("  retries: {} ({:.1}%)", a.errors.total_retries, a.errors.retry_rate * 100.0);
378    println!();
379
380    // Step frequency
381    println!("{}── Step Frequency ──{}", bold, reset);
382    println!("  unique steps: {}", a.steps.unique_steps);
383    if !a.steps.top_steps.is_empty() {
384        for s in &a.steps.top_steps {
385            println!("    {}× {}", s.count, s.step_name);
386        }
387    }
388}
389
390// ── Tests ────────────────────────────────────────────────────────────────
391
#[cfg(test)]
mod tests {
    //! Unit tests for the aggregation helpers and the `run_stats` entry point.
    //!
    //! Traces are assembled in memory with the `make_*` helpers; only the
    //! `run_stats_*` tests touch the filesystem.

    use super::*;
    use crate::replay::{ReplayTrace, TraceMeta, ReplayUnit, ReplayStep, AnchorEvent, ReplaySummary};

    /// Fixed metadata; the analytics code never reads it.
    fn make_meta() -> TraceMeta {
        TraceMeta {
            source: "test.axon".into(),
            backend: "anthropic".into(),
            tool_mode: "stub".into(),
            axon_version: "1.0.0".into(),
            mode: "stub".into(),
        }
    }

    /// Build a step with the given outcome flags and anchor results.
    fn make_step(name: &str, success: bool, retried: bool, anchors: Vec<AnchorEvent>) -> ReplayStep {
        ReplayStep {
            name: name.into(),
            event_type: "step_complete".into(),
            output: format!("{} output", name),
            success,
            anchor_results: anchors,
            was_retried: retried,
        }
    }

    /// Build a unit with the given duration, token counts and steps.
    fn make_unit(flow: &str, duration_ms: u64, input_tokens: u64, output_tokens: u64, steps: Vec<ReplayStep>) -> ReplayUnit {
        ReplayUnit {
            flow_name: flow.into(),
            steps,
            duration_ms,
            total_input_tokens: input_tokens,
            total_output_tokens: output_tokens,
            anchor_breaches: 0,
        }
    }

    /// Wrap units into a trace. Only the step and token totals of the summary
    /// are filled in; `compute_analytics` works from the units, not the summary.
    fn make_trace(units: Vec<ReplayUnit>) -> ReplayTrace {
        let total_steps = units.iter().map(|u| u.steps.len()).sum();
        let total_input: u64 = units.iter().map(|u| u.total_input_tokens).sum();
        let total_output: u64 = units.iter().map(|u| u.total_output_tokens).sum();
        ReplayTrace {
            meta: make_meta(),
            units,
            summary: ReplaySummary {
                total_units: 0,
                total_steps,
                total_anchor_passes: 0,
                total_anchor_breaches: 0,
                total_retries: 0,
                total_errors: 0,
                total_input_tokens: total_input,
                total_output_tokens: total_output,
            },
        }
    }

    /// Build an anchor result with an empty detail string.
    fn anchor(name: &str, passed: bool) -> AnchorEvent {
        AnchorEvent { anchor_name: name.into(), passed, detail: String::new() }
    }

    #[test]
    fn percentile_basic() {
        // 10 values: 10,20,30,...,100
        let data: Vec<u64> = (1..=10).map(|x| x * 10).collect();
        assert_eq!(percentile(&data, 50.0), 50);
        assert_eq!(percentile(&data, 95.0), 100);
        assert_eq!(percentile(&data, 99.0), 100);
        assert_eq!(percentile(&data, 0.0), 10); // ceil(0) = 0, saturating_sub → 0 → first element
    }

    #[test]
    fn percentile_single_value() {
        assert_eq!(percentile(&[42], 50.0), 42);
        assert_eq!(percentile(&[42], 99.0), 42);
    }

    #[test]
    fn percentile_empty() {
        assert_eq!(percentile(&[], 50.0), 0);
    }

    #[test]
    fn latency_stats_computed() {
        let t1 = make_trace(vec![
            make_unit("F", 100, 0, 0, vec![make_step("S1", true, false, vec![])]),
            make_unit("F", 200, 0, 0, vec![make_step("S2", true, false, vec![])]),
        ]);
        let t2 = make_trace(vec![
            make_unit("F", 150, 0, 0, vec![make_step("S1", true, false, vec![])]),
        ]);

        let a = compute_analytics(&[t1, t2]);
        assert_eq!(a.latency.unit_count, 3);
        assert_eq!(a.latency.min_ms, 100);
        assert_eq!(a.latency.max_ms, 200);
        assert_eq!(a.latency.mean_ms, 150); // (100+200+150)/3
    }

    #[test]
    fn token_stats_aggregated() {
        let t = make_trace(vec![
            make_unit("F1", 0, 100, 50, vec![make_step("S", true, false, vec![])]),
            make_unit("F2", 0, 200, 80, vec![make_step("S", true, false, vec![])]),
        ]);

        let a = compute_analytics(&[t]);
        assert_eq!(a.tokens.total_input, 300);
        assert_eq!(a.tokens.total_output, 130);
        assert_eq!(a.tokens.total, 430);
        assert_eq!(a.tokens.mean_input_per_unit, 150);
        assert_eq!(a.tokens.mean_output_per_unit, 65);
        assert_eq!(a.tokens.unit_count, 2);
    }

    #[test]
    fn anchor_stats_computed() {
        let t = make_trace(vec![
            make_unit("F", 0, 0, 0, vec![
                make_step("S1", true, false, vec![
                    anchor("SafeOutput", true),
                    anchor("NoHallucination", false),
                ]),
                make_step("S2", true, false, vec![
                    anchor("SafeOutput", true),
                    anchor("NoHallucination", false),
                    anchor("FactualOnly", false),
                ]),
            ]),
        ]);

        let a = compute_analytics(&[t]);
        assert_eq!(a.anchors.total_checks, 5);
        assert_eq!(a.anchors.total_passes, 2);
        assert_eq!(a.anchors.total_breaches, 3);
        assert!((a.anchors.pass_rate - 0.4).abs() < 0.01);
        assert!((a.anchors.breach_rate - 0.6).abs() < 0.01);

        // Top breaches sorted by count
        assert_eq!(a.anchors.top_breaches.len(), 2);
        assert_eq!(a.anchors.top_breaches[0].anchor_name, "NoHallucination");
        assert_eq!(a.anchors.top_breaches[0].breach_count, 2);
        assert_eq!(a.anchors.top_breaches[1].anchor_name, "FactualOnly");
        assert_eq!(a.anchors.top_breaches[1].breach_count, 1);
    }

    #[test]
    fn error_and_retry_stats() {
        let t = make_trace(vec![
            make_unit("F", 0, 0, 0, vec![
                make_step("S1", true, false, vec![]),
                make_step("S2", false, true, vec![]),  // error + retried
                make_step("S3", true, true, vec![]),   // success but was retried
                make_step("S4", false, false, vec![]), // error, no retry
            ]),
        ]);

        let a = compute_analytics(&[t]);
        assert_eq!(a.errors.total_steps, 4);
        assert_eq!(a.errors.total_errors, 2);
        assert_eq!(a.errors.total_retries, 2);
        assert!((a.errors.error_rate - 0.5).abs() < 0.01);
        assert!((a.errors.retry_rate - 0.5).abs() < 0.01);
    }

    #[test]
    fn step_frequency_distribution() {
        let t = make_trace(vec![
            make_unit("F1", 0, 0, 0, vec![
                make_step("Analyze", true, false, vec![]),
                make_step("Summarize", true, false, vec![]),
            ]),
            make_unit("F2", 0, 0, 0, vec![
                make_step("Analyze", true, false, vec![]),
                make_step("Generate", true, false, vec![]),
                make_step("Analyze", true, false, vec![]),
            ]),
        ]);

        let a = compute_analytics(&[t]);
        assert_eq!(a.steps.unique_steps, 3);
        assert_eq!(a.steps.top_steps[0].step_name, "Analyze");
        assert_eq!(a.steps.top_steps[0].count, 3);
    }

    #[test]
    fn empty_traces() {
        let a = compute_analytics(&[]);
        assert_eq!(a.trace_count, 0);
        assert_eq!(a.latency.unit_count, 0);
        assert_eq!(a.latency.p50_ms, 0);
        assert_eq!(a.tokens.total, 0);
        assert!((a.anchors.pass_rate - 1.0).abs() < 0.01); // No checks → 100% pass
        assert!((a.anchors.breach_rate - 0.0).abs() < 0.01);
    }

    #[test]
    fn multiple_traces_aggregate() {
        let t1 = make_trace(vec![
            make_unit("F", 100, 50, 20, vec![make_step("A", true, false, vec![])]),
        ]);
        let t2 = make_trace(vec![
            make_unit("F", 200, 70, 30, vec![make_step("B", true, false, vec![])]),
        ]);

        let a = compute_analytics(&[t1, t2]);
        assert_eq!(a.trace_count, 2);
        assert_eq!(a.latency.unit_count, 2);
        assert_eq!(a.tokens.total_input, 120);
        assert_eq!(a.tokens.total_output, 50);
        assert_eq!(a.errors.total_steps, 2);
        assert_eq!(a.steps.unique_steps, 2);
    }

    #[test]
    fn no_anchor_data_defaults() {
        let t = make_trace(vec![
            make_unit("F", 100, 0, 0, vec![make_step("S", true, false, vec![])]),
        ]);

        let a = compute_analytics(&[t]);
        assert_eq!(a.anchors.total_checks, 0);
        assert!((a.anchors.pass_rate - 1.0).abs() < 0.01);
        assert!(a.anchors.top_breaches.is_empty());
    }

    #[test]
    fn analytics_serializes_to_json() {
        let t = make_trace(vec![
            make_unit("F", 100, 50, 20, vec![
                make_step("S", true, false, vec![anchor("Safe", true)]),
            ]),
        ]);

        let a = compute_analytics(&[t]);
        let json = serde_json::to_value(&a).unwrap();
        assert_eq!(json["trace_count"], 1);
        assert!(json["latency"]["p50_ms"].is_number());
        assert!(json["tokens"]["total"].is_number());
        assert!(json["anchors"]["pass_rate"].is_number());
        assert!(json["errors"]["error_rate"].is_number());
        assert!(json["steps"]["unique_steps"].is_number());
    }

    #[test]
    fn run_stats_no_files_returns_error() {
        assert_eq!(run_stats(&[], "text"), 2);
    }

    #[test]
    fn run_stats_missing_file_returns_error() {
        let files = vec!["nonexistent_trace_file.json".to_string()];
        assert_eq!(run_stats(&files, "text"), 2);
    }

    #[test]
    fn run_stats_valid_trace_json() {
        // Include the process id so concurrent test processes sharing a temp
        // directory cannot overwrite or delete each other's fixture.
        let tmp = std::env::temp_dir()
            .join(format!("axon_stats_test_{}.trace.json", std::process::id()));
        let data = serde_json::json!({
            "_meta": { "source": "t.axon", "backend": "anthropic", "tool_mode": "stub", "axon_version": "1.0.0", "mode": "stub" },
            "events": [
                { "event": "unit_start", "unit": "F", "step": "", "detail": "" },
                { "event": "step_complete", "unit": "F", "step": "S", "detail": "ok" },
                { "event": "unit_complete", "unit": "F", "step": "", "detail": "" },
            ]
        });
        std::fs::write(&tmp, serde_json::to_string(&data).unwrap()).unwrap();

        let files = vec![tmp.to_str().unwrap().to_string()];
        let exit_code = run_stats(&files, "json");

        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&tmp);
        assert_eq!(exit_code, 0);
    }
}