Skip to main content

harn_vm/
profile.rs

1//! Categorical profile rollup over completed [`crate::tracing::Span`]s.
2//!
3//! Turns the raw span tree (parent/child, kinds, durations) into a digestible
4//! "where did the time go?" answer for harness writers. This module owns no
5//! state of its own — it consumes the snapshot returned by
6//! [`crate::tracing::peek_spans`] and folds it into a [`RunProfile`] that
7//! callers can render to text or serialize to JSON.
8//!
9//! Top-level wall time and residual ("VM / script overhead") are computed
10//! by summing only spans whose parent is the pipeline root (or `None`),
11//! so nested LLM/tool work isn't double-counted under both its category
12//! and its containing step.
13
14use std::collections::BTreeMap;
15
16use serde::{Deserialize, Serialize};
17
18use crate::tracing::Span;
19
20/// Top-N to surface in the rendered profile. Kept small so the stderr
21/// summary stays scannable.
22const TOP_N: usize = 5;
23
24/// Aggregate breakdown of one run's spans.
25#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
26pub struct RunProfile {
27    pub total_wall_ms: u64,
28    #[serde(skip_serializing_if = "Option::is_none")]
29    pub first_token_ms: Option<u64>,
30    pub by_kind: Vec<KindBucket>,
31    pub residual_ms: u64,
32    pub top_llm_calls: Vec<SpanRef>,
33    pub top_tool_calls: Vec<SpanRef>,
34    pub steps: Vec<StepSummary>,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
38pub struct KindBucket {
39    pub kind: String,
40    pub total_ms: u64,
41    pub count: u64,
42    pub pct_of_wall: f64,
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
46pub struct SpanRef {
47    pub span_id: u64,
48    pub kind: String,
49    pub name: String,
50    pub duration_ms: u64,
51    pub step: Option<String>,
52    pub model: Option<String>,
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
56pub struct StepSummary {
57    pub name: String,
58    pub duration_ms: u64,
59    pub llm_ms: u64,
60    pub tool_ms: u64,
61    pub other_ms: u64,
62    pub llm_calls: u64,
63    pub tool_calls: u64,
64}
65
66/// Build a profile from a slice of completed spans. Designed to be called
67/// post-run with `crate::tracing::peek_spans()`. Pure function — does not
68/// touch globals.
69pub fn build(spans: &[Span]) -> RunProfile {
70    if spans.is_empty() {
71        return RunProfile::default();
72    }
73
74    // The pipeline root is conventionally the only top-level VM span. Host
75    // adapters may add sibling setup spans, so top-level non-pipeline spans
76    // contribute to wall time and their own buckets.
77    let total_wall_ms: u64 = spans
78        .iter()
79        .filter(|s| s.parent_id.is_none())
80        .map(|s| s.duration_ms)
81        .sum();
82
83    let by_kind = bucket_by_kind(spans, total_wall_ms);
84
85    // Residual = wall - sum of "real work" categories at the top level.
86    // Imports/parallel/spawn count as work too; the category buckets cover
87    // them so we subtract depth-1 work plus any host setup sibling spans.
88    let accounted_total: u64 = spans
89        .iter()
90        .filter(|s| {
91            (s.parent_id.is_none() && !is_profile_root_span(s))
92                || matches!(s.parent_id, Some(pid) if is_pipeline_root(spans, pid))
93        })
94        .map(|s| s.duration_ms)
95        .sum();
96    let residual_ms = total_wall_ms.saturating_sub(accounted_total);
97
98    let top_llm_calls = top_n_by_duration(spans, "llm_call");
99    let top_tool_calls = top_n_by_duration(spans, "tool_call");
100    let steps = build_step_summaries(spans);
101    let first_token_ms = first_token_ms(spans);
102
103    RunProfile {
104        total_wall_ms,
105        first_token_ms,
106        by_kind,
107        residual_ms,
108        top_llm_calls,
109        top_tool_calls,
110        steps,
111    }
112}
113
114/// Build one aggregate profile from independent span snapshots.
115///
116/// Each input snapshot can reuse span ids starting at 1. The helper remaps ids
117/// before folding so parent/child relationships do not collide across runs.
118pub fn build_aggregate(span_groups: &[Vec<Span>]) -> RunProfile {
119    let mut merged = Vec::new();
120    let mut next_offset = 0u64;
121    for group in span_groups {
122        let offset = next_offset;
123        let max_id = group.iter().map(|span| span.span_id).max().unwrap_or(0);
124        for span in group {
125            let mut remapped = span.clone();
126            remapped.span_id += offset;
127            remapped.parent_id = remapped.parent_id.map(|id| id + offset);
128            merged.push(remapped);
129        }
130        next_offset += max_id + 1;
131    }
132    build(&merged)
133}
134
135fn first_token_ms(spans: &[Span]) -> Option<u64> {
136    spans
137        .iter()
138        .filter(|span| span.kind.as_str() == "llm_call")
139        .filter_map(|span| {
140            span.metadata
141                .get(crate::llm::first_token::FIRST_TOKEN_METADATA_KEY)
142                .and_then(serde_json::Value::as_u64)
143        })
144        .min()
145}
146
147fn is_pipeline_root(spans: &[Span], id: u64) -> bool {
148    spans
149        .iter()
150        .find(|s| s.span_id == id)
151        .map(is_profile_root_span)
152        .unwrap_or(false)
153}
154
155fn is_profile_root_span(span: &Span) -> bool {
156    span.parent_id.is_none() && span.kind == crate::tracing::SpanKind::Pipeline
157}
158
159fn bucket_by_kind(spans: &[Span], total_wall_ms: u64) -> Vec<KindBucket> {
160    // Sum per kind across ALL spans of that kind (any depth). This is
161    // the user's mental model of "how much LLM time was there in this
162    // run?" — overlapping/nested doesn't matter because LLM calls are
163    // leaves.
164    let mut totals: BTreeMap<String, (u64, u64)> = BTreeMap::new();
165    for span in spans {
166        if is_profile_root_span(span) {
167            // Skip the synthetic pipeline span; it's the wall-time
168            // denominator, not a category bucket.
169            continue;
170        }
171        let entry = totals.entry(span.kind.as_str().to_string()).or_default();
172        entry.0 += span.duration_ms;
173        entry.1 += 1;
174    }
175    let mut buckets: Vec<KindBucket> = totals
176        .into_iter()
177        .map(|(kind, (total_ms, count))| KindBucket {
178            kind,
179            total_ms,
180            count,
181            pct_of_wall: pct(total_ms, total_wall_ms),
182        })
183        .collect();
184    buckets.sort_by_key(|bucket| std::cmp::Reverse(bucket.total_ms));
185    buckets
186}
187
188fn top_n_by_duration(spans: &[Span], kind: &str) -> Vec<SpanRef> {
189    let mut matches: Vec<&Span> = spans.iter().filter(|s| s.kind.as_str() == kind).collect();
190    matches.sort_by_key(|span| std::cmp::Reverse(span.duration_ms));
191    matches
192        .into_iter()
193        .take(TOP_N)
194        .map(|span| SpanRef {
195            span_id: span.span_id,
196            kind: span.kind.as_str().to_string(),
197            name: span.name.clone(),
198            duration_ms: span.duration_ms,
199            step: enclosing_step_name(spans, span.parent_id),
200            model: span
201                .metadata
202                .get("model")
203                .and_then(|v| v.as_str())
204                .map(str::to_string),
205        })
206        .collect()
207}
208
209fn enclosing_step_name(spans: &[Span], mut parent_id: Option<u64>) -> Option<String> {
210    while let Some(pid) = parent_id {
211        let parent = spans.iter().find(|s| s.span_id == pid)?;
212        if parent.kind.as_str() == "step" {
213            return Some(parent.name.clone());
214        }
215        parent_id = parent.parent_id;
216    }
217    None
218}
219
220fn build_step_summaries(spans: &[Span]) -> Vec<StepSummary> {
221    let mut steps: Vec<StepSummary> = Vec::new();
222    for step_span in spans.iter().filter(|s| s.kind.as_str() == "step") {
223        let mut summary = StepSummary {
224            name: step_span.name.clone(),
225            duration_ms: step_span.duration_ms,
226            ..StepSummary::default()
227        };
228        for descendant in descendants(spans, step_span.span_id) {
229            match descendant.kind.as_str() {
230                "llm_call" => {
231                    summary.llm_ms += descendant.duration_ms;
232                    summary.llm_calls += 1;
233                }
234                "tool_call" => {
235                    summary.tool_ms += descendant.duration_ms;
236                    summary.tool_calls += 1;
237                }
238                _ => {}
239            }
240        }
241        // "other" approximates VM + script overhead inside the step.
242        // LLM/tool durations can overlap (parallel calls), so clamp at 0.
243        summary.other_ms = summary
244            .duration_ms
245            .saturating_sub(summary.llm_ms.saturating_add(summary.tool_ms));
246        steps.push(summary);
247    }
248    steps.sort_by_key(|summary| std::cmp::Reverse(summary.duration_ms));
249    steps
250}
251
252fn descendants(spans: &[Span], root: u64) -> Vec<&Span> {
253    let mut out = Vec::new();
254    let mut frontier = vec![root];
255    while let Some(parent) = frontier.pop() {
256        for span in spans {
257            if span.parent_id == Some(parent) {
258                out.push(span);
259                frontier.push(span.span_id);
260            }
261        }
262    }
263    out
264}
265
266fn pct(part: u64, whole: u64) -> f64 {
267    if whole == 0 {
268        0.0
269    } else {
270        (part as f64 / whole as f64) * 100.0
271    }
272}
273
274/// Render a profile to a human-readable string suitable for stderr after
275/// a `harn run --profile` invocation. ANSI-styled to match the existing
276/// `--trace` output.
277pub fn render(profile: &RunProfile) -> String {
278    use std::fmt::Write;
279    let mut out = String::new();
280    let _ = writeln!(out, "\n\x1b[2m─── Run profile ───\x1b[0m");
281    let _ = writeln!(
282        out,
283        "  Total wall time: {}",
284        format_secs(profile.total_wall_ms)
285    );
286    if let Some(first_token_ms) = profile.first_token_ms {
287        let _ = writeln!(out, "  First token:     {}", format_secs(first_token_ms));
288    }
289    let _ = writeln!(out, "\n  By category:");
290    for bucket in &profile.by_kind {
291        let _ = writeln!(
292            out,
293            "    {:<14} {:>10}  {:>5.1}%   ({} call{})",
294            bucket.kind,
295            format_secs(bucket.total_ms),
296            bucket.pct_of_wall,
297            bucket.count,
298            if bucket.count == 1 { "" } else { "s" },
299        );
300    }
301    let _ = writeln!(
302        out,
303        "    {:<14} {:>10}  {:>5.1}%",
304        "vm/residual",
305        format_secs(profile.residual_ms),
306        pct(profile.residual_ms, profile.total_wall_ms),
307    );
308    if !profile.top_llm_calls.is_empty() {
309        let _ = writeln!(out, "\n  Top LLM calls:");
310        for span in &profile.top_llm_calls {
311            let model = span.model.as_deref().unwrap_or(&span.name);
312            let step = span
313                .step
314                .as_deref()
315                .map(|s| format!("  step={s}"))
316                .unwrap_or_default();
317            let _ = writeln!(
318                out,
319                "    #{:<4} {:<24} {:>10}{}",
320                span.span_id,
321                model,
322                format_secs(span.duration_ms),
323                step,
324            );
325        }
326    }
327    if !profile.top_tool_calls.is_empty() {
328        let _ = writeln!(out, "\n  Top tool calls:");
329        for span in &profile.top_tool_calls {
330            let step = span
331                .step
332                .as_deref()
333                .map(|s| format!("  step={s}"))
334                .unwrap_or_default();
335            let _ = writeln!(
336                out,
337                "    #{:<4} {:<24} {:>10}{}",
338                span.span_id,
339                span.name,
340                format_secs(span.duration_ms),
341                step,
342            );
343        }
344    }
345    if !profile.steps.is_empty() {
346        let _ = writeln!(out, "\n  Per-@step:");
347        for step in &profile.steps {
348            let _ = writeln!(
349                out,
350                "    {:<20} {:>10}   (LLM {} · tools {} · other {})",
351                step.name,
352                format_secs(step.duration_ms),
353                format_secs(step.llm_ms),
354                format_secs(step.tool_ms),
355                format_secs(step.other_ms),
356            );
357        }
358    }
359    out
360}
361
362fn format_secs(ms: u64) -> String {
363    if ms < 1000 {
364        format!("{ms} ms")
365    } else {
366        format!("{:.3} s", ms as f64 / 1000.0)
367    }
368}
369
370#[cfg(test)]
371mod tests {
372    use super::*;
373    use crate::tracing::SpanKind;
374
375    fn span(span_id: u64, parent_id: Option<u64>, kind: SpanKind, name: &str, dur: u64) -> Span {
376        Span {
377            trace_id: "trace_test".to_string(),
378            span_id,
379            parent_id,
380            kind,
381            name: name.into(),
382            start_ms: 0,
383            start_unix_ms: 0,
384            duration_ms: dur,
385            metadata: BTreeMap::new(),
386            links: Vec::new(),
387            events: Vec::new(),
388        }
389    }
390
391    fn span_with_meta(
392        span_id: u64,
393        parent_id: Option<u64>,
394        kind: SpanKind,
395        name: &str,
396        dur: u64,
397        meta: &[(&str, serde_json::Value)],
398    ) -> Span {
399        let mut s = span(span_id, parent_id, kind, name, dur);
400        for (k, v) in meta {
401            s.metadata.insert((*k).to_string(), v.clone());
402        }
403        s
404    }
405
406    #[test]
407    fn empty_spans_yield_default_profile() {
408        let profile = build(&[]);
409        assert_eq!(profile.total_wall_ms, 0);
410        assert_eq!(profile.first_token_ms, None);
411        assert!(profile.by_kind.is_empty());
412        assert_eq!(profile.residual_ms, 0);
413    }
414
415    #[test]
416    fn buckets_are_sorted_descending_by_total() {
417        let spans = vec![
418            span(1, None, SpanKind::Pipeline, "main", 1000),
419            span(2, Some(1), SpanKind::LlmCall, "llm_call", 600),
420            span(3, Some(1), SpanKind::ToolCall, "mcp_call", 250),
421            span(4, Some(1), SpanKind::ToolCall, "mcp_call", 50),
422        ];
423        let profile = build(&spans);
424        assert_eq!(profile.total_wall_ms, 1000);
425        assert_eq!(profile.by_kind[0].kind, "llm_call");
426        assert_eq!(profile.by_kind[0].total_ms, 600);
427        assert_eq!(profile.by_kind[1].kind, "tool_call");
428        assert_eq!(profile.by_kind[1].total_ms, 300);
429        assert_eq!(profile.by_kind[1].count, 2);
430        // 1000 wall - (600 + 300) depth-1 = 100 ms residual
431        assert_eq!(profile.residual_ms, 100);
432    }
433
434    #[test]
435    fn first_token_ms_uses_earliest_llm_span_metadata() {
436        let spans = vec![
437            span(1, None, SpanKind::Pipeline, "main", 1000),
438            span_with_meta(
439                2,
440                Some(1),
441                SpanKind::LlmCall,
442                "llm_call",
443                600,
444                &[("first_token_ms", serde_json::json!(350))],
445            ),
446            span_with_meta(
447                3,
448                Some(1),
449                SpanKind::LlmCall,
450                "llm_call",
451                300,
452                &[("first_token_ms", serde_json::json!(125))],
453            ),
454        ];
455
456        let profile = build(&spans);
457
458        assert_eq!(profile.first_token_ms, Some(125));
459    }
460
461    #[test]
462    fn render_includes_first_token_when_present() {
463        let mut profile = RunProfile {
464            total_wall_ms: 1000,
465            first_token_ms: Some(120),
466            ..RunProfile::default()
467        };
468        profile.by_kind.push(KindBucket {
469            kind: "llm_call".to_string(),
470            total_ms: 900,
471            count: 1,
472            pct_of_wall: 90.0,
473        });
474
475        let rendered = render(&profile);
476
477        assert!(rendered.contains("First token:"));
478        assert!(rendered.contains("120 ms"));
479    }
480
481    #[test]
482    fn top_level_vm_setup_span_gets_its_own_bucket() {
483        let spans = vec![
484            span(1, None, SpanKind::VmSetup, "acp_vm_setup", 20),
485            span(2, None, SpanKind::Pipeline, "main", 80),
486            span(3, Some(2), SpanKind::LlmCall, "llm_call", 50),
487        ];
488        let profile = build(&spans);
489        assert_eq!(profile.total_wall_ms, 100);
490        assert_eq!(profile.residual_ms, 30);
491        assert!(profile
492            .by_kind
493            .iter()
494            .any(|bucket| bucket.kind == "vm_setup" && bucket.total_ms == 20));
495    }
496
497    #[test]
498    fn nested_spans_do_not_double_count_residual() {
499        // Pipeline (1000ms) > Step (800ms) > LlmCall (700ms)
500        // Depth-1 sum = 800 (the step), residual = 200, NOT 1000-700-800.
501        let spans = vec![
502            span(1, None, SpanKind::Pipeline, "main", 1000),
503            span(2, Some(1), SpanKind::Step, "research", 800),
504            span(3, Some(2), SpanKind::LlmCall, "llm_call", 700),
505        ];
506        let profile = build(&spans);
507        assert_eq!(profile.total_wall_ms, 1000);
508        assert_eq!(profile.residual_ms, 200);
509    }
510
511    #[test]
512    fn step_summaries_split_llm_tool_other() {
513        let spans = vec![
514            span(1, None, SpanKind::Pipeline, "main", 2000),
515            span(2, Some(1), SpanKind::Step, "research", 1500),
516            span(3, Some(2), SpanKind::LlmCall, "llm_call", 900),
517            span(4, Some(2), SpanKind::ToolCall, "mcp_call", 400),
518        ];
519        let profile = build(&spans);
520        assert_eq!(profile.steps.len(), 1);
521        let step = &profile.steps[0];
522        assert_eq!(step.name, "research");
523        assert_eq!(step.duration_ms, 1500);
524        assert_eq!(step.llm_ms, 900);
525        assert_eq!(step.tool_ms, 400);
526        assert_eq!(step.other_ms, 200);
527        assert_eq!(step.llm_calls, 1);
528        assert_eq!(step.tool_calls, 1);
529    }
530
531    #[test]
532    fn top_llm_calls_attribute_enclosing_step_and_model() {
533        let spans = vec![
534            span(1, None, SpanKind::Pipeline, "main", 2000),
535            span(2, Some(1), SpanKind::Step, "research", 1500),
536            span_with_meta(
537                3,
538                Some(2),
539                SpanKind::LlmCall,
540                "llm_call",
541                900,
542                &[("model", serde_json::json!("claude-sonnet-4-6"))],
543            ),
544            span(4, Some(1), SpanKind::LlmCall, "llm_call", 100),
545        ];
546        let profile = build(&spans);
547        assert_eq!(profile.top_llm_calls.len(), 2);
548        assert_eq!(profile.top_llm_calls[0].duration_ms, 900);
549        assert_eq!(profile.top_llm_calls[0].step.as_deref(), Some("research"));
550        assert_eq!(
551            profile.top_llm_calls[0].model.as_deref(),
552            Some("claude-sonnet-4-6")
553        );
554        assert!(profile.top_llm_calls[1].step.is_none());
555    }
556
557    #[test]
558    fn render_produces_nonempty_output_for_real_run() {
559        let spans = vec![
560            span(1, None, SpanKind::Pipeline, "main", 1000),
561            span(2, Some(1), SpanKind::LlmCall, "llm_call", 700),
562        ];
563        let rendered = render(&build(&spans));
564        assert!(rendered.contains("Run profile"));
565        assert!(rendered.contains("llm_call"));
566        assert!(rendered.contains("vm/residual"));
567    }
568
569    #[test]
570    fn render_for_empty_profile_still_produces_header() {
571        // When --profile was requested but no spans landed, we still
572        // render the header + a zero-everything residual line — the
573        // user explicitly asked for output and an empty string would
574        // look like the flag did nothing.
575        let rendered = render(&RunProfile::default());
576        assert!(rendered.contains("Run profile"));
577        assert!(rendered.contains("vm/residual"));
578    }
579
580    #[test]
581    fn aggregate_remaps_duplicate_span_ids_across_runs() {
582        let first = vec![
583            span(1, None, SpanKind::Pipeline, "main", 100),
584            span(2, Some(1), SpanKind::LlmCall, "llm_call", 40),
585        ];
586        let second = vec![
587            span(1, None, SpanKind::Pipeline, "main", 200),
588            span(2, Some(1), SpanKind::ToolCall, "tool", 50),
589        ];
590
591        let profile = build_aggregate(&[first, second]);
592
593        assert_eq!(profile.total_wall_ms, 300);
594        assert_eq!(profile.by_kind.len(), 2);
595        assert!(profile.by_kind.iter().any(|bucket| {
596            bucket.kind == "llm_call" && bucket.total_ms == 40 && bucket.count == 1
597        }));
598        assert!(profile.by_kind.iter().any(|bucket| {
599            bucket.kind == "tool_call" && bucket.total_ms == 50 && bucket.count == 1
600        }));
601    }
602}