harn_vm/
profile.rs

1//! Categorical profile rollup over completed [`crate::tracing::Span`]s.
2//!
3//! Turns the raw span tree (parent/child, kinds, durations) into a digestible
4//! "where did the time go?" answer for harness writers. This module owns no
5//! state of its own — it consumes the snapshot returned by
6//! [`crate::tracing::peek_spans`] and folds it into a [`RunProfile`] that
7//! callers can render to text or serialize to JSON.
8//!
9//! Top-level wall time and residual ("VM / script overhead") are computed
10//! by summing only spans whose parent is the pipeline root (or `None`),
11//! so nested LLM/tool work isn't double-counted under both its category
12//! and its containing step.
13
14use std::collections::BTreeMap;
15
16use serde::{Deserialize, Serialize};
17
18use crate::tracing::Span;
19
20/// Top-N to surface in the rendered profile. Kept small so the stderr
21/// summary stays scannable.
22const TOP_N: usize = 5;
23
24/// Aggregate breakdown of one run's spans.
25#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
26pub struct RunProfile {
27    pub total_wall_ms: u64,
28    pub by_kind: Vec<KindBucket>,
29    pub residual_ms: u64,
30    pub top_llm_calls: Vec<SpanRef>,
31    pub top_tool_calls: Vec<SpanRef>,
32    pub steps: Vec<StepSummary>,
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
36pub struct KindBucket {
37    pub kind: String,
38    pub total_ms: u64,
39    pub count: u64,
40    pub pct_of_wall: f64,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
44pub struct SpanRef {
45    pub span_id: u64,
46    pub kind: String,
47    pub name: String,
48    pub duration_ms: u64,
49    pub step: Option<String>,
50    pub model: Option<String>,
51}
52
53#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
54pub struct StepSummary {
55    pub name: String,
56    pub duration_ms: u64,
57    pub llm_ms: u64,
58    pub tool_ms: u64,
59    pub other_ms: u64,
60    pub llm_calls: u64,
61    pub tool_calls: u64,
62}
63
64/// Build a profile from a slice of completed spans. Designed to be called
65/// post-run with `crate::tracing::peek_spans()`. Pure function — does not
66/// touch globals.
67pub fn build(spans: &[Span]) -> RunProfile {
68    if spans.is_empty() {
69        return RunProfile::default();
70    }
71
72    // The pipeline root is conventionally the only top-level VM span. Host
73    // adapters may add sibling setup spans, so top-level non-pipeline spans
74    // contribute to wall time and their own buckets.
75    let total_wall_ms: u64 = spans
76        .iter()
77        .filter(|s| s.parent_id.is_none())
78        .map(|s| s.duration_ms)
79        .sum();
80
81    let by_kind = bucket_by_kind(spans, total_wall_ms);
82
83    // Residual = wall - sum of "real work" categories at the top level.
84    // Imports/parallel/spawn count as work too; the category buckets cover
85    // them so we subtract depth-1 work plus any host setup sibling spans.
86    let accounted_total: u64 = spans
87        .iter()
88        .filter(|s| {
89            (s.parent_id.is_none() && !is_profile_root_span(s))
90                || matches!(s.parent_id, Some(pid) if is_pipeline_root(spans, pid))
91        })
92        .map(|s| s.duration_ms)
93        .sum();
94    let residual_ms = total_wall_ms.saturating_sub(accounted_total);
95
96    let top_llm_calls = top_n_by_duration(spans, "llm_call");
97    let top_tool_calls = top_n_by_duration(spans, "tool_call");
98    let steps = build_step_summaries(spans);
99
100    RunProfile {
101        total_wall_ms,
102        by_kind,
103        residual_ms,
104        top_llm_calls,
105        top_tool_calls,
106        steps,
107    }
108}
109
110/// Build one aggregate profile from independent span snapshots.
111///
112/// Each input snapshot can reuse span ids starting at 1. The helper remaps ids
113/// before folding so parent/child relationships do not collide across runs.
114pub fn build_aggregate(span_groups: &[Vec<Span>]) -> RunProfile {
115    let mut merged = Vec::new();
116    let mut next_offset = 0u64;
117    for group in span_groups {
118        let offset = next_offset;
119        let max_id = group.iter().map(|span| span.span_id).max().unwrap_or(0);
120        for span in group {
121            let mut remapped = span.clone();
122            remapped.span_id += offset;
123            remapped.parent_id = remapped.parent_id.map(|id| id + offset);
124            merged.push(remapped);
125        }
126        next_offset += max_id + 1;
127    }
128    build(&merged)
129}
130
131fn is_pipeline_root(spans: &[Span], id: u64) -> bool {
132    spans
133        .iter()
134        .find(|s| s.span_id == id)
135        .map(is_profile_root_span)
136        .unwrap_or(false)
137}
138
139fn is_profile_root_span(span: &Span) -> bool {
140    span.parent_id.is_none() && span.kind == crate::tracing::SpanKind::Pipeline
141}
142
143fn bucket_by_kind(spans: &[Span], total_wall_ms: u64) -> Vec<KindBucket> {
144    // Sum per kind across ALL spans of that kind (any depth). This is
145    // the user's mental model of "how much LLM time was there in this
146    // run?" — overlapping/nested doesn't matter because LLM calls are
147    // leaves.
148    let mut totals: BTreeMap<String, (u64, u64)> = BTreeMap::new();
149    for span in spans {
150        if is_profile_root_span(span) {
151            // Skip the synthetic pipeline span; it's the wall-time
152            // denominator, not a category bucket.
153            continue;
154        }
155        let entry = totals.entry(span.kind.as_str().to_string()).or_default();
156        entry.0 += span.duration_ms;
157        entry.1 += 1;
158    }
159    let mut buckets: Vec<KindBucket> = totals
160        .into_iter()
161        .map(|(kind, (total_ms, count))| KindBucket {
162            kind,
163            total_ms,
164            count,
165            pct_of_wall: pct(total_ms, total_wall_ms),
166        })
167        .collect();
168    buckets.sort_by_key(|bucket| std::cmp::Reverse(bucket.total_ms));
169    buckets
170}
171
172fn top_n_by_duration(spans: &[Span], kind: &str) -> Vec<SpanRef> {
173    let mut matches: Vec<&Span> = spans.iter().filter(|s| s.kind.as_str() == kind).collect();
174    matches.sort_by_key(|span| std::cmp::Reverse(span.duration_ms));
175    matches
176        .into_iter()
177        .take(TOP_N)
178        .map(|span| SpanRef {
179            span_id: span.span_id,
180            kind: span.kind.as_str().to_string(),
181            name: span.name.clone(),
182            duration_ms: span.duration_ms,
183            step: enclosing_step_name(spans, span.parent_id),
184            model: span
185                .metadata
186                .get("model")
187                .and_then(|v| v.as_str())
188                .map(str::to_string),
189        })
190        .collect()
191}
192
193fn enclosing_step_name(spans: &[Span], mut parent_id: Option<u64>) -> Option<String> {
194    while let Some(pid) = parent_id {
195        let parent = spans.iter().find(|s| s.span_id == pid)?;
196        if parent.kind.as_str() == "step" {
197            return Some(parent.name.clone());
198        }
199        parent_id = parent.parent_id;
200    }
201    None
202}
203
204fn build_step_summaries(spans: &[Span]) -> Vec<StepSummary> {
205    let mut steps: Vec<StepSummary> = Vec::new();
206    for step_span in spans.iter().filter(|s| s.kind.as_str() == "step") {
207        let mut summary = StepSummary {
208            name: step_span.name.clone(),
209            duration_ms: step_span.duration_ms,
210            ..StepSummary::default()
211        };
212        for descendant in descendants(spans, step_span.span_id) {
213            match descendant.kind.as_str() {
214                "llm_call" => {
215                    summary.llm_ms += descendant.duration_ms;
216                    summary.llm_calls += 1;
217                }
218                "tool_call" => {
219                    summary.tool_ms += descendant.duration_ms;
220                    summary.tool_calls += 1;
221                }
222                _ => {}
223            }
224        }
225        // "other" approximates VM + script overhead inside the step.
226        // LLM/tool durations can overlap (parallel calls), so clamp at 0.
227        summary.other_ms = summary
228            .duration_ms
229            .saturating_sub(summary.llm_ms.saturating_add(summary.tool_ms));
230        steps.push(summary);
231    }
232    steps.sort_by_key(|summary| std::cmp::Reverse(summary.duration_ms));
233    steps
234}
235
236fn descendants(spans: &[Span], root: u64) -> Vec<&Span> {
237    let mut out = Vec::new();
238    let mut frontier = vec![root];
239    while let Some(parent) = frontier.pop() {
240        for span in spans {
241            if span.parent_id == Some(parent) {
242                out.push(span);
243                frontier.push(span.span_id);
244            }
245        }
246    }
247    out
248}
249
250fn pct(part: u64, whole: u64) -> f64 {
251    if whole == 0 {
252        0.0
253    } else {
254        (part as f64 / whole as f64) * 100.0
255    }
256}
257
258/// Render a profile to a human-readable string suitable for stderr after
259/// a `harn run --profile` invocation. ANSI-styled to match the existing
260/// `--trace` output.
261pub fn render(profile: &RunProfile) -> String {
262    use std::fmt::Write;
263    let mut out = String::new();
264    let _ = writeln!(out, "\n\x1b[2m─── Run profile ───\x1b[0m");
265    let _ = writeln!(
266        out,
267        "  Total wall time: {}",
268        format_secs(profile.total_wall_ms)
269    );
270    let _ = writeln!(out, "\n  By category:");
271    for bucket in &profile.by_kind {
272        let _ = writeln!(
273            out,
274            "    {:<14} {:>10}  {:>5.1}%   ({} call{})",
275            bucket.kind,
276            format_secs(bucket.total_ms),
277            bucket.pct_of_wall,
278            bucket.count,
279            if bucket.count == 1 { "" } else { "s" },
280        );
281    }
282    let _ = writeln!(
283        out,
284        "    {:<14} {:>10}  {:>5.1}%",
285        "vm/residual",
286        format_secs(profile.residual_ms),
287        pct(profile.residual_ms, profile.total_wall_ms),
288    );
289    if !profile.top_llm_calls.is_empty() {
290        let _ = writeln!(out, "\n  Top LLM calls:");
291        for span in &profile.top_llm_calls {
292            let model = span.model.as_deref().unwrap_or(&span.name);
293            let step = span
294                .step
295                .as_deref()
296                .map(|s| format!("  step={s}"))
297                .unwrap_or_default();
298            let _ = writeln!(
299                out,
300                "    #{:<4} {:<24} {:>10}{}",
301                span.span_id,
302                model,
303                format_secs(span.duration_ms),
304                step,
305            );
306        }
307    }
308    if !profile.top_tool_calls.is_empty() {
309        let _ = writeln!(out, "\n  Top tool calls:");
310        for span in &profile.top_tool_calls {
311            let step = span
312                .step
313                .as_deref()
314                .map(|s| format!("  step={s}"))
315                .unwrap_or_default();
316            let _ = writeln!(
317                out,
318                "    #{:<4} {:<24} {:>10}{}",
319                span.span_id,
320                span.name,
321                format_secs(span.duration_ms),
322                step,
323            );
324        }
325    }
326    if !profile.steps.is_empty() {
327        let _ = writeln!(out, "\n  Per-@step:");
328        for step in &profile.steps {
329            let _ = writeln!(
330                out,
331                "    {:<20} {:>10}   (LLM {} · tools {} · other {})",
332                step.name,
333                format_secs(step.duration_ms),
334                format_secs(step.llm_ms),
335                format_secs(step.tool_ms),
336                format_secs(step.other_ms),
337            );
338        }
339    }
340    out
341}
342
343fn format_secs(ms: u64) -> String {
344    if ms < 1000 {
345        format!("{} ms", ms)
346    } else {
347        format!("{:.3} s", ms as f64 / 1000.0)
348    }
349}
350
351#[cfg(test)]
352mod tests {
353    use super::*;
354    use crate::tracing::SpanKind;
355
356    fn span(span_id: u64, parent_id: Option<u64>, kind: SpanKind, name: &str, dur: u64) -> Span {
357        Span {
358            span_id,
359            parent_id,
360            kind,
361            name: name.into(),
362            start_ms: 0,
363            duration_ms: dur,
364            metadata: BTreeMap::new(),
365        }
366    }
367
368    fn span_with_meta(
369        span_id: u64,
370        parent_id: Option<u64>,
371        kind: SpanKind,
372        name: &str,
373        dur: u64,
374        meta: &[(&str, serde_json::Value)],
375    ) -> Span {
376        let mut s = span(span_id, parent_id, kind, name, dur);
377        for (k, v) in meta {
378            s.metadata.insert((*k).to_string(), v.clone());
379        }
380        s
381    }
382
383    #[test]
384    fn empty_spans_yield_default_profile() {
385        let profile = build(&[]);
386        assert_eq!(profile.total_wall_ms, 0);
387        assert!(profile.by_kind.is_empty());
388        assert_eq!(profile.residual_ms, 0);
389    }
390
391    #[test]
392    fn buckets_are_sorted_descending_by_total() {
393        let spans = vec![
394            span(1, None, SpanKind::Pipeline, "main", 1000),
395            span(2, Some(1), SpanKind::LlmCall, "llm_call", 600),
396            span(3, Some(1), SpanKind::ToolCall, "mcp_call", 250),
397            span(4, Some(1), SpanKind::ToolCall, "mcp_call", 50),
398        ];
399        let profile = build(&spans);
400        assert_eq!(profile.total_wall_ms, 1000);
401        assert_eq!(profile.by_kind[0].kind, "llm_call");
402        assert_eq!(profile.by_kind[0].total_ms, 600);
403        assert_eq!(profile.by_kind[1].kind, "tool_call");
404        assert_eq!(profile.by_kind[1].total_ms, 300);
405        assert_eq!(profile.by_kind[1].count, 2);
406        // 1000 wall - (600 + 300) depth-1 = 100 ms residual
407        assert_eq!(profile.residual_ms, 100);
408    }
409
410    #[test]
411    fn top_level_vm_setup_span_gets_its_own_bucket() {
412        let spans = vec![
413            span(1, None, SpanKind::VmSetup, "acp_vm_setup", 20),
414            span(2, None, SpanKind::Pipeline, "main", 80),
415            span(3, Some(2), SpanKind::LlmCall, "llm_call", 50),
416        ];
417        let profile = build(&spans);
418        assert_eq!(profile.total_wall_ms, 100);
419        assert_eq!(profile.residual_ms, 30);
420        assert!(profile
421            .by_kind
422            .iter()
423            .any(|bucket| bucket.kind == "vm_setup" && bucket.total_ms == 20));
424    }
425
426    #[test]
427    fn nested_spans_do_not_double_count_residual() {
428        // Pipeline (1000ms) > Step (800ms) > LlmCall (700ms)
429        // Depth-1 sum = 800 (the step), residual = 200, NOT 1000-700-800.
430        let spans = vec![
431            span(1, None, SpanKind::Pipeline, "main", 1000),
432            span(2, Some(1), SpanKind::Step, "research", 800),
433            span(3, Some(2), SpanKind::LlmCall, "llm_call", 700),
434        ];
435        let profile = build(&spans);
436        assert_eq!(profile.total_wall_ms, 1000);
437        assert_eq!(profile.residual_ms, 200);
438    }
439
440    #[test]
441    fn step_summaries_split_llm_tool_other() {
442        let spans = vec![
443            span(1, None, SpanKind::Pipeline, "main", 2000),
444            span(2, Some(1), SpanKind::Step, "research", 1500),
445            span(3, Some(2), SpanKind::LlmCall, "llm_call", 900),
446            span(4, Some(2), SpanKind::ToolCall, "mcp_call", 400),
447        ];
448        let profile = build(&spans);
449        assert_eq!(profile.steps.len(), 1);
450        let step = &profile.steps[0];
451        assert_eq!(step.name, "research");
452        assert_eq!(step.duration_ms, 1500);
453        assert_eq!(step.llm_ms, 900);
454        assert_eq!(step.tool_ms, 400);
455        assert_eq!(step.other_ms, 200);
456        assert_eq!(step.llm_calls, 1);
457        assert_eq!(step.tool_calls, 1);
458    }
459
460    #[test]
461    fn top_llm_calls_attribute_enclosing_step_and_model() {
462        let spans = vec![
463            span(1, None, SpanKind::Pipeline, "main", 2000),
464            span(2, Some(1), SpanKind::Step, "research", 1500),
465            span_with_meta(
466                3,
467                Some(2),
468                SpanKind::LlmCall,
469                "llm_call",
470                900,
471                &[("model", serde_json::json!("claude-sonnet-4-6"))],
472            ),
473            span(4, Some(1), SpanKind::LlmCall, "llm_call", 100),
474        ];
475        let profile = build(&spans);
476        assert_eq!(profile.top_llm_calls.len(), 2);
477        assert_eq!(profile.top_llm_calls[0].duration_ms, 900);
478        assert_eq!(profile.top_llm_calls[0].step.as_deref(), Some("research"));
479        assert_eq!(
480            profile.top_llm_calls[0].model.as_deref(),
481            Some("claude-sonnet-4-6")
482        );
483        assert!(profile.top_llm_calls[1].step.is_none());
484    }
485
486    #[test]
487    fn render_produces_nonempty_output_for_real_run() {
488        let spans = vec![
489            span(1, None, SpanKind::Pipeline, "main", 1000),
490            span(2, Some(1), SpanKind::LlmCall, "llm_call", 700),
491        ];
492        let rendered = render(&build(&spans));
493        assert!(rendered.contains("Run profile"));
494        assert!(rendered.contains("llm_call"));
495        assert!(rendered.contains("vm/residual"));
496    }
497
498    #[test]
499    fn render_for_empty_profile_still_produces_header() {
500        // When --profile was requested but no spans landed, we still
501        // render the header + a zero-everything residual line — the
502        // user explicitly asked for output and an empty string would
503        // look like the flag did nothing.
504        let rendered = render(&RunProfile::default());
505        assert!(rendered.contains("Run profile"));
506        assert!(rendered.contains("vm/residual"));
507    }
508
509    #[test]
510    fn aggregate_remaps_duplicate_span_ids_across_runs() {
511        let first = vec![
512            span(1, None, SpanKind::Pipeline, "main", 100),
513            span(2, Some(1), SpanKind::LlmCall, "llm_call", 40),
514        ];
515        let second = vec![
516            span(1, None, SpanKind::Pipeline, "main", 200),
517            span(2, Some(1), SpanKind::ToolCall, "tool", 50),
518        ];
519
520        let profile = build_aggregate(&[first, second]);
521
522        assert_eq!(profile.total_wall_ms, 300);
523        assert_eq!(profile.by_kind.len(), 2);
524        assert!(profile.by_kind.iter().any(|bucket| {
525            bucket.kind == "llm_call" && bucket.total_ms == 40 && bucket.count == 1
526        }));
527        assert!(profile.by_kind.iter().any(|bucket| {
528            bucket.kind == "tool_call" && bucket.total_ms == 50 && bucket.count == 1
529        }));
530    }
531}
harn_vm/profile.rs

harn_vm/
profile.rs