harn_vm/llm/
trace.rs

1use std::cell::RefCell;
2
3/// A single LLM call trace entry.
4#[derive(Debug, Clone)]
5pub struct LlmTraceEntry {
6    pub model: String,
7    pub input_tokens: i64,
8    pub output_tokens: i64,
9    pub duration_ms: u64,
10}
11
12thread_local! {
13    static LLM_TRACE: RefCell<Vec<LlmTraceEntry>> = const { RefCell::new(Vec::new()) };
14    static LLM_TRACING_ENABLED: RefCell<bool> = const { RefCell::new(false) };
15}
16
17/// Enable LLM tracing for the current thread.
18pub fn enable_tracing() {
19    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = true);
20}
21
22/// Get and clear the trace log.
23pub fn take_trace() -> Vec<LlmTraceEntry> {
24    LLM_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
25}
26
27/// Clone the current trace log without consuming it.
28pub fn peek_trace() -> Vec<LlmTraceEntry> {
29    LLM_TRACE.with(|v| v.borrow().clone())
30}
31
32/// Summarize trace usage without consuming entries.
33pub fn peek_trace_summary() -> (i64, i64, i64, i64) {
34    LLM_TRACE.with(|v| {
35        let entries = v.borrow();
36        let mut input = 0i64;
37        let mut output = 0i64;
38        let mut duration = 0i64;
39        let count = entries.len() as i64;
40        for e in entries.iter() {
41            input += e.input_tokens;
42            output += e.output_tokens;
43            duration += e.duration_ms as i64;
44        }
45        (input, output, duration, count)
46    })
47}
48
49/// Reset thread-local trace state. Call between test runs.
50pub(crate) fn reset_trace_state() {
51    LLM_TRACE.with(|v| v.borrow_mut().clear());
52    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = false);
53}
54
55pub(crate) fn trace_llm_call(entry: LlmTraceEntry) {
56    LLM_TRACING_ENABLED.with(|enabled| {
57        if *enabled.borrow() {
58            LLM_TRACE.with(|v| v.borrow_mut().push(entry));
59        }
60    });
61}
62
63/// Fine-grained event emitted during agent loop execution. Captures tool
64/// calls, LLM calls, interventions, compaction, and phase changes so
65/// downstream consumers (portal, IDE hosts, cloud runners) can display
66/// execution traces without reconstructing them from raw JSON.
67#[derive(Debug, Clone, serde::Serialize)]
68#[serde(tag = "type", rename_all = "snake_case")]
69pub enum AgentTraceEvent {
70    LlmCall {
71        call_id: String,
72        model: String,
73        input_tokens: i64,
74        output_tokens: i64,
75        cache_tokens: i64,
76        duration_ms: u64,
77        iteration: usize,
78    },
79    ToolExecution {
80        tool_name: String,
81        tool_use_id: String,
82        duration_ms: u64,
83        status: String,
84        classification: String,
85        iteration: usize,
86    },
87    ToolRejected {
88        tool_name: String,
89        reason: String,
90        iteration: usize,
91    },
92    LoopIntervention {
93        tool_name: String,
94        kind: String,
95        count: usize,
96        iteration: usize,
97    },
98    ContextCompaction {
99        archived_messages: usize,
100        new_summary_len: usize,
101        iteration: usize,
102    },
103    PhaseChange {
104        from_phase: String,
105        to_phase: String,
106        iteration: usize,
107    },
108    LoopComplete {
109        status: String,
110        iterations: usize,
111        total_duration_ms: u64,
112        tools_used: Vec<String>,
113        successful_tools: Vec<String>,
114    },
115    /// Emitted when `llm_call` re-prompts the model after the previous
116    /// response failed `output_schema` validation. One event per retry;
117    /// `attempt` counts retries (the initial call is attempt 0 and
118    /// produces no event; the first retry emits `attempt: 1`).
119    ///
120    /// The retry does **not** persist the invalid response — the
121    /// original messages are replayed with a single appended user-role
122    /// correction that cites the validation errors and schema. That
123    /// correction text is surfaced here as `correction_prompt` so
124    /// transcripts show both why the retry happened and what was sent.
125    SchemaRetry {
126        attempt: usize,
127        errors: Vec<String>,
128        nudge_used: bool,
129        correction_prompt: String,
130    },
131    /// Emitted when `llm_call` aborts a streaming provider response
132    /// because the partial JSON content can no longer satisfy
133    /// `output_schema`. `chunks_consumed` counts text-delta chunks seen
134    /// before the abort; `provider` / `model` track the route that fired
135    /// so cost dashboards can attribute the savings.
136    SchemaStreamAborted {
137        provider: String,
138        model: String,
139        reason: String,
140        path: String,
141        chunks_consumed: usize,
142    },
143    TypedCheckpoint {
144        name: String,
145        status: String,
146        checkpoint_attempts: usize,
147        llm_attempts: usize,
148        error_category: Option<String>,
149        errors: Vec<String>,
150        repaired: bool,
151        final_accepted: bool,
152        raw_text: String,
153    },
154    NativeToolFallback {
155        iteration: usize,
156        accepted: bool,
157        policy: String,
158        fallback_index: usize,
159        tool_call_count: usize,
160    },
161    EmptyCompletionRetry {
162        iteration: usize,
163        attempt: usize,
164        error: String,
165    },
166}
167
168thread_local! {
169    static AGENT_TRACE: RefCell<Vec<AgentTraceEvent>> = const { RefCell::new(Vec::new()) };
170}
171
172/// Emit an agent trace event.
173pub(crate) fn emit_agent_event(event: AgentTraceEvent) {
174    AGENT_TRACE.with(|v| v.borrow_mut().push(event));
175}
176
177/// Get and clear the agent trace log.
178pub fn take_agent_trace() -> Vec<AgentTraceEvent> {
179    AGENT_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
180}
181
182/// Clone the current agent trace log without consuming it.
183pub fn peek_agent_trace() -> Vec<AgentTraceEvent> {
184    AGENT_TRACE.with(|v| v.borrow().clone())
185}
186
187/// Produce a rolled-up summary of agent trace events as JSON.
188pub fn agent_trace_summary() -> serde_json::Value {
189    AGENT_TRACE.with(|v| {
190        let events = v.borrow();
191        let mut llm_calls = 0usize;
192        let mut tool_executions = 0usize;
193        let mut tool_rejections = 0usize;
194        let mut interventions = 0usize;
195        let mut compactions = 0usize;
196        let mut native_text_tool_fallbacks = 0usize;
197        let mut native_text_tool_fallback_rejections = 0usize;
198        let mut empty_completion_retries = 0usize;
199        let mut schema_stream_aborts = 0usize;
200        let mut typed_checkpoints = 0usize;
201        let mut typed_checkpoint_failures = 0usize;
202        let mut total_input_tokens = 0i64;
203        let mut total_output_tokens = 0i64;
204        let mut total_llm_duration_ms = 0u64;
205        let mut total_tool_duration_ms = 0u64;
206        let mut tools_used: Vec<String> = Vec::new();
207        let mut status = "unknown".to_string();
208        let mut iterations = 0usize;
209        let mut total_duration_ms = 0u64;
210
211        for event in events.iter() {
212            match event {
213                AgentTraceEvent::LlmCall {
214                    input_tokens,
215                    output_tokens,
216                    duration_ms,
217                    ..
218                } => {
219                    llm_calls += 1;
220                    total_input_tokens += input_tokens;
221                    total_output_tokens += output_tokens;
222                    total_llm_duration_ms += duration_ms;
223                }
224                AgentTraceEvent::ToolExecution {
225                    tool_name,
226                    duration_ms,
227                    ..
228                } => {
229                    tool_executions += 1;
230                    total_tool_duration_ms += duration_ms;
231                    if !tools_used.contains(tool_name) {
232                        tools_used.push(tool_name.clone());
233                    }
234                }
235                AgentTraceEvent::ToolRejected { .. } => {
236                    tool_rejections += 1;
237                }
238                AgentTraceEvent::LoopIntervention { .. } => {
239                    interventions += 1;
240                }
241                AgentTraceEvent::ContextCompaction { .. } => {
242                    compactions += 1;
243                }
244                AgentTraceEvent::PhaseChange { .. } => {}
245                AgentTraceEvent::LoopComplete {
246                    status: s,
247                    iterations: i,
248                    total_duration_ms: d,
249                    ..
250                } => {
251                    status = s.clone();
252                    iterations = *i;
253                    total_duration_ms = *d;
254                }
255                AgentTraceEvent::SchemaRetry { .. } => {}
256                AgentTraceEvent::SchemaStreamAborted { .. } => {
257                    schema_stream_aborts += 1;
258                }
259                AgentTraceEvent::TypedCheckpoint { final_accepted, .. } => {
260                    typed_checkpoints += 1;
261                    if !final_accepted {
262                        typed_checkpoint_failures += 1;
263                    }
264                }
265                AgentTraceEvent::NativeToolFallback { accepted, .. } => {
266                    native_text_tool_fallbacks += 1;
267                    if !accepted {
268                        native_text_tool_fallback_rejections += 1;
269                    }
270                }
271                AgentTraceEvent::EmptyCompletionRetry { .. } => {
272                    empty_completion_retries += 1;
273                }
274            }
275        }
276
277        serde_json::json!({
278            "status": status,
279            "iterations": iterations,
280            "total_duration_ms": total_duration_ms,
281            "llm_calls": llm_calls,
282            "tool_executions": tool_executions,
283            "tool_rejections": tool_rejections,
284            "interventions": interventions,
285            "compactions": compactions,
286            "native_text_tool_fallbacks": native_text_tool_fallbacks,
287            "native_text_tool_fallback_rejections": native_text_tool_fallback_rejections,
288            "empty_completion_retries": empty_completion_retries,
289            "schema_stream_aborts": schema_stream_aborts,
290            "typed_checkpoints": typed_checkpoints,
291            "typed_checkpoint_failures": typed_checkpoint_failures,
292            "total_input_tokens": total_input_tokens,
293            "total_output_tokens": total_output_tokens,
294            "total_llm_duration_ms": total_llm_duration_ms,
295            "total_tool_duration_ms": total_tool_duration_ms,
296            "tools_used": tools_used,
297        })
298    })
299}
300
301/// Reset agent trace state. Call between test runs.
302pub(crate) fn reset_agent_trace_state() {
303    AGENT_TRACE.with(|v| v.borrow_mut().clear());
304}
harn_vm/llm/trace.rs

harn_vm/llm/
trace.rs