Skip to main content

harn_vm/llm/
trace.rs

1use std::cell::RefCell;
2
/// Record of one LLM call, as stored in the per-thread LLM trace log.
#[derive(Clone, Debug)]
pub struct LlmTraceEntry {
    /// Model name recorded for the call.
    pub model: String,
    /// Input token count recorded for the call.
    pub input_tokens: i64,
    /// Output token count recorded for the call.
    pub output_tokens: i64,
    /// Duration recorded for the call, in milliseconds (per the field name).
    pub duration_ms: u64,
}
11
12thread_local! {
13    static LLM_TRACE: RefCell<Vec<LlmTraceEntry>> = const { RefCell::new(Vec::new()) };
14    static LLM_TRACING_ENABLED: RefCell<bool> = const { RefCell::new(false) };
15}
16
17/// Enable LLM tracing for the current thread.
18pub fn enable_tracing() {
19    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = true);
20}
21
22/// Get and clear the trace log.
23pub fn take_trace() -> Vec<LlmTraceEntry> {
24    LLM_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
25}
26
27/// Clone the current trace log without consuming it.
28pub fn peek_trace() -> Vec<LlmTraceEntry> {
29    LLM_TRACE.with(|v| v.borrow().clone())
30}
31
32/// Summarize trace usage without consuming entries.
33pub fn peek_trace_summary() -> (i64, i64, i64, i64) {
34    LLM_TRACE.with(|v| {
35        let entries = v.borrow();
36        let mut input = 0i64;
37        let mut output = 0i64;
38        let mut duration = 0i64;
39        let count = entries.len() as i64;
40        for e in entries.iter() {
41            input += e.input_tokens;
42            output += e.output_tokens;
43            duration += e.duration_ms as i64;
44        }
45        (input, output, duration, count)
46    })
47}
48
49/// Reset thread-local trace state. Call between test runs.
50pub(crate) fn reset_trace_state() {
51    LLM_TRACE.with(|v| v.borrow_mut().clear());
52    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = false);
53}
54
55pub(crate) fn trace_llm_call(entry: LlmTraceEntry) {
56    LLM_TRACING_ENABLED.with(|enabled| {
57        if *enabled.borrow() {
58            LLM_TRACE.with(|v| v.borrow_mut().push(entry));
59        }
60    });
61}
62
63/// Fine-grained event emitted during agent loop execution. Captures tool
64/// calls, LLM calls, interventions, compaction, and phase changes so
65/// downstream consumers (portal, IDE hosts, cloud runners) can display
66/// execution traces without reconstructing them from raw JSON.
67#[derive(Debug, Clone, serde::Serialize)]
68#[serde(tag = "type", rename_all = "snake_case")]
69pub enum AgentTraceEvent {
70    LlmCall {
71        call_id: String,
72        model: String,
73        input_tokens: i64,
74        output_tokens: i64,
75        cache_tokens: i64,
76        duration_ms: u64,
77        iteration: usize,
78    },
79    ToolExecution {
80        tool_name: String,
81        tool_use_id: String,
82        duration_ms: u64,
83        status: String,
84        classification: String,
85        iteration: usize,
86    },
87    ToolRejected {
88        tool_name: String,
89        reason: String,
90        iteration: usize,
91    },
92    LoopIntervention {
93        tool_name: String,
94        kind: String,
95        count: usize,
96        iteration: usize,
97    },
98    ContextCompaction {
99        archived_messages: usize,
100        new_summary_len: usize,
101        iteration: usize,
102    },
103    PhaseChange {
104        from_phase: String,
105        to_phase: String,
106        iteration: usize,
107    },
108    LoopComplete {
109        status: String,
110        iterations: usize,
111        total_duration_ms: u64,
112        tools_used: Vec<String>,
113        successful_tools: Vec<String>,
114    },
115    /// Emitted when `llm_call` re-prompts the model after the previous
116    /// response failed `output_schema` validation. One event per retry;
117    /// `attempt` counts retries (the initial call is attempt 0 and
118    /// produces no event; the first retry emits `attempt: 1`).
119    ///
120    /// The retry does **not** persist the invalid response — the
121    /// original messages are replayed with a single appended user-role
122    /// correction that cites the validation errors and schema. That
123    /// correction text is surfaced here as `correction_prompt` so
124    /// transcripts show both why the retry happened and what was sent.
125    SchemaRetry {
126        attempt: usize,
127        errors: Vec<String>,
128        nudge_used: bool,
129        correction_prompt: String,
130    },
131    TypedCheckpoint {
132        name: String,
133        status: String,
134        checkpoint_attempts: usize,
135        llm_attempts: usize,
136        error_category: Option<String>,
137        errors: Vec<String>,
138        repaired: bool,
139        final_accepted: bool,
140        raw_text: String,
141    },
142    NativeToolFallback {
143        iteration: usize,
144        accepted: bool,
145        policy: String,
146        fallback_index: usize,
147        tool_call_count: usize,
148    },
149    EmptyCompletionRetry {
150        iteration: usize,
151        attempt: usize,
152        error: String,
153    },
154}
155
156thread_local! {
157    static AGENT_TRACE: RefCell<Vec<AgentTraceEvent>> = const { RefCell::new(Vec::new()) };
158}
159
160/// Emit an agent trace event.
161pub(crate) fn emit_agent_event(event: AgentTraceEvent) {
162    AGENT_TRACE.with(|v| v.borrow_mut().push(event));
163}
164
165/// Get and clear the agent trace log.
166pub fn take_agent_trace() -> Vec<AgentTraceEvent> {
167    AGENT_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
168}
169
170/// Clone the current agent trace log without consuming it.
171pub fn peek_agent_trace() -> Vec<AgentTraceEvent> {
172    AGENT_TRACE.with(|v| v.borrow().clone())
173}
174
175/// Produce a rolled-up summary of agent trace events as JSON.
176pub fn agent_trace_summary() -> serde_json::Value {
177    AGENT_TRACE.with(|v| {
178        let events = v.borrow();
179        let mut llm_calls = 0usize;
180        let mut tool_executions = 0usize;
181        let mut tool_rejections = 0usize;
182        let mut interventions = 0usize;
183        let mut compactions = 0usize;
184        let mut native_text_tool_fallbacks = 0usize;
185        let mut native_text_tool_fallback_rejections = 0usize;
186        let mut empty_completion_retries = 0usize;
187        let mut typed_checkpoints = 0usize;
188        let mut typed_checkpoint_failures = 0usize;
189        let mut total_input_tokens = 0i64;
190        let mut total_output_tokens = 0i64;
191        let mut total_llm_duration_ms = 0u64;
192        let mut total_tool_duration_ms = 0u64;
193        let mut tools_used: Vec<String> = Vec::new();
194        let mut status = "unknown".to_string();
195        let mut iterations = 0usize;
196        let mut total_duration_ms = 0u64;
197
198        for event in events.iter() {
199            match event {
200                AgentTraceEvent::LlmCall {
201                    input_tokens,
202                    output_tokens,
203                    duration_ms,
204                    ..
205                } => {
206                    llm_calls += 1;
207                    total_input_tokens += input_tokens;
208                    total_output_tokens += output_tokens;
209                    total_llm_duration_ms += duration_ms;
210                }
211                AgentTraceEvent::ToolExecution {
212                    tool_name,
213                    duration_ms,
214                    ..
215                } => {
216                    tool_executions += 1;
217                    total_tool_duration_ms += duration_ms;
218                    if !tools_used.contains(tool_name) {
219                        tools_used.push(tool_name.clone());
220                    }
221                }
222                AgentTraceEvent::ToolRejected { .. } => {
223                    tool_rejections += 1;
224                }
225                AgentTraceEvent::LoopIntervention { .. } => {
226                    interventions += 1;
227                }
228                AgentTraceEvent::ContextCompaction { .. } => {
229                    compactions += 1;
230                }
231                AgentTraceEvent::PhaseChange { .. } => {}
232                AgentTraceEvent::LoopComplete {
233                    status: s,
234                    iterations: i,
235                    total_duration_ms: d,
236                    ..
237                } => {
238                    status = s.clone();
239                    iterations = *i;
240                    total_duration_ms = *d;
241                }
242                AgentTraceEvent::SchemaRetry { .. } => {}
243                AgentTraceEvent::TypedCheckpoint { final_accepted, .. } => {
244                    typed_checkpoints += 1;
245                    if !final_accepted {
246                        typed_checkpoint_failures += 1;
247                    }
248                }
249                AgentTraceEvent::NativeToolFallback { accepted, .. } => {
250                    native_text_tool_fallbacks += 1;
251                    if !accepted {
252                        native_text_tool_fallback_rejections += 1;
253                    }
254                }
255                AgentTraceEvent::EmptyCompletionRetry { .. } => {
256                    empty_completion_retries += 1;
257                }
258            }
259        }
260
261        serde_json::json!({
262            "status": status,
263            "iterations": iterations,
264            "total_duration_ms": total_duration_ms,
265            "llm_calls": llm_calls,
266            "tool_executions": tool_executions,
267            "tool_rejections": tool_rejections,
268            "interventions": interventions,
269            "compactions": compactions,
270            "native_text_tool_fallbacks": native_text_tool_fallbacks,
271            "native_text_tool_fallback_rejections": native_text_tool_fallback_rejections,
272            "empty_completion_retries": empty_completion_retries,
273            "typed_checkpoints": typed_checkpoints,
274            "typed_checkpoint_failures": typed_checkpoint_failures,
275            "total_input_tokens": total_input_tokens,
276            "total_output_tokens": total_output_tokens,
277            "total_llm_duration_ms": total_llm_duration_ms,
278            "total_tool_duration_ms": total_tool_duration_ms,
279            "tools_used": tools_used,
280        })
281    })
282}
283
284/// Reset agent trace state. Call between test runs.
285pub(crate) fn reset_agent_trace_state() {
286    AGENT_TRACE.with(|v| v.borrow_mut().clear());
287}