Skip to main content

harn_vm/llm/
trace.rs

1use std::cell::RefCell;
2
/// Record of one LLM call, as stored in the per-thread trace log.
#[derive(Clone, Debug)]
pub struct LlmTraceEntry {
    /// Model string recorded for the call.
    pub model: String,
    /// Input-token count for the call; summed by `peek_trace_summary`.
    pub input_tokens: i64,
    /// Output-token count for the call; summed by `peek_trace_summary`.
    pub output_tokens: i64,
    /// Duration of the call in milliseconds; summed by `peek_trace_summary`.
    pub duration_ms: u64,
}
11
12thread_local! {
13    static LLM_TRACE: RefCell<Vec<LlmTraceEntry>> = const { RefCell::new(Vec::new()) };
14    static LLM_TRACING_ENABLED: RefCell<bool> = const { RefCell::new(false) };
15}
16
17/// Enable LLM tracing for the current thread.
18pub fn enable_tracing() {
19    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = true);
20}
21
22/// Get and clear the trace log.
23pub fn take_trace() -> Vec<LlmTraceEntry> {
24    LLM_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
25}
26
27/// Clone the current trace log without consuming it.
28pub fn peek_trace() -> Vec<LlmTraceEntry> {
29    LLM_TRACE.with(|v| v.borrow().clone())
30}
31
32/// Summarize trace usage without consuming entries.
33pub fn peek_trace_summary() -> (i64, i64, i64, i64) {
34    LLM_TRACE.with(|v| {
35        let entries = v.borrow();
36        let mut input = 0i64;
37        let mut output = 0i64;
38        let mut duration = 0i64;
39        let count = entries.len() as i64;
40        for e in entries.iter() {
41            input += e.input_tokens;
42            output += e.output_tokens;
43            duration += e.duration_ms as i64;
44        }
45        (input, output, duration, count)
46    })
47}
48
49/// Reset thread-local trace state. Call between test runs.
50pub(crate) fn reset_trace_state() {
51    LLM_TRACE.with(|v| v.borrow_mut().clear());
52    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = false);
53}
54
55pub(crate) fn trace_llm_call(entry: LlmTraceEntry) {
56    LLM_TRACING_ENABLED.with(|enabled| {
57        if *enabled.borrow() {
58            LLM_TRACE.with(|v| v.borrow_mut().push(entry));
59        }
60    });
61}
62
63/// Fine-grained event emitted during agent loop execution. Captures tool
64/// calls, LLM calls, interventions, compaction, and phase changes so
65/// downstream consumers (portal, IDE hosts, cloud runners) can display
66/// execution traces without reconstructing them from raw JSON.
67#[derive(Debug, Clone, serde::Serialize)]
68#[serde(tag = "type", rename_all = "snake_case")]
69pub enum AgentTraceEvent {
70    LlmCall {
71        call_id: String,
72        model: String,
73        input_tokens: i64,
74        output_tokens: i64,
75        cache_tokens: i64,
76        duration_ms: u64,
77        iteration: usize,
78    },
79    ToolExecution {
80        tool_name: String,
81        tool_use_id: String,
82        duration_ms: u64,
83        status: String,
84        classification: String,
85        iteration: usize,
86    },
87    ToolRejected {
88        tool_name: String,
89        reason: String,
90        iteration: usize,
91    },
92    LoopIntervention {
93        tool_name: String,
94        kind: String,
95        count: usize,
96        iteration: usize,
97    },
98    ContextCompaction {
99        archived_messages: usize,
100        new_summary_len: usize,
101        iteration: usize,
102    },
103    PhaseChange {
104        from_phase: String,
105        to_phase: String,
106        iteration: usize,
107    },
108    LoopComplete {
109        status: String,
110        iterations: usize,
111        total_duration_ms: u64,
112        tools_used: Vec<String>,
113        successful_tools: Vec<String>,
114    },
115    /// Emitted when `llm_call` re-prompts the model after the previous
116    /// response failed `output_schema` validation. One event per retry;
117    /// `attempt` counts retries (the initial call is attempt 0 and
118    /// produces no event; the first retry emits `attempt: 1`).
119    ///
120    /// The retry does **not** persist the invalid response — the
121    /// original messages are replayed with a single appended user-role
122    /// correction that cites the validation errors and schema. That
123    /// correction text is surfaced here as `correction_prompt` so
124    /// transcripts show both why the retry happened and what was sent.
125    SchemaRetry {
126        attempt: usize,
127        errors: Vec<String>,
128        nudge_used: bool,
129        correction_prompt: String,
130    },
131    NativeToolFallback {
132        iteration: usize,
133        accepted: bool,
134        policy: String,
135        fallback_index: usize,
136        tool_call_count: usize,
137    },
138    EmptyCompletionRetry {
139        iteration: usize,
140        attempt: usize,
141        error: String,
142    },
143}
144
145thread_local! {
146    static AGENT_TRACE: RefCell<Vec<AgentTraceEvent>> = const { RefCell::new(Vec::new()) };
147}
148
149/// Emit an agent trace event.
150pub(crate) fn emit_agent_event(event: AgentTraceEvent) {
151    AGENT_TRACE.with(|v| v.borrow_mut().push(event));
152}
153
154/// Get and clear the agent trace log.
155pub fn take_agent_trace() -> Vec<AgentTraceEvent> {
156    AGENT_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
157}
158
159/// Clone the current agent trace log without consuming it.
160pub fn peek_agent_trace() -> Vec<AgentTraceEvent> {
161    AGENT_TRACE.with(|v| v.borrow().clone())
162}
163
164/// Produce a rolled-up summary of agent trace events as JSON.
165pub fn agent_trace_summary() -> serde_json::Value {
166    AGENT_TRACE.with(|v| {
167        let events = v.borrow();
168        let mut llm_calls = 0usize;
169        let mut tool_executions = 0usize;
170        let mut tool_rejections = 0usize;
171        let mut interventions = 0usize;
172        let mut compactions = 0usize;
173        let mut native_text_tool_fallbacks = 0usize;
174        let mut native_text_tool_fallback_rejections = 0usize;
175        let mut empty_completion_retries = 0usize;
176        let mut total_input_tokens = 0i64;
177        let mut total_output_tokens = 0i64;
178        let mut total_llm_duration_ms = 0u64;
179        let mut total_tool_duration_ms = 0u64;
180        let mut tools_used: Vec<String> = Vec::new();
181        let mut status = "unknown".to_string();
182        let mut iterations = 0usize;
183        let mut total_duration_ms = 0u64;
184
185        for event in events.iter() {
186            match event {
187                AgentTraceEvent::LlmCall {
188                    input_tokens,
189                    output_tokens,
190                    duration_ms,
191                    ..
192                } => {
193                    llm_calls += 1;
194                    total_input_tokens += input_tokens;
195                    total_output_tokens += output_tokens;
196                    total_llm_duration_ms += duration_ms;
197                }
198                AgentTraceEvent::ToolExecution {
199                    tool_name,
200                    duration_ms,
201                    ..
202                } => {
203                    tool_executions += 1;
204                    total_tool_duration_ms += duration_ms;
205                    if !tools_used.contains(tool_name) {
206                        tools_used.push(tool_name.clone());
207                    }
208                }
209                AgentTraceEvent::ToolRejected { .. } => {
210                    tool_rejections += 1;
211                }
212                AgentTraceEvent::LoopIntervention { .. } => {
213                    interventions += 1;
214                }
215                AgentTraceEvent::ContextCompaction { .. } => {
216                    compactions += 1;
217                }
218                AgentTraceEvent::PhaseChange { .. } => {}
219                AgentTraceEvent::LoopComplete {
220                    status: s,
221                    iterations: i,
222                    total_duration_ms: d,
223                    ..
224                } => {
225                    status = s.clone();
226                    iterations = *i;
227                    total_duration_ms = *d;
228                }
229                AgentTraceEvent::SchemaRetry { .. } => {}
230                AgentTraceEvent::NativeToolFallback { accepted, .. } => {
231                    native_text_tool_fallbacks += 1;
232                    if !accepted {
233                        native_text_tool_fallback_rejections += 1;
234                    }
235                }
236                AgentTraceEvent::EmptyCompletionRetry { .. } => {
237                    empty_completion_retries += 1;
238                }
239            }
240        }
241
242        serde_json::json!({
243            "status": status,
244            "iterations": iterations,
245            "total_duration_ms": total_duration_ms,
246            "llm_calls": llm_calls,
247            "tool_executions": tool_executions,
248            "tool_rejections": tool_rejections,
249            "interventions": interventions,
250            "compactions": compactions,
251            "native_text_tool_fallbacks": native_text_tool_fallbacks,
252            "native_text_tool_fallback_rejections": native_text_tool_fallback_rejections,
253            "empty_completion_retries": empty_completion_retries,
254            "total_input_tokens": total_input_tokens,
255            "total_output_tokens": total_output_tokens,
256            "total_llm_duration_ms": total_llm_duration_ms,
257            "total_tool_duration_ms": total_tool_duration_ms,
258            "tools_used": tools_used,
259        })
260    })
261}
262
263/// Reset agent trace state. Call between test runs.
264pub(crate) fn reset_agent_trace_state() {
265    AGENT_TRACE.with(|v| v.borrow_mut().clear());
266}