Skip to main content

harn_vm/llm/
trace.rs

1use std::cell::RefCell;
2
/// Record describing one traced LLM call.
#[derive(Debug, Clone)]
pub struct LlmTraceEntry {
    /// Name of the model the call was made against.
    pub model: String,
    /// Input token count recorded for the call.
    pub input_tokens: i64,
    /// Output token count recorded for the call.
    pub output_tokens: i64,
    /// Duration of the call, in milliseconds.
    pub duration_ms: u64,
}
11
12thread_local! {
13    static LLM_TRACE: RefCell<Vec<LlmTraceEntry>> = const { RefCell::new(Vec::new()) };
14    static LLM_TRACING_ENABLED: RefCell<bool> = const { RefCell::new(false) };
15}
16
17/// Enable LLM tracing for the current thread.
18pub fn enable_tracing() {
19    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = true);
20}
21
22/// Get and clear the trace log.
23pub fn take_trace() -> Vec<LlmTraceEntry> {
24    LLM_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
25}
26
27/// Clone the current trace log without consuming it.
28pub fn peek_trace() -> Vec<LlmTraceEntry> {
29    LLM_TRACE.with(|v| v.borrow().clone())
30}
31
32/// Summarize trace usage without consuming entries.
33pub fn peek_trace_summary() -> (i64, i64, i64, i64) {
34    LLM_TRACE.with(|v| {
35        let entries = v.borrow();
36        let mut input = 0i64;
37        let mut output = 0i64;
38        let mut duration = 0i64;
39        let count = entries.len() as i64;
40        for e in entries.iter() {
41            input += e.input_tokens;
42            output += e.output_tokens;
43            duration += e.duration_ms as i64;
44        }
45        (input, output, duration, count)
46    })
47}
48
49/// Reset thread-local trace state. Call between test runs.
50pub(crate) fn reset_trace_state() {
51    LLM_TRACE.with(|v| v.borrow_mut().clear());
52    LLM_TRACING_ENABLED.with(|v| *v.borrow_mut() = false);
53}
54
55pub(crate) fn trace_llm_call(entry: LlmTraceEntry) {
56    LLM_TRACING_ENABLED.with(|enabled| {
57        if *enabled.borrow() {
58            LLM_TRACE.with(|v| v.borrow_mut().push(entry));
59        }
60    });
61}
62
63/// Fine-grained event emitted during agent loop execution. Captures tool
64/// calls, LLM calls, interventions, compaction, and phase changes so
65/// downstream consumers (portal, burin-code) can display execution traces
66/// without reconstructing them from raw JSON.
67#[derive(Debug, Clone, serde::Serialize)]
68#[serde(tag = "type", rename_all = "snake_case")]
69pub enum AgentTraceEvent {
70    LlmCall {
71        call_id: String,
72        model: String,
73        input_tokens: i64,
74        output_tokens: i64,
75        cache_tokens: i64,
76        duration_ms: u64,
77        iteration: usize,
78    },
79    ToolExecution {
80        tool_name: String,
81        tool_use_id: String,
82        duration_ms: u64,
83        status: String,
84        classification: String,
85        iteration: usize,
86    },
87    ToolRejected {
88        tool_name: String,
89        reason: String,
90        iteration: usize,
91    },
92    LoopIntervention {
93        tool_name: String,
94        kind: String,
95        count: usize,
96        iteration: usize,
97    },
98    ContextCompaction {
99        archived_messages: usize,
100        new_summary_len: usize,
101        iteration: usize,
102    },
103    PhaseChange {
104        from_phase: String,
105        to_phase: String,
106        iteration: usize,
107    },
108    LoopComplete {
109        status: String,
110        iterations: usize,
111        total_duration_ms: u64,
112        tools_used: Vec<String>,
113        successful_tools: Vec<String>,
114    },
115    /// Emitted when `llm_call` re-prompts the model after the previous
116    /// response failed `output_schema` validation. One event per retry;
117    /// `attempt` counts retries (the initial call is attempt 0 and
118    /// produces no event; the first retry emits `attempt: 1`).
119    SchemaRetry {
120        attempt: usize,
121        errors: Vec<String>,
122        nudge_used: bool,
123    },
124}
125
126thread_local! {
127    static AGENT_TRACE: RefCell<Vec<AgentTraceEvent>> = const { RefCell::new(Vec::new()) };
128}
129
130/// Emit an agent trace event.
131pub(crate) fn emit_agent_event(event: AgentTraceEvent) {
132    AGENT_TRACE.with(|v| v.borrow_mut().push(event));
133}
134
135/// Get and clear the agent trace log.
136pub fn take_agent_trace() -> Vec<AgentTraceEvent> {
137    AGENT_TRACE.with(|v| std::mem::take(&mut *v.borrow_mut()))
138}
139
140/// Clone the current agent trace log without consuming it.
141pub fn peek_agent_trace() -> Vec<AgentTraceEvent> {
142    AGENT_TRACE.with(|v| v.borrow().clone())
143}
144
145/// Produce a rolled-up summary of agent trace events as JSON.
146pub fn agent_trace_summary() -> serde_json::Value {
147    AGENT_TRACE.with(|v| {
148        let events = v.borrow();
149        let mut llm_calls = 0usize;
150        let mut tool_executions = 0usize;
151        let mut tool_rejections = 0usize;
152        let mut interventions = 0usize;
153        let mut compactions = 0usize;
154        let mut total_input_tokens = 0i64;
155        let mut total_output_tokens = 0i64;
156        let mut total_llm_duration_ms = 0u64;
157        let mut total_tool_duration_ms = 0u64;
158        let mut tools_used: Vec<String> = Vec::new();
159        let mut status = "unknown".to_string();
160        let mut iterations = 0usize;
161        let mut total_duration_ms = 0u64;
162
163        for event in events.iter() {
164            match event {
165                AgentTraceEvent::LlmCall {
166                    input_tokens,
167                    output_tokens,
168                    duration_ms,
169                    ..
170                } => {
171                    llm_calls += 1;
172                    total_input_tokens += input_tokens;
173                    total_output_tokens += output_tokens;
174                    total_llm_duration_ms += duration_ms;
175                }
176                AgentTraceEvent::ToolExecution {
177                    tool_name,
178                    duration_ms,
179                    ..
180                } => {
181                    tool_executions += 1;
182                    total_tool_duration_ms += duration_ms;
183                    if !tools_used.contains(tool_name) {
184                        tools_used.push(tool_name.clone());
185                    }
186                }
187                AgentTraceEvent::ToolRejected { .. } => {
188                    tool_rejections += 1;
189                }
190                AgentTraceEvent::LoopIntervention { .. } => {
191                    interventions += 1;
192                }
193                AgentTraceEvent::ContextCompaction { .. } => {
194                    compactions += 1;
195                }
196                AgentTraceEvent::PhaseChange { .. } => {}
197                AgentTraceEvent::LoopComplete {
198                    status: s,
199                    iterations: i,
200                    total_duration_ms: d,
201                    ..
202                } => {
203                    status = s.clone();
204                    iterations = *i;
205                    total_duration_ms = *d;
206                }
207                AgentTraceEvent::SchemaRetry { .. } => {}
208            }
209        }
210
211        serde_json::json!({
212            "status": status,
213            "iterations": iterations,
214            "total_duration_ms": total_duration_ms,
215            "llm_calls": llm_calls,
216            "tool_executions": tool_executions,
217            "tool_rejections": tool_rejections,
218            "interventions": interventions,
219            "compactions": compactions,
220            "total_input_tokens": total_input_tokens,
221            "total_output_tokens": total_output_tokens,
222            "total_llm_duration_ms": total_llm_duration_ms,
223            "total_tool_duration_ms": total_tool_duration_ms,
224            "tools_used": tools_used,
225        })
226    })
227}
228
229/// Reset agent trace state. Call between test runs.
230pub(crate) fn reset_agent_trace_state() {
231    AGENT_TRACE.with(|v| v.borrow_mut().clear());
232}