Skip to main content

lash_trace/
lib.rs

1use std::collections::BTreeMap;
2use std::fs::OpenOptions;
3use std::io::{self, Write};
4use std::path::{Path, PathBuf};
5use std::sync::Mutex;
6
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use sha2::{Digest, Sha256};
10
/// Submodule that is only compiled when the `otel` cargo feature is enabled.
#[cfg(feature = "otel")]
pub mod otel;

/// Schema version stamped into every [`TraceRecord`] built by
/// [`TraceRecord::new_with_timestamp`].
pub const TRACE_SCHEMA_VERSION: u32 = 2;
15
/// Trace level selector.
///
/// Serialized by serde as a `snake_case` string (`"standard"` or
/// `"extended"`). What each level enables is decided by consumers that are
/// not visible in this file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TraceLevel {
    /// The `Default` level.
    #[default]
    Standard,
    /// The level for which [`TraceLevel::is_extended`] returns `true`.
    Extended,
}
23
24impl TraceLevel {
25    pub fn is_extended(self) -> bool {
26        matches!(self, Self::Extended)
27    }
28}
29
/// Correlation identifiers carried by every [`TraceRecord`].
///
/// Every field is optional. On serialization, `None` fields and an empty
/// `metadata` map are omitted; on deserialization, missing fields fall back
/// to their defaults.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct TraceContext {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candidate_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candidate_parent_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub example_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub split: Option<String>,
    /// Set by [`TraceContext::for_session`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    /// Set by [`TraceContext::for_turn`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub turn_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub graph_node_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_graph_node_id: Option<String>,
    /// Set by [`TraceContext::for_turn_index`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub turn_index: Option<usize>,
    /// Set by [`TraceContext::for_mode_iteration`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mode_iteration: Option<usize>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub effect_id: Option<String>,
    /// Set by [`TraceContext::for_llm_call`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub llm_call_id: Option<String>,
    /// When the LLM call was issued from inside a tool's `execute` via
    /// `direct_completion`, this carries the originating tool's call id so
    /// the renderer can group fan-outs (e.g. tournament_rerank's batch
    /// reranks) under their parent tool.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub originating_tool_call_id: Option<String>,
    /// Free-form JSON values keyed by name; omitted from the output when empty.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub metadata: BTreeMap<String, Value>,
}
69
70impl TraceContext {
71    pub fn for_session(mut self, session_id: impl Into<String>) -> Self {
72        self.session_id = Some(session_id.into());
73        self
74    }
75
76    pub fn for_turn_index(mut self, turn_index: usize) -> Self {
77        self.turn_index = Some(turn_index);
78        self
79    }
80
81    pub fn for_turn(mut self, turn_id: impl Into<String>) -> Self {
82        self.turn_id = Some(turn_id.into());
83        self
84    }
85
86    pub fn for_mode_iteration(mut self, mode_iteration: usize) -> Self {
87        self.mode_iteration = Some(mode_iteration);
88        self
89    }
90
91    pub fn for_llm_call(mut self, llm_call_id: impl Into<String>) -> Self {
92        self.llm_call_id = Some(llm_call_id.into());
93        self
94    }
95
96    pub fn for_originating_tool_call(mut self, tool_call_id: impl Into<String>) -> Self {
97        self.originating_tool_call_id = Some(tool_call_id.into());
98        self
99    }
100}
101
/// One trace entry: schema version, id, timestamp, context, and the event.
///
/// `event` is flattened, so the event's `type` tag and its fields are written
/// at the top level of the serialized record, next to `context`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceRecord {
    /// [`TRACE_SCHEMA_VERSION`] when built through the constructors here.
    pub schema_version: u32,
    /// Random v4 UUID string when built through the constructors here.
    pub id: String,
    /// RFC 3339 timestamp string when built through the constructors here.
    pub timestamp: String,
    pub context: TraceContext,
    #[serde(flatten)]
    pub event: TraceEvent,
}
111
112impl TraceRecord {
113    pub fn new(context: TraceContext, event: TraceEvent) -> Self {
114        Self::new_with_timestamp(context, event, chrono::Utc::now())
115    }
116
117    pub fn new_with_timestamp(
118        context: TraceContext,
119        event: TraceEvent,
120        timestamp: chrono::DateTime<chrono::Utc>,
121    ) -> Self {
122        Self {
123            schema_version: TRACE_SCHEMA_VERSION,
124            id: uuid::Uuid::new_v4().to_string(),
125            timestamp: timestamp.to_rfc3339(),
126            context,
127            event,
128        }
129    }
130}
131
/// Payload of a [`TraceRecord`].
///
/// Internally tagged: the variant name is written in `snake_case` under the
/// `type` key (for example `tool_call_started`), alongside the variant's own
/// fields. Fields marked `skip_serializing_if` are omitted when empty/`None`
/// and default when missing on input.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum TraceEvent {
    /// Session start, with optional free-form metadata.
    SessionStarted {
        #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
        metadata: BTreeMap<String, Value>,
    },
    /// Turn start, with optional free-form metadata.
    TurnStarted {
        #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
        metadata: BTreeMap<String, Value>,
    },
    /// Prompt summary: a hash, a character count, and optional per-component
    /// entries.
    PromptBuilt {
        prompt_hash: String,
        prompt_chars: usize,
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        components: Vec<TracePromptComponent>,
    },
    /// Carries the [`TraceLlmRequest`] of an LLM call.
    LlmCallStarted {
        request: TraceLlmRequest,
    },
    /// Carries the [`TraceLlmResponse`] of an LLM call plus optional usage,
    /// provider-specific usage JSON, and stream summary JSON.
    LlmCallCompleted {
        response: TraceLlmResponse,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        usage: Option<TraceTokenUsage>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        provider_usage: Option<Value>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        stream_summary: Option<Value>,
    },
    /// Carries the [`TraceError`] of an LLM call and an optional stream
    /// summary.
    LlmCallFailed {
        error: TraceError,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        stream_summary: Option<Value>,
    },
    /// Wraps one [`TraceProviderStreamEvent`].
    ProviderStreamEvent {
        event: TraceProviderStreamEvent,
    },
    /// Wraps one [`TraceRuntimeStreamEvent`].
    RuntimeStreamEvent {
        event: TraceRuntimeStreamEvent,
    },
    /// Tool invocation: optional call id, tool name, and JSON arguments.
    ToolCallStarted {
        call_id: Option<String>,
        name: String,
        args: Value,
    },
    /// Tool result: the invocation fields plus the typed output and a
    /// duration in milliseconds.
    ToolCallCompleted {
        call_id: Option<String>,
        name: String,
        args: Value,
        output: TraceToolCallOutput,
        duration_ms: u64,
    },
    /// A mode name with an arbitrary JSON payload.
    ModeStep {
        mode: String,
        payload: Value,
    },
    /// Token usage with an optional cumulative figure.
    TokenUsage {
        usage: TraceTokenUsage,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cumulative: Option<TraceTokenUsage>,
    },
    /// Turn end: free-form `status` and `done_reason` strings and an optional
    /// handoff.
    TurnCompleted {
        status: String,
        done_reason: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        handoff: Option<TraceHandoff>,
    },
    /// Caller-defined event: a name and an arbitrary JSON payload.
    Custom {
        name: String,
        payload: Value,
    },
}
204
/// Output recorded for a completed tool call.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceToolCallOutput {
    /// Typed outcome (success, failure, or cancelled) with its JSON payload.
    pub outcome: TraceToolCallOutcome,
    /// Optional JSON value; omitted from the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub control: Option<Value>,
}
211
212impl TraceToolCallOutput {
213    pub fn status(&self) -> TraceToolCallStatus {
214        match self.outcome {
215            TraceToolCallOutcome::Success(_) => TraceToolCallStatus::Success,
216            TraceToolCallOutcome::Failure(_) => TraceToolCallStatus::Failure,
217            TraceToolCallOutcome::Cancelled(_) => TraceToolCallStatus::Cancelled,
218        }
219    }
220
221    pub fn is_success(&self) -> bool {
222        self.status() == TraceToolCallStatus::Success
223    }
224
225    pub fn value_for_projection(&self) -> Value {
226        match &self.outcome {
227            TraceToolCallOutcome::Success(value)
228            | TraceToolCallOutcome::Failure(value)
229            | TraceToolCallOutcome::Cancelled(value) => value.clone(),
230        }
231    }
232}
233
/// Tool call outcome with its JSON payload.
///
/// Adjacently tagged: serialized as an object with the `snake_case` variant
/// name under `status` and the wrapped value under `payload`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "status", content = "payload", rename_all = "snake_case")]
pub enum TraceToolCallOutcome {
    Success(Value),
    Failure(Value),
    Cancelled(Value),
}
241
/// Payload-free counterpart of [`TraceToolCallOutcome`], as returned by
/// [`TraceToolCallOutput::status`]. Serialized as a `snake_case` string.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TraceToolCallStatus {
    Success,
    Failure,
    Cancelled,
}
249
/// One entry of `TraceEvent::PromptBuilt::components`: an id, a kind, and a
/// hash, plus an optional character count that is omitted when `None`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TracePromptComponent {
    pub id: String,
    pub kind: String,
    pub hash: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub chars: Option<usize>,
}
258
/// Request payload recorded by `TraceEvent::LlmCallStarted`.
///
/// `model_variant`, `attachments`, `tools`, and `output_spec` are omitted
/// from the serialized form when `None`/empty; the remaining fields are
/// always written.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceLlmRequest {
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_variant: Option<String>,
    pub messages: Vec<TraceLlmMessage>,
    // NOTE(review): `TraceContentBlock::Image::attachment_idx` presumably
    // indexes into this list — confirm against the producer.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub attachments: Vec<TraceAttachment>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<TraceToolSpec>,
    pub tool_choice: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_spec: Option<Value>,
    pub stream: bool,
}
274
/// One message of a [`TraceLlmRequest`]: a role string and its content
/// blocks.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceLlmMessage {
    pub role: String,
    pub blocks: Vec<TraceContentBlock>,
}
280
/// One content block of a [`TraceLlmMessage`].
///
/// Internally tagged: the `snake_case` variant name is written under the
/// `kind` key next to the variant's fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum TraceContentBlock {
    /// Text content. `cache_breakpoint` is written only when `true` and
    /// defaults to `false` when absent.
    Text {
        text: String,
        #[serde(default, skip_serializing_if = "is_false")]
        cache_breakpoint: bool,
    },
    /// Image reference by index.
    Image {
        attachment_idx: usize,
    },
    /// Tool call: optional call id, tool name, JSON input, optional item id,
    /// and a flag recording whether a signature was present.
    ToolCall {
        call_id: Option<String>,
        tool_name: String,
        input_json: Value,
        item_id: Option<String>,
        has_signature: bool,
    },
    /// Tool result: optional call id, optional tool name, and string content.
    ToolResult {
        call_id: Option<String>,
        tool_name: Option<String>,
        content: String,
    },
    /// Reasoning content: text, optional item id, summary strings, and two
    /// boolean flags (`has_encrypted`, `redacted`).
    Reasoning {
        text: String,
        item_id: Option<String>,
        summary: Vec<String>,
        has_encrypted: bool,
        redacted: bool,
    },
}
312
/// Serde `skip_serializing_if` predicate: returns `true` when the flag is
/// `false`, so the field is left out of the output.
///
/// Takes `&bool` because serde calls the predicate with a reference to the
/// field.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
316
/// Attachment descriptor recorded in a [`TraceLlmRequest`].
///
/// Only `mime` is required; the filename, the SHA-256 string, and the byte
/// length are omitted from the output when `None`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceAttachment {
    pub mime: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub filename: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub bytes_sha256: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub bytes_len: Option<usize>,
}
327
/// Tool description recorded in a [`TraceLlmRequest`]: name, description,
/// and input/output schemas as JSON values. All fields are always written.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceToolSpec {
    pub name: String,
    pub description: String,
    pub input_schema: Value,
    pub output_schema: Value,
}
335
/// Response payload recorded by `TraceEvent::LlmCallCompleted`.
///
/// `text` and `duration_ms` are always written; `terminal_reason` and
/// `parts` are omitted when `None`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceLlmResponse {
    pub text: String,
    pub duration_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub terminal_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parts: Option<Value>,
}
345
/// Payload of `TraceEvent::ProviderStreamEvent`.
///
/// `item_id`, `output_index`, and `raw_json` are omitted from the output
/// when `None`; every other field is always written.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceProviderStreamEvent {
    pub provider: String,
    pub sequence: u64,
    pub elapsed_ms: u64,
    pub event_name: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub item_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_index: Option<i64>,
    // NOTE(review): presumably the length and SHA-256 of the raw provider
    // payload — the code that fills these is not in this file; confirm.
    pub raw_len: usize,
    pub raw_sha256: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub raw_json: Option<Value>,
}
361
/// Payload of `TraceEvent::RuntimeStreamEvent`.
///
/// `sequence`, `elapsed_ms`, and `event_name` are always written; every
/// optional field is omitted from the output when `None`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct TraceRuntimeStreamEvent {
    pub sequence: u64,
    pub elapsed_ms: u64,
    pub event_name: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub raw_text: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub visible_text: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub item_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_index: Option<i64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub call_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_name: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_json: Option<Value>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub usage: Option<TraceTokenUsage>,
}
384
/// Token counters. All four fields are always serialized, and the derived
/// `Default` is all zeros.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct TraceTokenUsage {
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub cached_input_tokens: i64,
    pub reasoning_tokens: i64,
}
392
/// Handoff details attached to `TraceEvent::TurnCompleted`: the id of the
/// successor session.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct TraceHandoff {
    pub successor_session_id: String,
}
397
/// Error payload recorded by `TraceEvent::LlmCallFailed`.
///
/// `message` and `retryable` are always written; `terminal_reason`, `code`,
/// and `raw` are omitted when `None`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct TraceError {
    pub message: String,
    pub retryable: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub terminal_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub raw: Option<String>,
}
409
/// Errors returned by [`TraceSink::append`].
///
/// The filesystem variants carry the path involved and the underlying
/// `io::Error` in a field named `source`.
#[derive(Debug, thiserror::Error)]
pub enum TraceSinkError {
    /// The record could not be serialized to JSON; converts from
    /// `serde_json::Error` via `?`.
    #[error("failed to serialize trace record: {0}")]
    Serialize(#[from] serde_json::Error),
    /// The sink's mutex was poisoned.
    #[error("trace sink lock poisoned")]
    LockPoisoned,
    /// Creating the trace file's parent directory failed.
    #[error("failed to create trace directory {path}: {source}")]
    CreateDir { path: PathBuf, source: io::Error },
    /// Opening the trace file failed.
    #[error("failed to open trace file {path}: {source}")]
    Open { path: PathBuf, source: io::Error },
    /// Writing to the trace file failed.
    #[error("failed to write trace file {path}: {source}")]
    Write { path: PathBuf, source: io::Error },
}
423
/// Destination for trace records.
///
/// Implementors must be `Send + Sync`, and `append` takes `&self`, so any
/// mutable state has to be synchronized inside the implementation.
pub trait TraceSink: Send + Sync {
    /// Appends one record to the sink.
    ///
    /// # Errors
    ///
    /// Returns a [`TraceSinkError`] when the record cannot be stored.
    fn append(&self, record: &TraceRecord) -> Result<(), TraceSinkError>;
}
427
/// Trace sink bound to a single file path.
pub struct JsonlTraceSink {
    /// Target file for appended records.
    path: PathBuf,
    /// Serializes appends issued through this sink instance.
    lock: Mutex<()>,
}

impl JsonlTraceSink {
    /// Creates a sink targeting `path`. Nothing is opened or created here.
    pub fn new(path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        let lock = Mutex::default();
        Self { path, lock }
    }

    /// Returns the path this sink appends to.
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }
}
445
446impl TraceSink for JsonlTraceSink {
447    fn append(&self, record: &TraceRecord) -> Result<(), TraceSinkError> {
448        let line = serde_json::to_string(record)?;
449        let _guard = self.lock.lock().map_err(|_| TraceSinkError::LockPoisoned)?;
450        if let Some(parent) = self.path.parent()
451            && !parent.as_os_str().is_empty()
452        {
453            std::fs::create_dir_all(parent).map_err(|source| TraceSinkError::CreateDir {
454                path: parent.to_path_buf(),
455                source,
456            })?;
457        }
458        let mut file = OpenOptions::new()
459            .create(true)
460            .append(true)
461            .open(&self.path)
462            .map_err(|source| TraceSinkError::Open {
463                path: self.path.clone(),
464                source,
465            })?;
466        writeln!(file, "{line}").map_err(|source| TraceSinkError::Write {
467            path: self.path.clone(),
468            source,
469        })
470    }
471}
472
473pub fn sha256_hex(input: impl AsRef<[u8]>) -> String {
474    let mut hasher = Sha256::new();
475    hasher.update(input.as_ref());
476    format!("{:x}", hasher.finalize())
477}
478
479pub fn json_hash(value: &Value) -> String {
480    sha256_hex(serde_json::to_vec(value).unwrap_or_default())
481}
482
#[cfg(test)]
mod tests {
    use super::*;

    /// Appending to an existing directory writes exactly one newline-terminated
    /// JSON line carrying the event tag and the context.
    #[test]
    fn jsonl_sink_writes_record() {
        let dir = std::env::temp_dir().join(format!("lash-trace-{}", uuid::Uuid::new_v4()));
        std::fs::create_dir_all(&dir).unwrap();
        let path = dir.join("trace.jsonl");
        let sink = JsonlTraceSink::new(&path);
        sink.append(&TraceRecord::new(
            TraceContext::default().for_session("root"),
            TraceEvent::Custom {
                name: "test.event".to_string(),
                payload: serde_json::json!({"ok": true}),
            },
        ))
        .unwrap();
        let text = std::fs::read_to_string(&path).unwrap();
        // Best-effort cleanup before asserting, so a failed assertion does not
        // leave the temp directory behind.
        let _ = std::fs::remove_dir_all(&dir);

        assert!(text.contains("\"type\":\"custom\""));
        assert!(text.contains("\"session_id\":\"root\""));
        assert!(text.ends_with('\n'));
        assert_eq!(text.lines().count(), 1);
    }

    /// The flattened event puts its `type` tag and fields at the top level of
    /// the record.
    #[test]
    fn tool_start_and_handoff_records_are_jsonl_shaped() {
        let started = TraceRecord::new(
            TraceContext::default().for_session("root"),
            TraceEvent::ToolCallStarted {
                call_id: Some("call-1".to_string()),
                name: "read_file".to_string(),
                args: serde_json::json!({"path": "README.md"}),
            },
        );
        let completed = TraceRecord::new(
            TraceContext::default().for_session("root"),
            TraceEvent::TurnCompleted {
                status: "completed".to_string(),
                done_reason: "modelstop".to_string(),
                handoff: Some(TraceHandoff {
                    successor_session_id: "child-1".to_string(),
                }),
            },
        );

        let started_json = serde_json::to_value(started).unwrap();
        assert_eq!(started_json["type"], "tool_call_started");
        assert_eq!(started_json["call_id"], "call-1");

        let completed_json = serde_json::to_value(completed).unwrap();
        assert_eq!(completed_json["type"], "turn_completed");
        assert_eq!(completed_json["handoff"]["successor_session_id"], "child-1");
    }

    /// A failure outcome serializes as `{"status": "failure", "payload": ...}`
    /// under `output.outcome`.
    #[test]
    fn tool_completion_serializes_typed_failure_output() {
        let record = TraceRecord::new(
            TraceContext::default().for_session("root"),
            TraceEvent::ToolCallCompleted {
                call_id: Some("call-1".to_string()),
                name: "read_file".to_string(),
                args: serde_json::json!({"path": "missing"}),
                output: TraceToolCallOutput {
                    outcome: TraceToolCallOutcome::Failure(serde_json::json!({
                        "class": "invalid_request",
                        "code": "invalid_tool_args",
                        "message": "bad args",
                        "source": "runtime",
                        "retry": { "type": "never" },
                        "raw": { "path": "missing" }
                    })),
                    control: None,
                },
                duration_ms: 3,
            },
        );

        let json = serde_json::to_value(record).unwrap();
        assert_eq!(json["type"], "tool_call_completed");
        assert_eq!(json["output"]["outcome"]["status"], "failure");
        assert_eq!(
            json["output"]["outcome"]["payload"]["code"],
            "invalid_tool_args"
        );
        assert_eq!(
            json["output"]["outcome"]["payload"]["raw"]["path"],
            "missing"
        );
    }

    /// Appending creates missing parent directories of the trace file.
    #[test]
    fn jsonl_sink_creates_parent_directories() {
        let dir = std::env::temp_dir().join(format!("lash-trace-{}", uuid::Uuid::new_v4()));
        let path = dir.join("nested").join("trace.jsonl");
        let sink = JsonlTraceSink::new(&path);
        sink.append(&TraceRecord::new(
            TraceContext::default().for_session("root"),
            TraceEvent::RuntimeStreamEvent {
                event: TraceRuntimeStreamEvent {
                    sequence: 1,
                    elapsed_ms: 0,
                    event_name: "delta".to_string(),
                    raw_text: Some("hello".to_string()),
                    visible_text: Some("hello".to_string()),
                    item_id: None,
                    output_index: None,
                    call_id: None,
                    tool_name: None,
                    input_json: None,
                    usage: None,
                },
            },
        ))
        .unwrap();
        // Record the result, clean up, then assert, so the directory is removed
        // whether or not the check passes.
        let created = path.exists();
        let _ = std::fs::remove_dir_all(&dir);
        assert!(created);
    }
}