Skip to main content

obol_core/transcript/
claude.rs

1//! Claude Code session JSONL -> Vec<MessageUsage>.
2//! Reconciled with AgentsView internal/parser/claude.go (MIT, © 2026 Kenn Software LLC).
3
4use crate::error::ObolError;
5use crate::model::{MessageUsage, Provider};
6use serde_json::Value;
7
8#[derive(Debug, Default)]
9pub struct ClaudeParse {
10    pub usages: Vec<MessageUsage>,
11    pub malformed_lines: usize,
12}
13
14/// Parse a Claude session file. Within-file dedup: assistant entries sharing a
15/// `message.id` collapse to one, keeping the LAST entry's usage (streaming
16/// snapshots overwrite; never sum).
17pub fn parse(bytes: &[u8]) -> Result<ClaudeParse, ObolError> {
18    let text = std::str::from_utf8(bytes).map_err(|e| ObolError::MalformedTranscript {
19        line: 0,
20        msg: e.to_string(),
21    })?;
22
23    // Preserve first-seen order of message ids; overwrite usage on each new line.
24    let mut order: Vec<String> = Vec::new();
25    let mut by_id: std::collections::HashMap<String, MessageUsage> =
26        std::collections::HashMap::new();
27    let mut out = ClaudeParse::default();
28
29    for line in text.lines() {
30        let line = line.trim();
31        if line.is_empty() {
32            continue;
33        }
34        let v: Value = match serde_json::from_str(line) {
35            Ok(v) => v,
36            Err(_) => {
37                out.malformed_lines += 1;
38                continue;
39            }
40        };
41        if v.get("type").and_then(Value::as_str) != Some("assistant") {
42            continue;
43        }
44        if v.get("isMeta").and_then(Value::as_bool) == Some(true) {
45            continue;
46        }
47        if v.get("isCompactSummary").and_then(Value::as_bool) == Some(true) {
48            continue;
49        }
50        let msg = match v.get("message") {
51            Some(m) => m,
52            None => continue,
53        };
54        let usage = match msg.get("usage") {
55            Some(u) if u.is_object() => u,
56            _ => continue,
57        };
58
59        let id = msg
60            .get("id")
61            .and_then(Value::as_str)
62            .unwrap_or("")
63            .to_string();
64        let g = |k: &str| usage.get(k).and_then(Value::as_u64).unwrap_or(0);
65        let input = g("input_tokens");
66        let cache_read = g("cache_read_input_tokens");
67        let cache_creation = g("cache_creation_input_tokens");
68        let (cw5, cw1) = match usage.get("cache_creation") {
69            Some(cc) if cc.is_object() => (
70                cc.get("ephemeral_5m_input_tokens")
71                    .and_then(Value::as_u64)
72                    .unwrap_or(0),
73                cc.get("ephemeral_1h_input_tokens")
74                    .and_then(Value::as_u64)
75                    .unwrap_or(0),
76            ),
77            _ => (cache_creation, 0), // no split -> treat all creation as 5m
78        };
79
80        let mu = MessageUsage {
81            model: msg
82                .get("model")
83                .and_then(Value::as_str)
84                .unwrap_or("")
85                .to_string(),
86            provider: Provider::Anthropic,
87            namespace: "litellm".into(),
88            input_uncached: input,
89            cache_read,
90            cache_write_5m: cw5,
91            cache_write_1h: cw1,
92            output: g("output_tokens"),
93            request_input_tokens: input + cache_read + cache_creation,
94            service_tier: usage
95                .get("service_tier")
96                .and_then(Value::as_str)
97                .map(String::from),
98        };
99
100        let key = if id.is_empty() {
101            format!("__anon_{}", order.len())
102        } else {
103            id
104        };
105        if !by_id.contains_key(&key) {
106            order.push(key.clone());
107        }
108        by_id.insert(key, mu); // last-write-wins
109    }
110
111    out.usages = order.into_iter().filter_map(|k| by_id.remove(&k)).collect();
112    Ok(out)
113}
114
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse` over the `claude-mini.jsonl` fixture and pins the
    /// collapsed record count, the malformed-line tally, and the per-record
    /// token fields.
    #[test]
    fn dedups_streaming_and_keeps_last_usage() {
        let fixture = include_bytes!("../../tests/fixtures/claude-mini.jsonl");
        let parsed = parse(fixture).unwrap();

        // msg_a (collapsed) + msg_b = 2 usages
        assert_eq!(parsed.usages.len(), 2, "usages: {:?}", parsed.usages);
        assert_eq!(parsed.malformed_lines, 1);

        // First record: the streamed message, carrying its final snapshot.
        let first = &parsed.usages[0];
        assert_eq!(first.output, 9, "should keep LAST msg_a output, not sum");
        assert_eq!(first.input_uncached, 12);
        assert_eq!(first.cache_read, 120);
        assert_eq!(first.cache_write_5m, 60);
        assert_eq!(first.request_input_tokens, 12 + 120 + 60);

        // Second record: zero uncached input is present-and-zero, still a real record.
        let second = &parsed.usages[1];
        assert_eq!(second.output, 7);
        assert_eq!(second.input_uncached, 0);
    }
}