Skip to main content

obol_core/transcript/
claude.rs

1//! Claude Code session JSONL -> Vec<MessageUsage>.
2//! Reconciled with AgentsView internal/parser/claude.go (MIT, © 2026 Kenn Software LLC).
3
4use crate::error::ObolError;
5use crate::model::{MessageUsage, Provider};
6use serde_json::Value;
7
8#[derive(Debug, Default)]
9pub struct ClaudeParse {
10    pub usages: Vec<MessageUsage>,
11    pub malformed_lines: usize,
12}
13
14/// Parse a Claude session file. Within-file dedup: assistant entries sharing a
15/// `message.id` collapse to one, keeping the LAST entry's usage (streaming
16/// snapshots overwrite; never sum).
17pub fn parse(bytes: &[u8]) -> Result<ClaudeParse, ObolError> {
18    let text = std::str::from_utf8(bytes).map_err(|e| ObolError::MalformedTranscript {
19        line: 0,
20        msg: e.to_string(),
21    })?;
22
23    // Preserve first-seen order of message ids; overwrite usage on each new line.
24    let mut order: Vec<String> = Vec::new();
25    let mut by_id: std::collections::HashMap<String, MessageUsage> =
26        std::collections::HashMap::new();
27    let mut out = ClaudeParse::default();
28
29    for line in text.lines() {
30        let line = line.trim();
31        if line.is_empty() {
32            continue;
33        }
34        let v: Value = match serde_json::from_str(line) {
35            Ok(v) => v,
36            Err(_) => {
37                out.malformed_lines += 1;
38                continue;
39            }
40        };
41        if v.get("type").and_then(Value::as_str) != Some("assistant") {
42            continue;
43        }
44        if v.get("isMeta").and_then(Value::as_bool) == Some(true) {
45            continue;
46        }
47        if v.get("isCompactSummary").and_then(Value::as_bool) == Some(true) {
48            continue;
49        }
50        let msg = match v.get("message") {
51            Some(m) => m,
52            None => continue,
53        };
54        let usage = match msg.get("usage") {
55            Some(u) if u.is_object() => u,
56            _ => continue,
57        };
58
59        let id = msg
60            .get("id")
61            .and_then(Value::as_str)
62            .unwrap_or("")
63            .to_string();
64        // Token math lives in the shared Anthropic normalizer (Claude Code
65        // embeds the Anthropic usage object near-verbatim). The envelope
66        // navigation + streaming dedup below is this dialect's own job.
67        let t = super::provider::anthropic::normalize(usage);
68        let mu = MessageUsage {
69            model: msg
70                .get("model")
71                .and_then(Value::as_str)
72                .unwrap_or("")
73                .to_string(),
74            provider: Provider::Anthropic,
75            namespace: "litellm".into(),
76            input_uncached: t.input_uncached,
77            cache_read: t.cache_read,
78            cache_write_5m: t.cache_write_5m,
79            cache_write_1h: t.cache_write_1h,
80            output: t.output,
81            request_input_tokens: t.input_uncached
82                + t.cache_read
83                + t.cache_write_5m
84                + t.cache_write_1h,
85            service_tier: usage
86                .get("service_tier")
87                .and_then(Value::as_str)
88                .map(String::from),
89        };
90
91        let key = if id.is_empty() {
92            format!("__anon_{}", order.len())
93        } else {
94            id
95        };
96        if !by_id.contains_key(&key) {
97            order.push(key.clone());
98        }
99        by_id.insert(key, mu); // last-write-wins
100    }
101
102    out.usages = order.into_iter().filter_map(|k| by_id.remove(&k)).collect();
103    Ok(out)
104}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse` over the `claude-mini.jsonl` fixture and checks that
    /// repeated `msg_a` entries collapse to a single record carrying the last
    /// snapshot's numbers, and that the fixture's one bad line is counted.
    #[test]
    fn dedups_streaming_and_keeps_last_usage() {
        let parsed = parse(include_bytes!("../../tests/fixtures/claude-mini.jsonl")).unwrap();

        // msg_a (collapsed) + msg_b = 2 usages
        assert_eq!(parsed.usages.len(), 2, "usages: {:?}", parsed.usages);
        assert_eq!(parsed.malformed_lines, 1);

        let first = &parsed.usages[0];
        assert_eq!(first.output, 9, "should keep LAST msg_a output, not sum");
        assert_eq!(first.input_uncached, 12);
        assert_eq!(first.cache_read, 120);
        assert_eq!(first.cache_write_5m, 60);
        assert_eq!(first.request_input_tokens, 12 + 120 + 60);

        let second = &parsed.usages[1];
        assert_eq!(second.output, 7);
        // present-and-zero, still a real record
        assert_eq!(second.input_uncached, 0);
    }
}