obol_core/transcript/
claude.rs1use crate::error::ObolError;
5use crate::model::{MessageUsage, Provider};
6use serde_json::Value;
7
8#[derive(Debug, Default)]
9pub struct ClaudeParse {
10 pub usages: Vec<MessageUsage>,
11 pub malformed_lines: usize,
12}
13
14pub fn parse(bytes: &[u8]) -> Result<ClaudeParse, ObolError> {
18 let text = std::str::from_utf8(bytes).map_err(|e| ObolError::MalformedTranscript {
19 line: 0,
20 msg: e.to_string(),
21 })?;
22
23 let mut order: Vec<String> = Vec::new();
25 let mut by_id: std::collections::HashMap<String, MessageUsage> =
26 std::collections::HashMap::new();
27 let mut out = ClaudeParse::default();
28
29 for line in text.lines() {
30 let line = line.trim();
31 if line.is_empty() {
32 continue;
33 }
34 let v: Value = match serde_json::from_str(line) {
35 Ok(v) => v,
36 Err(_) => {
37 out.malformed_lines += 1;
38 continue;
39 }
40 };
41 if v.get("type").and_then(Value::as_str) != Some("assistant") {
42 continue;
43 }
44 if v.get("isMeta").and_then(Value::as_bool) == Some(true) {
45 continue;
46 }
47 if v.get("isCompactSummary").and_then(Value::as_bool) == Some(true) {
48 continue;
49 }
50 let msg = match v.get("message") {
51 Some(m) => m,
52 None => continue,
53 };
54 let usage = match msg.get("usage") {
55 Some(u) if u.is_object() => u,
56 _ => continue,
57 };
58
59 let id = msg
60 .get("id")
61 .and_then(Value::as_str)
62 .unwrap_or("")
63 .to_string();
64 let g = |k: &str| usage.get(k).and_then(Value::as_u64).unwrap_or(0);
65 let input = g("input_tokens");
66 let cache_read = g("cache_read_input_tokens");
67 let cache_creation = g("cache_creation_input_tokens");
68 let (cw5, cw1) = match usage.get("cache_creation") {
69 Some(cc) if cc.is_object() => (
70 cc.get("ephemeral_5m_input_tokens")
71 .and_then(Value::as_u64)
72 .unwrap_or(0),
73 cc.get("ephemeral_1h_input_tokens")
74 .and_then(Value::as_u64)
75 .unwrap_or(0),
76 ),
77 _ => (cache_creation, 0), };
79
80 let mu = MessageUsage {
81 model: msg
82 .get("model")
83 .and_then(Value::as_str)
84 .unwrap_or("")
85 .to_string(),
86 provider: Provider::Anthropic,
87 namespace: "litellm".into(),
88 input_uncached: input,
89 cache_read,
90 cache_write_5m: cw5,
91 cache_write_1h: cw1,
92 output: g("output_tokens"),
93 request_input_tokens: input + cache_read + cache_creation,
94 service_tier: usage
95 .get("service_tier")
96 .and_then(Value::as_str)
97 .map(String::from),
98 };
99
100 let key = if id.is_empty() {
101 format!("__anon_{}", order.len())
102 } else {
103 id
104 };
105 if !by_id.contains_key(&key) {
106 order.push(key.clone());
107 }
108 by_id.insert(key, mu); }
110
111 out.usages = order.into_iter().filter_map(|k| by_id.remove(&k)).collect();
112 Ok(out)
113}
114
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the mini fixture and checks that repeated records for the same
    /// message collapse into one entry carrying the last-seen usage values.
    #[test]
    fn dedups_streaming_and_keeps_last_usage() {
        let fixture = include_bytes!("../../tests/fixtures/claude-mini.jsonl");
        let parsed = parse(fixture).unwrap();

        // The fixture is expected to yield two messages and one bad line.
        assert_eq!(parsed.usages.len(), 2, "usages: {:?}", parsed.usages);
        assert_eq!(parsed.malformed_lines, 1);

        // First message: values must come from its final record.
        let first = &parsed.usages[0];
        assert_eq!(first.output, 9, "should keep LAST msg_a output, not sum");
        assert_eq!(first.input_uncached, 12);
        assert_eq!(first.cache_read, 120);
        assert_eq!(first.cache_write_5m, 60);
        assert_eq!(first.request_input_tokens, 12 + 120 + 60);

        // Second message.
        let second = &parsed.usages[1];
        assert_eq!(second.output, 7);
        assert_eq!(second.input_uncached, 0);
    }
}