obol_core/transcript/
claude.rs1use crate::error::ObolError;
5use crate::model::{MessageUsage, Provider};
6use serde_json::Value;
7
8#[derive(Debug, Default)]
9pub struct ClaudeParse {
10 pub usages: Vec<MessageUsage>,
11 pub malformed_lines: usize,
12}
13
14pub fn parse(bytes: &[u8]) -> Result<ClaudeParse, ObolError> {
18 let text = std::str::from_utf8(bytes).map_err(|e| ObolError::MalformedTranscript {
19 line: 0,
20 msg: e.to_string(),
21 })?;
22
23 let mut order: Vec<String> = Vec::new();
25 let mut by_id: std::collections::HashMap<String, MessageUsage> =
26 std::collections::HashMap::new();
27 let mut out = ClaudeParse::default();
28
29 for line in text.lines() {
30 let line = line.trim();
31 if line.is_empty() {
32 continue;
33 }
34 let v: Value = match serde_json::from_str(line) {
35 Ok(v) => v,
36 Err(_) => {
37 out.malformed_lines += 1;
38 continue;
39 }
40 };
41 if v.get("type").and_then(Value::as_str) != Some("assistant") {
42 continue;
43 }
44 if v.get("isMeta").and_then(Value::as_bool) == Some(true) {
45 continue;
46 }
47 if v.get("isCompactSummary").and_then(Value::as_bool) == Some(true) {
48 continue;
49 }
50 let msg = match v.get("message") {
51 Some(m) => m,
52 None => continue,
53 };
54 let usage = match msg.get("usage") {
55 Some(u) if u.is_object() => u,
56 _ => continue,
57 };
58
59 let id = msg
60 .get("id")
61 .and_then(Value::as_str)
62 .unwrap_or("")
63 .to_string();
64 let t = super::provider::anthropic::normalize(usage);
68 let mu = MessageUsage {
69 model: msg
70 .get("model")
71 .and_then(Value::as_str)
72 .unwrap_or("")
73 .to_string(),
74 provider: Provider::Anthropic,
75 namespace: "litellm".into(),
76 input_uncached: t.input_uncached,
77 cache_read: t.cache_read,
78 cache_write_5m: t.cache_write_5m,
79 cache_write_1h: t.cache_write_1h,
80 output: t.output,
81 request_input_tokens: t.input_uncached
82 + t.cache_read
83 + t.cache_write_5m
84 + t.cache_write_1h,
85 service_tier: usage
86 .get("service_tier")
87 .and_then(Value::as_str)
88 .map(String::from),
89 };
90
91 let key = if id.is_empty() {
92 format!("__anon_{}", order.len())
93 } else {
94 id
95 };
96 if !by_id.contains_key(&key) {
97 order.push(key.clone());
98 }
99 by_id.insert(key, mu); }
101
102 out.usages = order.into_iter().filter_map(|k| by_id.remove(&k)).collect();
103 Ok(out)
104}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the `claude-mini.jsonl` fixture and checks that two usages come back, one
    /// malformed line is counted, and the first usage carries the last-seen values
    /// rather than a sum.
    #[test]
    fn dedups_streaming_and_keeps_last_usage() {
        let fixture = include_bytes!("../../tests/fixtures/claude-mini.jsonl");
        let ClaudeParse {
            usages,
            malformed_lines,
        } = parse(fixture).unwrap();

        assert_eq!(usages.len(), 2, "usages: {:?}", usages);
        assert_eq!(malformed_lines, 1);

        // First message: the final record for its id must win.
        let first = &usages[0];
        assert_eq!(first.output, 9, "should keep LAST msg_a output, not sum");
        assert_eq!(first.input_uncached, 12);
        assert_eq!(first.cache_read, 120);
        assert_eq!(first.cache_write_5m, 60);
        assert_eq!(first.request_input_tokens, 12 + 120 + 60);

        // Second message.
        let second = &usages[1];
        assert_eq!(second.output, 7);
        assert_eq!(second.input_uncached, 0);
    }
}