1use super::provider::{self, ProviderTokens};
15use crate::error::ObolError;
16use crate::model::{MessageUsage, Provider};
17use serde_json::Value;
18
/// Values of an `obol.usage` row's `v` field that this parser accepts.
///
/// A row whose `v` is absent, not a string, or not listed here is rejected
/// with a `MalformedTranscript` error rather than being skipped.
const SCHEMA_VERSIONS: &[&str] = &["2026-06-08"];
23
24pub fn parse(bytes: &[u8]) -> Result<Vec<MessageUsage>, ObolError> {
25 let text = std::str::from_utf8(bytes).map_err(|e| ObolError::MalformedTranscript {
26 line: 0,
27 msg: e.to_string(),
28 })?;
29
30 let mut out = Vec::new();
31 for (i, line) in text.lines().enumerate() {
32 let line = line.trim();
33 if line.is_empty() {
34 continue;
35 }
36 let v: Value = match serde_json::from_str(line) {
39 Ok(v) => v,
40 Err(_) => continue,
41 };
42 if v.get("type").and_then(Value::as_str) != Some("obol.usage") {
43 continue;
44 }
45 out.push(parse_row(&v, i + 1)?);
46 }
47 Ok(out)
48}
49
50fn parse_row(v: &Value, line: usize) -> Result<MessageUsage, ObolError> {
51 let err = |msg: String| ObolError::MalformedTranscript { line, msg };
52
53 let ver = v.get("v").and_then(Value::as_str).unwrap_or("");
54 if !SCHEMA_VERSIONS.contains(&ver) {
55 return Err(err(format!("unknown obol.usage schema version {ver:?}")));
56 }
57
58 let provider_tag = v
59 .get("provider")
60 .and_then(Value::as_str)
61 .ok_or_else(|| err("obol.usage row missing `provider`".into()))?;
62 let usage = v
63 .get("usage")
64 .filter(|u| u.is_object())
65 .ok_or_else(|| err("obol.usage row missing `usage` object".into()))?;
66
67 let (provider, tokens): (Provider, ProviderTokens) = match provider_tag {
68 "anthropic" => (Provider::Anthropic, provider::anthropic::normalize(usage)),
69 "openai" => (Provider::OpenAI, provider::openai::normalize(usage)),
70 other => {
71 return Err(err(format!(
72 "no usage normalizer for provider {other:?} (supported: anthropic, openai)"
73 )))
74 }
75 };
76
77 let request_input_tokens =
78 tokens.input_uncached + tokens.cache_read + tokens.cache_write_5m + tokens.cache_write_1h;
79
80 Ok(MessageUsage {
81 model: v
82 .get("model")
83 .and_then(Value::as_str)
84 .unwrap_or("")
85 .to_string(),
86 provider,
87 namespace: "litellm".into(),
88 input_uncached: tokens.input_uncached,
89 cache_read: tokens.cache_read,
90 cache_write_5m: tokens.cache_write_5m,
91 cache_write_1h: tokens.cache_write_1h,
92 output: tokens.output,
93 request_input_tokens,
94 service_tier: v
95 .get("service_tier")
96 .and_then(Value::as_str)
97 .map(String::from),
98 native_cost_usd: None,
99 })
100}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete Anthropic `obol.usage` row, including a service tier and the
    /// split 5m/1h cache-creation counters.
    const ANTHROPIC_ROW: &str = r#"{"type":"obol.usage","v":"2026-06-08","provider":"anthropic","model":"claude-opus-4-8","service_tier":"standard","usage":{"input_tokens":12,"cache_read_input_tokens":120,"cache_creation_input_tokens":60,"cache_creation":{"ephemeral_5m_input_tokens":50,"ephemeral_1h_input_tokens":10},"output_tokens":9}}"#;

    /// A complete OpenAI `obol.usage` row with cached-input and reasoning details
    /// and no service tier.
    const OPENAI_ROW: &str = r#"{"type":"obol.usage","v":"2026-06-08","provider":"openai","model":"gpt-5.5","usage":{"input_tokens":100,"input_tokens_details":{"cached_tokens":40},"output_tokens":20,"output_tokens_details":{"reasoning_tokens":5}}}"#;

    /// Both supported providers parse into fully populated `MessageUsage` values,
    /// in transcript order.
    #[test]
    fn parses_anthropic_and_openai_rows() {
        let transcript = format!("{}\n{}\n", ANTHROPIC_ROW, OPENAI_ROW);

        let expected_anthropic = MessageUsage {
            model: "claude-opus-4-8".into(),
            provider: Provider::Anthropic,
            namespace: "litellm".into(),
            input_uncached: 12,
            cache_read: 120,
            cache_write_5m: 50,
            cache_write_1h: 10,
            output: 9,
            request_input_tokens: 192,
            service_tier: Some("standard".into()),
            native_cost_usd: None,
        };
        let expected_openai = MessageUsage {
            model: "gpt-5.5".into(),
            provider: Provider::OpenAI,
            namespace: "litellm".into(),
            input_uncached: 60,
            cache_read: 40,
            cache_write_5m: 0,
            cache_write_1h: 0,
            output: 25,
            request_input_tokens: 100,
            service_tier: None,
            native_cost_usd: None,
        };

        let parsed = parse(transcript.as_bytes()).unwrap();
        assert_eq!(parsed, vec![expected_anthropic, expected_openai]);
    }

    /// A row carrying an unsupported `v` fails the whole parse and reports the
    /// row's line number.
    #[test]
    fn unknown_schema_version_is_a_loud_error() {
        let row = r#"{"type":"obol.usage","v":"2099-12-31","provider":"anthropic","model":"x","usage":{"input_tokens":1,"output_tokens":1}}"#;
        let e = parse(row.as_bytes()).unwrap_err();
        let reported_on_first_line = matches!(e, ObolError::MalformedTranscript { line: 1, .. });
        assert!(reported_on_first_line, "got {e:?}");
    }

    /// An `obol.usage` row without a `usage` object is an error, not a skipped line.
    #[test]
    fn missing_usage_object_is_a_loud_error() {
        let row = r#"{"type":"obol.usage","v":"2026-06-08","provider":"anthropic","model":"x"}"#;
        let outcome = parse(row.as_bytes());
        assert!(outcome.is_err());
    }

    /// A provider tag with no normalizer is an error, not a skipped line.
    #[test]
    fn unknown_provider_is_a_loud_error() {
        let row = r#"{"type":"obol.usage","v":"2026-06-08","provider":"mystery","model":"x","usage":{"input_tokens":1}}"#;
        let outcome = parse(row.as_bytes());
        assert!(outcome.is_err());
    }

    /// Blank lines, non-JSON lines and JSON of another `type` are ignored while
    /// a valid row in the same transcript is still returned.
    #[test]
    fn skips_blank_and_non_obol_lines_but_keeps_valid_rows() {
        let transcript = format!(
            "\nnot json\n{{\"type\":\"something_else\"}}\n{}\n",
            ANTHROPIC_ROW
        );
        let parsed = parse(transcript.as_bytes()).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].model, "claude-opus-4-8");
    }

    /// A row without `model` still parses; the model is reported as the empty string.
    #[test]
    fn missing_model_yields_empty_model_for_loud_unpriced() {
        let row = r#"{"type":"obol.usage","v":"2026-06-08","provider":"anthropic","usage":{"input_tokens":1,"output_tokens":1}}"#;
        let parsed = parse(row.as_bytes()).unwrap();
        assert_eq!(parsed[0].model, "");
    }

    /// The same Anthropic usage object must produce identical token buckets
    /// whether it arrives through the Claude transcript parser or as an
    /// `obol.usage` row.
    #[test]
    fn anthropic_buckets_match_the_claude_dialect() {
        let usage = r#"{"input_tokens":12,"cache_read_input_tokens":120,"cache_creation_input_tokens":60,"cache_creation":{"ephemeral_5m_input_tokens":50,"ephemeral_1h_input_tokens":10},"output_tokens":9}"#;
        let claude_line = format!(
            r#"{{"type":"assistant","message":{{"id":"m1","model":"m","usage":{usage}}}}}"#
        );
        let obol_line = format!(
            r#"{{"type":"obol.usage","v":"2026-06-08","provider":"anthropic","model":"m","usage":{usage}}}"#
        );

        let from_claude = crate::transcript::claude::parse(claude_line.as_bytes())
            .unwrap()
            .usages;
        let from_obol = parse(obol_line.as_bytes()).unwrap();

        // Compare only the token buckets; other fields are outside this test's scope.
        let buckets = |u: &MessageUsage| {
            (
                u.input_uncached,
                u.cache_read,
                u.cache_write_5m,
                u.cache_write_1h,
                u.output,
                u.request_input_tokens,
            )
        };
        assert_eq!(buckets(&from_claude[0]), buckets(&from_obol[0]));
    }
}