Skip to main content

cc_token_usage/data/
models.rs

1use chrono::{DateTime, Utc};
2use serde::Deserialize;
3use std::path::PathBuf;
4
// ─── JSONL Deserialization Layer ─────────────────────────────────────────────
6
/// Top-level tagged union for each line in the JSONL session file.
///
/// The variant is selected by the JSON object's `"type"` field (serde's
/// internally tagged representation). `user` and `assistant` lines are parsed
/// into typed structs, the other listed types keep their payload as an
/// untyped [`serde_json::Value`], and any `"type"` value not listed here
/// deserializes to [`JournalEntry::Unknown`] rather than producing an error.
#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
// NOTE(review): the typed `User`/`Assistant` payloads are presumably what
// trips this lint next to the small `Unknown` variant — confirm that leaving
// them unboxed is intentional.
#[allow(clippy::large_enum_variant)]
pub enum JournalEntry {
    /// `"type": "user"` — parsed into [`UserMessage`].
    #[serde(rename = "user")]
    User(UserMessage),
    /// `"type": "assistant"` — parsed into [`AssistantMessage`].
    #[serde(rename = "assistant")]
    Assistant(AssistantMessage),
    /// `"type": "queue-operation"` — payload kept untyped.
    #[serde(rename = "queue-operation")]
    QueueOperation(serde_json::Value),
    /// `"type": "progress"` — payload kept untyped.
    #[serde(rename = "progress")]
    Progress(serde_json::Value),
    /// `"type": "system"` — payload kept untyped.
    #[serde(rename = "system")]
    System(serde_json::Value),
    /// `"type": "last-prompt"` — payload kept untyped.
    #[serde(rename = "last-prompt")]
    LastPrompt(serde_json::Value),
    /// `"type": "file-history-snapshot"` — payload kept untyped.
    #[serde(rename = "file-history-snapshot")]
    FileHistorySnapshot(serde_json::Value),
    /// Catch-all for any other `"type"` value; the payload is discarded.
    #[serde(other)]
    Unknown,
}
29
/// A user-authored message entry (a `"type": "user"` line).
///
/// JSON keys are matched in camelCase (`sessionId`, `gitBranch`,
/// `parentUuid`, `isSidechain`, `userType`) through `rename_all`. Every field
/// is an `Option`, so a key that is absent or `null` yields `None` instead of
/// failing the whole line.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UserMessage {
    pub uuid: Option<String>,
    pub session_id: Option<String>,
    /// Kept as the raw string from the file; no date parsing happens here.
    pub timestamp: Option<String>,
    pub cwd: Option<String>,
    pub version: Option<String>,
    pub git_branch: Option<String>,
    /// The message payload, left as untyped JSON.
    pub message: Option<serde_json::Value>,
    pub parent_uuid: Option<String>,
    pub is_sidechain: Option<bool>,
    pub user_type: Option<String>,
}
45
/// An assistant response entry (a `"type": "assistant"` line).
///
/// JSON keys are matched in camelCase (`sessionId`, `requestId`, `agentId`,
/// `gitBranch`, `parentUuid`, `isSidechain`, `userType`) through
/// `rename_all`. Every field is an `Option`, so a key that is absent or
/// `null` yields `None` instead of failing the whole line.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AssistantMessage {
    pub uuid: Option<String>,
    pub session_id: Option<String>,
    /// Kept as the raw string from the file; no date parsing happens here.
    pub timestamp: Option<String>,
    pub request_id: Option<String>,
    pub agent_id: Option<String>,
    /// The typed API message (model, usage, content blocks).
    pub message: Option<ApiMessage>,
    pub cwd: Option<String>,
    pub version: Option<String>,
    pub git_branch: Option<String>,
    pub parent_uuid: Option<String>,
    pub is_sidechain: Option<bool>,
    pub user_type: Option<String>,
}
63
/// The inner API message returned by Claude.
///
/// Unlike the surrounding entry structs there is no `rename_all` here, so
/// JSON keys must match the field names exactly (e.g. `stop_reason`). All
/// fields are optional and become `None` when absent.
#[derive(Debug, Deserialize)]
pub struct ApiMessage {
    pub model: Option<String>,
    pub role: Option<String>,
    pub stop_reason: Option<String>,
    /// Token counters reported for this response.
    pub usage: Option<TokenUsage>,
    /// The response body as a list of typed content blocks.
    pub content: Option<Vec<ContentBlock>>,
}
73
/// Token usage statistics for a single API call.
///
/// Every counter is optional and is `None` when the key is absent from the
/// JSON. The derived `Default` likewise yields a value with every field set
/// to `None`.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct TokenUsage {
    pub input_tokens: Option<u64>,
    pub output_tokens: Option<u64>,
    pub cache_creation_input_tokens: Option<u64>,
    pub cache_read_input_tokens: Option<u64>,
    /// Optional per-bucket breakdown of the cache creation tokens.
    pub cache_creation: Option<CacheCreationDetail>,
    /// Optional server-side tool request counters.
    pub server_tool_use: Option<ServerToolUse>,
    pub service_tier: Option<String>,
    pub speed: Option<String>,
}
86
/// Breakdown of cache creation tokens by TTL bucket.
///
/// Both buckets are optional and are `None` when the key is absent.
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct CacheCreationDetail {
    /// Tokens reported under the `ephemeral_5m_input_tokens` key.
    pub ephemeral_5m_input_tokens: Option<u64>,
    /// Tokens reported under the `ephemeral_1h_input_tokens` key.
    pub ephemeral_1h_input_tokens: Option<u64>,
}
93
/// Server-side tool usage counters.
///
/// Both counters are optional and are `None` when the key is absent.
#[derive(Debug, Clone, Deserialize)]
pub struct ServerToolUse {
    /// Value of the `web_search_requests` key.
    pub web_search_requests: Option<u64>,
    /// Value of the `web_fetch_requests` key.
    pub web_fetch_requests: Option<u64>,
}
100
/// A content block inside a message, selected by the block's `"type"` field.
///
/// `text`, `tool_use`, `thinking` and `tool_result` blocks are parsed into
/// typed variants; a block with any other `"type"` value is captured as
/// `Other` with its payload discarded. Fields inside each variant are
/// optional and become `None` when absent.
#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
pub enum ContentBlock {
    /// A `"type": "text"` block.
    #[serde(rename = "text")]
    Text {
        text: Option<String>,
    },
    /// A `"type": "tool_use"` block; `input` is kept as untyped JSON.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: Option<String>,
        name: Option<String>,
        input: Option<serde_json::Value>,
    },
    /// A `"type": "thinking"` block.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: Option<String>,
        signature: Option<String>,
    },
    /// A `"type": "tool_result"` block; `content` is kept as untyped JSON.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: Option<String>,
        content: Option<serde_json::Value>,
        is_error: Option<bool>,
    },
    /// Catch-all for any other block type.
    #[serde(other)]
    Other,
}
130
// ─── Validated Data Layer ────────────────────────────────────────────────────
132
/// A single validated assistant turn, ready for analysis.
///
/// Compared with the raw [`AssistantMessage`], the `uuid`, `timestamp`,
/// `model` and `usage` fields are no longer optional, and the timestamp is a
/// parsed `DateTime<Utc>` rather than a string.
#[derive(Debug)]
pub struct ValidatedTurn {
    pub uuid: String,
    pub request_id: Option<String>,
    pub timestamp: DateTime<Utc>,
    pub model: String,
    pub usage: TokenUsage,
    pub stop_reason: Option<String>,
    pub content_types: Vec<String>,
    pub is_agent: bool,
    pub agent_id: Option<String>,
    pub user_text: Option<String>,       // text of the corresponding user message (truncated)
    pub assistant_text: Option<String>,  // text of the assistant reply (truncated)
    pub tool_names: Vec<String>,         // names of the tools that were used
}
149
/// Metadata about a session JSONL file on disk.
#[derive(Debug)]
pub struct SessionFile {
    pub session_id: String,
    pub project: Option<String>,
    /// Location of the JSONL file.
    pub file_path: PathBuf,
    /// Whether this file is an agent file rather than a main session file.
    pub is_agent: bool,
    // NOTE(review): presumably only set for agent files, linking them back to
    // the session that spawned them — confirm against the code that fills it.
    pub parent_session_id: Option<String>,
}
159
/// Aggregated data from a single session.
///
/// Main-session turns and agent turns are stored in separate lists.
#[derive(Debug)]
pub struct SessionData {
    pub session_id: String,
    pub project: Option<String>,
    /// Validated turns from the main session.
    pub turns: Vec<ValidatedTurn>,
    /// Validated turns produced by agents.
    pub agent_turns: Vec<ValidatedTurn>,
    // NOTE(review): presumably the earliest / latest timestamps seen in the
    // session, `None` when there were none — confirm against the loader.
    pub first_timestamp: Option<DateTime<Utc>>,
    pub last_timestamp: Option<DateTime<Utc>>,
    pub version: Option<String>,
    /// Parse-quality counters for this session.
    pub quality: DataQuality,
}
172
173impl SessionData {
174    /// All API responses (main + agent), sorted by timestamp.
175    pub fn all_responses(&self) -> Vec<&ValidatedTurn> {
176        let mut all: Vec<&ValidatedTurn> = self.turns.iter()
177            .chain(self.agent_turns.iter())
178            .collect();
179        all.sort_by_key(|r| r.timestamp);
180        all
181    }
182
183    /// Total number of API responses (main + agent).
184    pub fn total_turn_count(&self) -> usize {
185        self.turns.len() + self.agent_turns.len()
186    }
187
188    /// Number of agent API responses.
189    pub fn agent_turn_count(&self) -> usize {
190        self.agent_turns.len()
191    }
192}
193
/// Quality metrics for a single session file.
///
/// All counters start at zero through the derived `Default`.
// NOTE(review): the code that increments these counters is not part of this
// block; the per-field meanings are implied by the names only — confirm
// against the parser before relying on them.
#[derive(Debug, Default)]
pub struct DataQuality {
    pub total_lines: usize,
    pub valid_turns: usize,
    pub skipped_synthetic: usize,
    pub skipped_sidechain: usize,
    pub skipped_invalid: usize,
    pub skipped_parse_error: usize,
    pub duplicate_turns: usize,
}
205
/// Quality metrics aggregated across all session files.
///
/// The derived `Default` sets every counter to zero and `time_range` to
/// `None`.
#[derive(Debug, Default, Clone)]
pub struct GlobalDataQuality {
    pub total_session_files: usize,
    pub total_agent_files: usize,
    // NOTE(review): presumably agent files whose parent session could not be
    // found — confirm against the code that sets it.
    pub orphan_agents: usize,
    pub total_valid_turns: usize,
    pub total_skipped: usize,
    // NOTE(review): presumably (earliest, latest) timestamps across all
    // sessions — confirm the tuple order against the aggregator.
    pub time_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
}
216
// ─── Tests ───────────────────────────────────────────────────────────────────
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes one JSONL line, failing the test if it does not parse.
    fn parse_line(raw: &str) -> JournalEntry {
        serde_json::from_str(raw).unwrap()
    }

    /// Extracts the assistant payload, failing the test on any other variant.
    fn expect_assistant(entry: JournalEntry) -> AssistantMessage {
        match entry {
            JournalEntry::Assistant(msg) => msg,
            _ => panic!("expected Assistant variant"),
        }
    }

    /// A full assistant line populates the entry, API message, usage and content.
    #[test]
    fn test_parse_assistant_message() {
        let raw = r#"{"parentUuid":"abc","isSidechain":false,"type":"assistant","uuid":"def","timestamp":"2026-03-16T13:51:35.912Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"cache_creation_input_tokens":1281,"cache_read_input_tokens":15204,"cache_creation":{"ephemeral_5m_input_tokens":1281,"ephemeral_1h_input_tokens":0},"output_tokens":108,"service_tier":"standard"},"content":[{"type":"text","text":"Hello"}]},"sessionId":"abc-123","version":"2.0.77","cwd":"/tmp","gitBranch":"main","userType":"external","requestId":"req_1"}"#;

        let assistant = expect_assistant(parse_line(raw));

        // Entry-level fields (camelCase keys).
        assert_eq!(assistant.uuid.as_deref(), Some("def"));
        assert_eq!(assistant.session_id.as_deref(), Some("abc-123"));
        assert_eq!(assistant.request_id.as_deref(), Some("req_1"));
        assert_eq!(assistant.parent_uuid.as_deref(), Some("abc"));
        assert_eq!(assistant.is_sidechain, Some(false));

        // Inner API message (snake_case keys).
        let api = assistant.message.unwrap();
        assert_eq!(api.model.as_deref(), Some("claude-opus-4-6"));
        assert_eq!(api.stop_reason.as_deref(), Some("end_turn"));

        let usage = api.usage.unwrap();
        assert_eq!(usage.input_tokens, Some(3));
        assert_eq!(usage.output_tokens, Some(108));
        assert_eq!(usage.cache_creation_input_tokens, Some(1281));
        assert_eq!(usage.cache_read_input_tokens, Some(15204));
        assert_eq!(usage.service_tier.as_deref(), Some("standard"));

        let cache = usage.cache_creation.unwrap();
        assert_eq!(cache.ephemeral_5m_input_tokens, Some(1281));
        assert_eq!(cache.ephemeral_1h_input_tokens, Some(0));

        let blocks = api.content.unwrap();
        assert_eq!(blocks.len(), 1);
        if let ContentBlock::Text { text } = &blocks[0] {
            assert_eq!(text.as_deref(), Some("Hello"));
        } else {
            panic!("expected Text content block");
        }
    }

    /// A user line maps camelCase keys and turns a `null` parent into `None`.
    #[test]
    fn test_parse_user_message() {
        let raw = r#"{"parentUuid":null,"isSidechain":false,"type":"user","message":{"role":"user","content":[{"type":"text","text":"hello"}]},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1","version":"2.1.80","cwd":"/tmp","gitBranch":"main","userType":"external"}"#;

        let user = match parse_line(raw) {
            JournalEntry::User(msg) => msg,
            _ => panic!("expected User variant"),
        };

        assert_eq!(user.uuid.as_deref(), Some("u1"));
        assert_eq!(user.session_id.as_deref(), Some("s1"));
        assert_eq!(user.version.as_deref(), Some("2.1.80"));
        assert_eq!(user.cwd.as_deref(), Some("/tmp"));
        assert_eq!(user.git_branch.as_deref(), Some("main"));
        assert!(user.parent_uuid.is_none());
    }

    /// Queue operations keep their fields available as untyped JSON.
    #[test]
    fn test_parse_queue_operation() {
        let raw = r#"{"type":"queue-operation","operation":"dequeue","timestamp":"2026-03-16T13:51:19.041Z","sessionId":"abc"}"#;

        if let JournalEntry::QueueOperation(payload) = parse_line(raw) {
            let operation = payload.get("operation").and_then(|v| v.as_str());
            let session = payload.get("sessionId").and_then(|v| v.as_str());
            assert_eq!(operation, Some("dequeue"));
            assert_eq!(session, Some("abc"));
        } else {
            panic!("expected QueueOperation variant");
        }
    }

    /// `progress` lines select the `Progress` variant.
    #[test]
    fn test_parse_progress_entry() {
        let raw = r#"{"type":"progress","data":{"type":"hook_progress"},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        assert!(matches!(parse_line(raw), JournalEntry::Progress(_)));
    }

    /// `system` lines select the `System` variant.
    #[test]
    fn test_parse_system_entry() {
        let raw = r#"{"type":"system","subtype":"turn_duration","durationMs":1234,"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        assert!(matches!(parse_line(raw), JournalEntry::System(_)));
    }

    /// An unrecognized `type` falls back to `Unknown` instead of erroring.
    #[test]
    fn test_parse_unknown_entry_type() {
        let raw = r#"{"type":"some-future-type","data":"whatever","uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z"}"#;
        assert!(matches!(parse_line(raw), JournalEntry::Unknown));
    }

    /// `thinking` blocks are parsed as their own typed variant.
    #[test]
    fn test_parse_thinking_content_block() {
        let raw = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"thinking","thinking":"Let me analyze this...","signature":"abc123"},{"type":"text","text":"Here is my answer."}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;

        let assistant = expect_assistant(parse_line(raw));
        let blocks = assistant.message.unwrap().content.unwrap();

        assert_eq!(blocks.len(), 2);
        assert!(matches!(
            &blocks[0],
            ContentBlock::Thinking { thinking: Some(t), .. } if t.contains("analyze")
        ));
        assert!(matches!(&blocks[1], ContentBlock::Text { .. }));
    }

    /// Synthetic messages parse like any assistant line, with zeroed usage.
    #[test]
    fn test_parse_synthetic_message() {
        let raw = r#"{"type":"assistant","uuid":"x","timestamp":"2026-03-16T00:00:00Z","message":{"model":"<synthetic>","role":"assistant","stop_reason":"stop_sequence","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"error"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null}"#;

        let assistant = expect_assistant(parse_line(raw));

        let api = assistant.message.unwrap();
        assert_eq!(api.model.as_deref(), Some("<synthetic>"));
        assert_eq!(api.stop_reason.as_deref(), Some("stop_sequence"));

        let usage = api.usage.unwrap();
        assert_eq!(usage.input_tokens, Some(0));
        assert_eq!(usage.output_tokens, Some(0));

        // This fixture carries no `cache_creation` object, so the field is absent.
        assert!(usage.cache_creation.is_none());
    }
}
357}