// Source path: cc_token_usage/data/models.rs

use chrono::{DateTime, Utc};
use serde::Deserialize;
use std::path::PathBuf;

// ─── JSONL Deserialization Layer ─────────────────────────────────────────────

7/// Top-level tagged union for each line in the JSONL session file.
8#[derive(Debug, Deserialize)]
9#[serde(tag = "type")]
10#[allow(clippy::large_enum_variant)]
11pub enum JournalEntry {
12    #[serde(rename = "user")]
13    User(UserMessage),
14    #[serde(rename = "assistant")]
15    Assistant(AssistantMessage),
16    #[serde(rename = "queue-operation")]
17    QueueOperation(serde_json::Value),
18}
19
20/// A user-authored message entry.
21#[derive(Debug, Deserialize)]
22#[serde(rename_all = "camelCase")]
23pub struct UserMessage {
24    pub uuid: Option<String>,
25    pub session_id: Option<String>,
26    pub timestamp: Option<String>,
27    pub cwd: Option<String>,
28    pub version: Option<String>,
29    pub git_branch: Option<String>,
30    pub message: Option<serde_json::Value>,
31    pub parent_uuid: Option<String>,
32    pub is_sidechain: Option<bool>,
33    pub user_type: Option<String>,
34}
35
36/// An assistant response entry.
37#[derive(Debug, Deserialize)]
38#[serde(rename_all = "camelCase")]
39pub struct AssistantMessage {
40    pub uuid: Option<String>,
41    pub session_id: Option<String>,
42    pub timestamp: Option<String>,
43    pub request_id: Option<String>,
44    pub agent_id: Option<String>,
45    pub message: Option<ApiMessage>,
46    pub cwd: Option<String>,
47    pub version: Option<String>,
48    pub git_branch: Option<String>,
49    pub parent_uuid: Option<String>,
50    pub is_sidechain: Option<bool>,
51    pub user_type: Option<String>,
52}
53
54/// The inner API message returned by Claude.
55#[derive(Debug, Deserialize)]
56pub struct ApiMessage {
57    pub model: Option<String>,
58    pub role: Option<String>,
59    pub stop_reason: Option<String>,
60    pub usage: Option<TokenUsage>,
61    pub content: Option<Vec<ContentBlock>>,
62}
63
64/// Token usage statistics for a single API call.
65#[derive(Debug, Clone, Deserialize)]
66pub struct TokenUsage {
67    pub input_tokens: Option<u64>,
68    pub output_tokens: Option<u64>,
69    pub cache_creation_input_tokens: Option<u64>,
70    pub cache_read_input_tokens: Option<u64>,
71    pub cache_creation: Option<CacheCreationDetail>,
72    pub server_tool_use: Option<ServerToolUse>,
73    pub service_tier: Option<String>,
74    pub speed: Option<String>,
75}
76
77/// Breakdown of cache creation tokens by TTL bucket.
78#[derive(Debug, Clone, PartialEq, Deserialize)]
79pub struct CacheCreationDetail {
80    pub ephemeral_5m_input_tokens: Option<u64>,
81    pub ephemeral_1h_input_tokens: Option<u64>,
82}
83
84/// Server-side tool usage counters.
85#[derive(Debug, Clone, Deserialize)]
86pub struct ServerToolUse {
87    pub web_search_requests: Option<u64>,
88    pub web_fetch_requests: Option<u64>,
89}
90
91/// A content block inside a message. Only `text` and `tool_use` are parsed;
92/// everything else is captured as `Other`.
93#[derive(Debug, Deserialize)]
94#[serde(tag = "type")]
95pub enum ContentBlock {
96    #[serde(rename = "text")]
97    Text {
98        text: Option<String>,
99    },
100    #[serde(rename = "tool_use")]
101    ToolUse {
102        id: Option<String>,
103        name: Option<String>,
104        input: Option<serde_json::Value>,
105    },
106    #[serde(other)]
107    Other,
108}
109
// ─── Validated Data Layer ────────────────────────────────────────────────────

112/// A single validated assistant turn, ready for analysis.
113#[derive(Debug)]
114pub struct ValidatedTurn {
115    pub uuid: String,
116    pub request_id: Option<String>,
117    pub timestamp: DateTime<Utc>,
118    pub model: String,
119    pub usage: TokenUsage,
120    pub stop_reason: Option<String>,
121    pub content_types: Vec<String>,
122    pub is_agent: bool,
123    pub agent_id: Option<String>,
124    pub user_text: Option<String>,       // 对应的用户消息文本(截断)
125    pub assistant_text: Option<String>,  // assistant 回复文本(截断)
126    pub tool_names: Vec<String>,         // 使用的工具名列表
127}
128
/// Metadata about a session JSONL file on disk.
#[derive(Debug)]
pub struct SessionFile {
    pub session_id: String,
    pub project: Option<String>,
    /// Location of the file on disk.
    pub file_path: PathBuf,
    pub is_agent: bool,
    // NOTE(review): presumably only set for agent files — confirm against the scanner.
    pub parent_session_id: Option<String>,
}

139/// Aggregated data from a single session.
140#[derive(Debug)]
141pub struct SessionData {
142    pub session_id: String,
143    pub project: Option<String>,
144    pub turns: Vec<ValidatedTurn>,
145    pub agent_turns: Vec<ValidatedTurn>,
146    pub first_timestamp: Option<DateTime<Utc>>,
147    pub last_timestamp: Option<DateTime<Utc>>,
148    pub version: Option<String>,
149    pub quality: DataQuality,
150}
151
/// Quality metrics for a single session file.
///
/// All fields are plain counters; `DataQuality::default()` starts every one
/// of them at zero.
#[derive(Debug, Default)]
pub struct DataQuality {
    pub total_lines: usize,
    pub valid_turns: usize,
    pub skipped_synthetic: usize,
    pub skipped_sidechain: usize,
    pub skipped_invalid: usize,
    pub skipped_parse_error: usize,
    pub duplicate_turns: usize,
}

164/// Quality metrics aggregated across all session files.
165#[derive(Debug, Default, Clone)]
166pub struct GlobalDataQuality {
167    pub total_session_files: usize,
168    pub total_agent_files: usize,
169    pub orphan_agents: usize,
170    pub total_valid_turns: usize,
171    pub total_skipped: usize,
172    pub time_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
173}
174
// ─── Tests ───────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// A full assistant line: entry-level camelCase keys, the nested API
    /// message, usage counters, cache breakdown and a single text block.
    #[test]
    fn test_parse_assistant_message() {
        let json = r#"{"parentUuid":"abc","isSidechain":false,"type":"assistant","uuid":"def","timestamp":"2026-03-16T13:51:35.912Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"cache_creation_input_tokens":1281,"cache_read_input_tokens":15204,"cache_creation":{"ephemeral_5m_input_tokens":1281,"ephemeral_1h_input_tokens":0},"output_tokens":108,"service_tier":"standard"},"content":[{"type":"text","text":"Hello"}]},"sessionId":"abc-123","version":"2.0.77","cwd":"/tmp","gitBranch":"main","userType":"external","requestId":"req_1"}"#;

        let entry: JournalEntry =
            serde_json::from_str(json).expect("assistant fixture should deserialize");

        match entry {
            JournalEntry::Assistant(msg) => {
                assert_eq!(msg.uuid.as_deref(), Some("def"));
                assert_eq!(msg.session_id.as_deref(), Some("abc-123"));
                assert_eq!(msg.request_id.as_deref(), Some("req_1"));
                assert_eq!(msg.parent_uuid.as_deref(), Some("abc"));
                assert_eq!(msg.is_sidechain, Some(false));
                // The fixture has no `agentId` key.
                assert!(msg.agent_id.is_none());

                let api = msg.message.unwrap();
                assert_eq!(api.model.as_deref(), Some("claude-opus-4-6"));
                assert_eq!(api.role.as_deref(), Some("assistant"));
                assert_eq!(api.stop_reason.as_deref(), Some("end_turn"));

                let usage = api.usage.unwrap();
                assert_eq!(usage.input_tokens, Some(3));
                assert_eq!(usage.output_tokens, Some(108));
                assert_eq!(usage.cache_creation_input_tokens, Some(1281));
                assert_eq!(usage.cache_read_input_tokens, Some(15204));
                assert_eq!(usage.service_tier.as_deref(), Some("standard"));

                let cache = usage.cache_creation.unwrap();
                assert_eq!(cache.ephemeral_5m_input_tokens, Some(1281));
                assert_eq!(cache.ephemeral_1h_input_tokens, Some(0));

                let content = api.content.unwrap();
                assert_eq!(content.len(), 1);
                match &content[0] {
                    ContentBlock::Text { text } => {
                        assert_eq!(text.as_deref(), Some("Hello"));
                    }
                    _ => panic!("expected Text content block"),
                }
            }
            _ => panic!("expected Assistant variant"),
        }
    }

    /// A user line: a JSON `null` for `parentUuid` must come back as `None`.
    #[test]
    fn test_parse_user_message() {
        let json = r#"{"parentUuid":null,"isSidechain":false,"type":"user","message":{"role":"user","content":[{"type":"text","text":"hello"}]},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1","version":"2.1.80","cwd":"/tmp","gitBranch":"main","userType":"external"}"#;

        let entry: JournalEntry =
            serde_json::from_str(json).expect("user fixture should deserialize");

        match entry {
            JournalEntry::User(msg) => {
                assert_eq!(msg.uuid.as_deref(), Some("u1"));
                assert_eq!(msg.session_id.as_deref(), Some("s1"));
                assert_eq!(msg.version.as_deref(), Some("2.1.80"));
                assert_eq!(msg.cwd.as_deref(), Some("/tmp"));
                assert_eq!(msg.git_branch.as_deref(), Some("main"));
                assert!(msg.parent_uuid.is_none());
                assert_eq!(msg.is_sidechain, Some(false));
                assert_eq!(msg.user_type.as_deref(), Some("external"));
                assert!(msg.message.is_some());
            }
            _ => panic!("expected User variant"),
        }
    }

    /// A queue-operation line is kept as untyped JSON with its keys intact.
    #[test]
    fn test_parse_queue_operation() {
        let json = r#"{"type":"queue-operation","operation":"dequeue","timestamp":"2026-03-16T13:51:19.041Z","sessionId":"abc"}"#;

        let entry: JournalEntry =
            serde_json::from_str(json).expect("queue-operation fixture should deserialize");

        match entry {
            JournalEntry::QueueOperation(val) => {
                assert_eq!(val.get("operation").and_then(|v| v.as_str()), Some("dequeue"));
                assert_eq!(val.get("sessionId").and_then(|v| v.as_str()), Some("abc"));
            }
            _ => panic!("expected QueueOperation variant"),
        }
    }

    /// A `<synthetic>` assistant line still parses as an ordinary assistant
    /// entry; telling it apart is left to whoever inspects the `model` field.
    #[test]
    fn test_parse_synthetic_message() {
        let json = r#"{"type":"assistant","uuid":"x","timestamp":"2026-03-16T00:00:00Z","message":{"model":"<synthetic>","role":"assistant","stop_reason":"stop_sequence","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"error"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null}"#;

        let entry: JournalEntry =
            serde_json::from_str(json).expect("synthetic fixture should deserialize");

        match entry {
            JournalEntry::Assistant(msg) => {
                let api = msg.message.unwrap();
                assert_eq!(api.model.as_deref(), Some("<synthetic>"));
                assert_eq!(api.stop_reason.as_deref(), Some("stop_sequence"));

                let usage = api.usage.unwrap();
                assert_eq!(usage.input_tokens, Some(0));
                assert_eq!(usage.output_tokens, Some(0));

                // synthetic messages typically lack cache_creation detail
                assert!(usage.cache_creation.is_none());
            }
            _ => panic!("expected Assistant variant"),
        }
    }
}