Skip to main content

cc_token_usage/data/
models.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3
4// ─── Re-exports from cc-session-jsonl ───────────────────────────────────────
5
6/// Re-export the session file metadata type from cc-session-jsonl.
7///
8/// Note: field name is `path` (not `file_path`). All call-sites have been
9/// updated accordingly.
10pub use cc_session_jsonl::scanner::SessionFile;
11
12// ─── Local Token/Usage Types (kept to avoid changing analysis/pricing/output) ─
13
14/// Token usage statistics for a single API call.
15#[derive(Debug, Clone, Default, Deserialize)]
16pub struct TokenUsage {
17    pub input_tokens: Option<u64>,
18    pub output_tokens: Option<u64>,
19    pub cache_creation_input_tokens: Option<u64>,
20    pub cache_read_input_tokens: Option<u64>,
21    pub cache_creation: Option<CacheCreationDetail>,
22    pub server_tool_use: Option<ServerToolUse>,
23    pub service_tier: Option<String>,
24    pub speed: Option<String>,
25    pub inference_geo: Option<String>,
26}
27
28/// Breakdown of cache creation tokens by TTL bucket.
29#[derive(Debug, Clone, PartialEq, Deserialize)]
30pub struct CacheCreationDetail {
31    pub ephemeral_5m_input_tokens: Option<u64>,
32    pub ephemeral_1h_input_tokens: Option<u64>,
33}
34
35/// Server-side tool usage counters.
36#[derive(Debug, Clone, Deserialize)]
37pub struct ServerToolUse {
38    pub web_search_requests: Option<u64>,
39    pub web_fetch_requests: Option<u64>,
40}
41
42// ─── Conversions from cc-session-jsonl types ─────────────────────────────────
43
44impl From<cc_session_jsonl::types::Usage> for TokenUsage {
45    fn from(u: cc_session_jsonl::types::Usage) -> Self {
46        Self {
47            input_tokens: u.input_tokens,
48            output_tokens: u.output_tokens,
49            cache_creation_input_tokens: u.cache_creation_input_tokens,
50            cache_read_input_tokens: u.cache_read_input_tokens,
51            cache_creation: u.cache_creation.map(|c| CacheCreationDetail {
52                ephemeral_5m_input_tokens: c.ephemeral_5m_input_tokens,
53                ephemeral_1h_input_tokens: c.ephemeral_1h_input_tokens,
54            }),
55            server_tool_use: u.server_tool_use.map(|s| ServerToolUse {
56                web_search_requests: s.web_search_requests,
57                web_fetch_requests: s.web_fetch_requests,
58            }),
59            service_tier: u.service_tier,
60            inference_geo: u.inference_geo,
61            speed: u.speed,
62        }
63    }
64}
65
66// ─── Validated Data Layer ────────────────────────────────────────────────────
67
68/// A single validated assistant turn, ready for analysis.
69#[derive(Debug, Clone)]
70pub struct ValidatedTurn {
71    pub uuid: String,
72    pub request_id: Option<String>,
73    pub timestamp: DateTime<Utc>,
74    pub model: String,
75    pub usage: TokenUsage,
76    pub stop_reason: Option<String>,
77    pub content_types: Vec<String>,
78    pub is_agent: bool,
79    pub agent_id: Option<String>,
80    pub user_text: Option<String>,      // 对应的用户消息文本(截断)
81    pub assistant_text: Option<String>, // assistant 回复文本(截断)
82    pub tool_names: Vec<String>,        // 使用的工具名列表
83    pub service_tier: Option<String>,
84    pub speed: Option<String>,
85    pub inference_geo: Option<String>,
86    pub tool_error_count: usize, // ToolResult blocks with is_error=true
87    pub git_branch: Option<String>, // from the assistant entry's gitBranch field
88}
89
90/// Aggregated data from a single session.
91#[derive(Debug, Clone)]
92pub struct SessionData {
93    pub session_id: String,
94    pub project: Option<String>,
95    pub turns: Vec<ValidatedTurn>,
96    pub agent_turns: Vec<ValidatedTurn>,
97    pub first_timestamp: Option<DateTime<Utc>>,
98    pub last_timestamp: Option<DateTime<Utc>>,
99    pub version: Option<String>,
100    pub quality: DataQuality,
101    pub metadata: SessionMetadata,
102}
103
104// ─── Session Metadata ───────────────────────────────────────────────────────
105
106/// PR link info extracted from pr-link entries.
107#[derive(Debug, Clone, serde::Serialize)]
108pub struct PrLinkInfo {
109    pub number: u64,
110    pub url: String,
111    pub repository: String,
112}
113
/// A committed context collapse event.
///
/// Derives `PartialEq`/`Eq` so events can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CollapseCommit {
    /// Identifier of the collapse.
    pub collapse_id: String,
    /// Summary text of the collapse.
    pub summary: String,
}
120
/// Snapshot of context collapse risk state.
///
/// Derives `PartialEq` so snapshots can be compared; `Eq` is not derivable
/// because of the `f64` fields.
#[derive(Debug, Clone, PartialEq)]
pub struct CollapseSnapshot {
    /// Staged count.
    pub staged_count: usize,
    /// Average risk value.
    pub avg_risk: f64,
    /// Maximum risk value.
    pub max_risk: f64,
    /// Armed flag.
    // NOTE(review): what "armed" triggers is not visible in this file.
    pub armed: bool,
    /// Token count at the last spawn.
    // NOTE(review): exact meaning of "spawn" is not visible in this file.
    pub last_spawn_tokens: u64,
}
130
131/// Attribution data extracted from attribution-snapshot entries.
132#[derive(Debug, Clone, serde::Serialize)]
133pub struct AttributionData {
134    pub surface: String,
135    pub file_count: usize,
136    pub total_claude_contribution: u64,
137    pub prompt_count: Option<u64>,
138    pub escape_count: Option<u64>,
139    pub permission_prompt_count: Option<u64>,
140}
141
142/// Metadata collected from non-assistant/user entries during parsing.
143#[derive(Debug, Default, Clone)]
144pub struct SessionMetadata {
145    pub title: Option<String>, // custom-title > ai-title
146    pub tags: Vec<String>,
147    pub mode: Option<String>, // last-wins
148    pub pr_links: Vec<PrLinkInfo>,
149    pub speculation_accepts: usize,
150    pub speculation_time_saved_ms: f64,
151    pub queue_enqueues: usize,
152    pub queue_dequeues: usize,
153    pub api_error_count: usize,   // assistant entries with api_error/error
154    pub user_prompt_count: usize, // count of user entries
155    pub collapse_commits: Vec<CollapseCommit>,
156    pub collapse_snapshot: Option<CollapseSnapshot>,
157    pub attribution: Option<AttributionData>,
158}
159
160impl SessionData {
161    /// All API responses (main + agent), sorted by timestamp.
162    pub fn all_responses(&self) -> Vec<&ValidatedTurn> {
163        let mut all: Vec<&ValidatedTurn> =
164            self.turns.iter().chain(self.agent_turns.iter()).collect();
165        all.sort_by_key(|r| r.timestamp);
166        all
167    }
168
169    /// Total number of API responses (main + agent).
170    pub fn total_turn_count(&self) -> usize {
171        self.turns.len() + self.agent_turns.len()
172    }
173
174    /// Number of agent API responses.
175    pub fn agent_turn_count(&self) -> usize {
176        self.agent_turns.len()
177    }
178}
179
/// Quality metrics for a single session file.
///
/// All fields are plain counters. Derives `PartialEq`/`Eq` so two metric sets
/// can be compared directly.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct DataQuality {
    /// Total number of lines.
    pub total_lines: usize,
    /// Number of valid turns.
    pub valid_turns: usize,
    /// Number of entries skipped as synthetic.
    pub skipped_synthetic: usize,
    /// Number of entries skipped as sidechain.
    pub skipped_sidechain: usize,
    /// Number of entries skipped as invalid.
    pub skipped_invalid: usize,
    /// Number of entries skipped because of a parse error.
    pub skipped_parse_error: usize,
    /// Number of duplicate turns.
    pub duplicate_turns: usize,
}
191
192/// Quality metrics aggregated across all session files.
193#[derive(Debug, Default, Clone, Serialize)]
194pub struct GlobalDataQuality {
195    pub total_session_files: usize,
196    pub total_agent_files: usize,
197    pub orphan_agents: usize,
198    pub total_valid_turns: usize,
199    pub total_skipped: usize,
200    pub time_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
201}
202
203// ─── Tests ───────────────────────────────────────────────────────────────────
204
#[cfg(test)]
mod tests {
    use super::*;

    /// An assistant entry deserializes into `Entry::Assistant`, and its usage
    /// converts into the local `TokenUsage` with all counters intact.
    #[test]
    fn test_parse_assistant_message() {
        let json = r#"{"parentUuid":"abc","isSidechain":false,"type":"assistant","uuid":"def","timestamp":"2026-03-16T13:51:35.912Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"cache_creation_input_tokens":1281,"cache_read_input_tokens":15204,"cache_creation":{"ephemeral_5m_input_tokens":1281,"ephemeral_1h_input_tokens":0},"output_tokens":108,"service_tier":"standard"},"content":[{"type":"text","text":"Hello"}]},"sessionId":"abc-123","version":"2.0.77","cwd":"/tmp","gitBranch":"main","userType":"external","requestId":"req_1"}"#;

        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();

        match entry {
            cc_session_jsonl::types::Entry::Assistant(msg) => {
                assert_eq!(msg.uuid.as_deref(), Some("def"));
                assert_eq!(msg.session_id.as_deref(), Some("abc-123"));
                assert_eq!(msg.request_id.as_deref(), Some("req_1"));
                assert_eq!(msg.parent_uuid.as_deref(), Some("abc"));
                assert_eq!(msg.is_sidechain, Some(false));

                let api = msg.message.unwrap();
                assert_eq!(api.model.as_deref(), Some("claude-opus-4-6"));
                assert_eq!(api.stop_reason.as_deref(), Some("end_turn"));

                let usage: TokenUsage = api.usage.unwrap().into();
                assert_eq!(usage.input_tokens, Some(3));
                assert_eq!(usage.output_tokens, Some(108));
                assert_eq!(usage.cache_creation_input_tokens, Some(1281));
                assert_eq!(usage.cache_read_input_tokens, Some(15204));
                assert_eq!(usage.service_tier.as_deref(), Some("standard"));

                let cache = usage.cache_creation.unwrap();
                assert_eq!(cache.ephemeral_5m_input_tokens, Some(1281));
                assert_eq!(cache.ephemeral_1h_input_tokens, Some(0));

                let content = api.content.unwrap();
                assert_eq!(content.len(), 1);
                match &content[0] {
                    cc_session_jsonl::types::ContentBlock::Text { text } => {
                        assert_eq!(text.as_deref(), Some("Hello"));
                    }
                    _ => panic!("expected Text content block"),
                }
            }
            _ => panic!("expected Assistant variant"),
        }
    }

    /// A user entry deserializes into `Entry::User` with its envelope fields.
    #[test]
    fn test_parse_user_message() {
        let json = r#"{"parentUuid":null,"isSidechain":false,"type":"user","message":{"role":"user","content":[{"type":"text","text":"hello"}]},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1","version":"2.1.80","cwd":"/tmp","gitBranch":"main","userType":"external"}"#;

        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();

        match entry {
            cc_session_jsonl::types::Entry::User(msg) => {
                assert_eq!(msg.uuid.as_deref(), Some("u1"));
                assert_eq!(msg.session_id.as_deref(), Some("s1"));
                assert_eq!(msg.version.as_deref(), Some("2.1.80"));
                assert_eq!(msg.cwd.as_deref(), Some("/tmp"));
                assert_eq!(msg.git_branch.as_deref(), Some("main"));
                assert!(msg.parent_uuid.is_none());
            }
            _ => panic!("expected User variant"),
        }
    }

    /// A queue-operation entry deserializes into `Entry::QueueOperation`.
    #[test]
    fn test_parse_queue_operation() {
        let json = r#"{"type":"queue-operation","operation":"dequeue","timestamp":"2026-03-16T13:51:19.041Z","sessionId":"abc"}"#;

        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();

        match entry {
            cc_session_jsonl::types::Entry::QueueOperation(val) => {
                assert_eq!(val.operation.as_deref(), Some("dequeue"));
                assert_eq!(val.session_id.as_deref(), Some("abc"));
            }
            _ => panic!("expected QueueOperation variant"),
        }
    }

    /// A progress entry falls through to `Entry::Unknown`.
    #[test]
    fn test_parse_progress_entry() {
        // progress is not a named variant in cc-session-jsonl; it maps to Unknown
        let json = r#"{"type":"progress","data":{"type":"hook_progress"},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();
        assert!(matches!(entry, cc_session_jsonl::types::Entry::Unknown));
    }

    /// A system entry deserializes into `Entry::System`.
    #[test]
    fn test_parse_system_entry() {
        let json = r#"{"type":"system","subtype":"turn_duration","durationMs":1234,"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();
        assert!(matches!(entry, cc_session_jsonl::types::Entry::System(_)));
    }

    /// An unrecognized `type` value deserializes into `Entry::Unknown`.
    #[test]
    fn test_parse_unknown_entry_type() {
        let json = r#"{"type":"some-future-type","data":"whatever","uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z"}"#;
        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();
        assert!(matches!(entry, cc_session_jsonl::types::Entry::Unknown));
    }

    /// Thinking and text content blocks are both parsed, in order.
    #[test]
    fn test_parse_thinking_content_block() {
        let json = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"thinking","thinking":"Let me analyze this...","signature":"abc123"},{"type":"text","text":"Here is my answer."}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();
        match entry {
            cc_session_jsonl::types::Entry::Assistant(msg) => {
                let content = msg.message.unwrap().content.unwrap();
                assert_eq!(content.len(), 2);
                assert!(
                    matches!(&content[0], cc_session_jsonl::types::ContentBlock::Thinking { thinking: Some(t), .. } if t.contains("analyze"))
                );
                assert!(matches!(
                    &content[1],
                    cc_session_jsonl::types::ContentBlock::Text { .. }
                ));
            }
            _ => panic!("expected Assistant variant"),
        }
    }

    /// A `<synthetic>` assistant message parses, with zeroed usage counters.
    #[test]
    fn test_parse_synthetic_message() {
        let json = r#"{"type":"assistant","uuid":"x","timestamp":"2026-03-16T00:00:00Z","message":{"model":"<synthetic>","role":"assistant","stop_reason":"stop_sequence","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"error"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null}"#;

        let entry: cc_session_jsonl::types::Entry = serde_json::from_str(json).unwrap();

        match entry {
            cc_session_jsonl::types::Entry::Assistant(msg) => {
                let api = msg.message.unwrap();
                assert_eq!(api.model.as_deref(), Some("<synthetic>"));
                assert_eq!(api.stop_reason.as_deref(), Some("stop_sequence"));

                let usage: TokenUsage = api.usage.unwrap().into();
                assert_eq!(usage.input_tokens, Some(0));
                assert_eq!(usage.output_tokens, Some(0));

                // synthetic messages typically lack cache_creation detail
                assert!(usage.cache_creation.is_none());
            }
            _ => panic!("expected Assistant variant"),
        }
    }

    /// `TokenUsage::from` carries over every field it has a slot for.
    #[test]
    fn test_token_usage_from_conversion() {
        let lib_usage = cc_session_jsonl::types::Usage {
            input_tokens: Some(100),
            output_tokens: Some(200),
            cache_creation_input_tokens: Some(50),
            cache_read_input_tokens: Some(300),
            cache_creation: Some(cc_session_jsonl::types::CacheCreation {
                ephemeral_5m_input_tokens: Some(30),
                ephemeral_1h_input_tokens: Some(20),
            }),
            server_tool_use: Some(cc_session_jsonl::types::ServerToolUse {
                web_search_requests: Some(2),
                web_fetch_requests: Some(1),
            }),
            service_tier: Some("standard".into()),
            inference_geo: Some("us".into()), // carried over by the conversion
            iterations: None,                 // dropped in conversion
            speed: Some("fast".into()),
        };

        let local: TokenUsage = lib_usage.into();
        assert_eq!(local.input_tokens, Some(100));
        assert_eq!(local.output_tokens, Some(200));
        assert_eq!(local.cache_creation_input_tokens, Some(50));
        assert_eq!(local.cache_read_input_tokens, Some(300));
        assert_eq!(local.service_tier.as_deref(), Some("standard"));
        assert_eq!(local.speed.as_deref(), Some("fast"));
        // The From impl copies inference_geo, so pin it like the other fields.
        assert_eq!(local.inference_geo.as_deref(), Some("us"));

        let cache = local.cache_creation.unwrap();
        assert_eq!(cache.ephemeral_5m_input_tokens, Some(30));
        assert_eq!(cache.ephemeral_1h_input_tokens, Some(20));

        let stu = local.server_tool_use.unwrap();
        assert_eq!(stu.web_search_requests, Some(2));
        assert_eq!(stu.web_fetch_requests, Some(1));
    }
}