// cc_token_usage/analysis/mod.rs

1pub mod heatmap;
2pub mod overview;
3pub mod project;
4pub mod session;
5pub mod trend;
6pub mod validate;
7pub mod wrapped;
8
9use crate::data::models::{
10    AttributionData, GlobalDataQuality, HookUsage, PluginUsage, PrLinkInfo, SkillUsage,
11    SubagentTypeAggregate, TokenUsage,
12};
13use chrono::{DateTime, NaiveDate, Utc};
14use serde::Serialize;
15use std::collections::HashMap;
16
17// ─── Common Aggregation ──────────────────────────────────────────────────────
18
19#[derive(Debug, Default, Clone, Serialize)]
20pub struct AggregatedTokens {
21    pub input_tokens: u64,
22    pub output_tokens: u64,
23    pub cache_creation_tokens: u64, // 保留总量
24    pub cache_write_5m_tokens: u64, // 5分钟TTL缓存写入
25    pub cache_write_1h_tokens: u64, // 1小时TTL缓存写入
26    pub cache_read_tokens: u64,
27    pub turns: usize,
28}
29
30impl AggregatedTokens {
31    pub fn add_usage(&mut self, usage: &TokenUsage) {
32        self.input_tokens += usage.input_tokens.unwrap_or(0);
33        self.output_tokens += usage.output_tokens.unwrap_or(0);
34        self.cache_creation_tokens += usage.cache_creation_input_tokens.unwrap_or(0);
35        self.cache_read_tokens += usage.cache_read_input_tokens.unwrap_or(0);
36
37        // Extract 5m/1h TTL breakdown from cache_creation detail
38        if let Some(ref detail) = usage.cache_creation {
39            self.cache_write_5m_tokens += detail.ephemeral_5m_input_tokens.unwrap_or(0);
40            self.cache_write_1h_tokens += detail.ephemeral_1h_input_tokens.unwrap_or(0);
41        }
42
43        self.turns += 1;
44    }
45
46    pub fn context_tokens(&self) -> u64 {
47        self.input_tokens + self.cache_creation_tokens + self.cache_read_tokens
48    }
49}
50
51// ─── Cost Breakdown ─────────────────────────────────────────────────────────
52
53#[derive(Debug, Clone, Default, Serialize)]
54pub struct TurnCostBreakdown {
55    pub input_cost: f64,
56    pub output_cost: f64,
57    pub cache_write_5m_cost: f64,
58    pub cache_write_1h_cost: f64,
59    pub cache_read_cost: f64,
60    pub total: f64,
61}
62
63#[derive(Debug, Default, Serialize)]
64pub struct CostByCategory {
65    pub input_cost: f64,
66    pub output_cost: f64,
67    pub cache_write_5m_cost: f64,
68    pub cache_write_1h_cost: f64,
69    pub cache_read_cost: f64,
70}
71
72// ─── Overview ────────────────────────────────────────────────────────────────
73
74pub struct OverviewResult {
75    pub total_sessions: usize,
76    pub total_turns: usize,
77    pub total_agent_turns: usize,
78    pub tokens_by_model: HashMap<String, AggregatedTokens>,
79    pub cost_by_model: HashMap<String, f64>,
80    pub total_cost: f64,
81    pub hourly_distribution: [usize; 24],
82    pub quality: GlobalDataQuality,
83    pub subscription_value: Option<SubscriptionValue>,
84    // 新增
85    pub weekday_hour_matrix: [[usize; 24]; 7], // [weekday][hour] -> turn count
86    pub tool_counts: Vec<(String, usize)>,     // 工具名 -> 使用次数,排序
87    pub cost_by_category: CostByCategory,      // 费用按类别分拆
88    pub session_summaries: Vec<SessionSummary>, // 所有 session 的汇总
89    pub total_output_tokens: u64,
90    pub total_context_tokens: u64,
91    pub avg_cache_hit_rate: f64,
92    pub cache_savings: CacheSavings,
93    // Efficiency metrics
94    pub output_ratio: f64,           // output / total input (as percentage)
95    pub cost_per_turn: f64,          // $/turn
96    pub tokens_per_output_turn: u64, // avg output tokens per turn
97    /// Models whose pricing was estimated via fallback to the latest built-in
98    /// Claude. One entry per distinct unknown model name. Empty when every
99    /// observed model has explicit pricing.
100    pub pricing_warnings: Vec<PricingWarning>,
101}
102
103/// One unknown-model fallback aggregated across all sessions.
104#[derive(Debug, Clone, Serialize)]
105pub struct PricingWarning {
106    /// The model name as it appeared in the JSONL (e.g. "claude-opus-4-8").
107    pub unknown_model: String,
108    /// The built-in entry whose prices were used as a stand-in.
109    pub fallback_to: String,
110    /// How many turns from this model contributed to the totals.
111    pub turn_count: u64,
112    /// Total estimated cost charged at the fallback's rates.
113    pub fallback_cost: f64,
114}
115
116/// How much money was saved by cache hits vs paying full input price.
117#[derive(Debug, Default, Serialize)]
118pub struct CacheSavings {
119    pub total_saved: f64,             // $ saved by cache reads
120    pub without_cache_cost: f64,      // hypothetical cost if all cache_read charged at base_input
121    pub with_cache_cost: f64,         // actual cache_read cost
122    pub savings_pct: f64,             // percentage saved
123    pub by_model: Vec<(String, f64)>, // model -> savings, sorted desc
124}
125
126#[derive(Debug, Serialize)]
127pub struct SubscriptionValue {
128    pub monthly_price: f64,
129    pub api_equivalent: f64,
130    pub value_multiplier: f64,
131}
132
133// ─── Project ─────────────────────────────────────────────────────────────────
134
135#[derive(Debug, Serialize)]
136pub struct ProjectResult {
137    pub projects: Vec<ProjectSummary>,
138}
139
140#[derive(Debug, Serialize)]
141pub struct ProjectSummary {
142    pub name: String,
143    pub display_name: String,
144    pub session_count: usize,
145    pub total_turns: usize,
146    pub agent_turns: usize,
147    pub tokens: AggregatedTokens,
148    pub cost: f64,
149    pub primary_model: String,
150}
151
152// ─── Session ─────────────────────────────────────────────────────────────────
153
154#[derive(Debug, Serialize)]
155pub struct SessionResult {
156    pub session_id: String,
157    pub project: String,
158    pub turn_details: Vec<TurnDetail>,
159    pub agent_summary: AgentSummary,
160    pub total_tokens: AggregatedTokens,
161    pub total_cost: f64,
162    pub stop_reason_counts: HashMap<String, usize>,
163    // 新增
164    pub duration_minutes: f64,
165    pub max_context: u64,
166    pub compaction_count: usize,
167    pub cache_write_5m_pct: f64, // 5m TTL 占比
168    pub cache_write_1h_pct: f64, // 1h TTL 占比
169    pub model: String,           // 主力模型
170    // ── Phase 1: Data mining metadata ──
171    pub title: Option<String>,
172    pub tags: Vec<String>,
173    pub mode: Option<String>,
174    pub pr_links: Vec<PrLinkInfo>,
175    // Autonomy
176    pub user_prompt_count: usize,
177    pub autonomy_ratio: f64, // total_turns / user_prompt_count
178    // Errors
179    pub api_error_count: usize,
180    pub tool_error_count: usize,
181    pub truncated_count: usize, // stop_reason == "max_tokens"
182    // Speculation
183    pub speculation_accepts: usize,
184    pub speculation_time_saved_ms: f64,
185    // Service info
186    pub service_tiers: HashMap<String, usize>,
187    pub speeds: HashMap<String, usize>,
188    pub inference_geos: HashMap<String, usize>,
189    // Git
190    pub git_branches: HashMap<String, usize>,
191    // Context Collapse
192    pub collapse_count: usize,
193    pub collapse_summaries: Vec<String>,
194    pub collapse_avg_risk: f64,
195    pub collapse_max_risk: f64,
196    // Attribution
197    pub attribution: Option<AttributionData>,
198    // Phase 2: session-level capability inventory (Claude Code 2.1.104+/2.1.138+).
199    // All empty for older sessions; renderers skip the row when empty.
200    pub subagents: Vec<SubagentSummary>,
201    pub plugins: Vec<PluginUsage>,
202    pub skills: Vec<SkillUsage>,
203    pub hooks: Vec<HookUsage>,
204    /// Subagents grouped by `agent_type` for chip rendering. Always
205    /// derivable from `subagents` (per-agent_id) but exposed as a stable,
206    /// pre-aggregated structure for the frontend / text renderer.
207    pub subagent_types: Vec<SubagentTypeAggregate>,
208    /// Orphan session: scanner picked up subagent jsonl files whose parent
209    /// main session jsonl was deleted. Totals still include this session.
210    pub is_orphan: bool,
211}
212
213/// One subagent's roll-up for the session detail view.
214///
215/// Distinct from the previous `AgentDetail` (which keyed off `turn.agent_id`):
216/// this is keyed off the **file-level grouping** (one entry per agent JSONL).
217#[derive(Debug, Serialize, Clone)]
218pub struct SubagentSummary {
219    pub agent_id: String,
220    pub agent_type: Option<String>,
221    pub description: Option<String>,
222    pub turns: usize,
223    pub output_tokens: u64,
224    pub cost: f64,
225}
226
227#[derive(Debug, Serialize)]
228pub struct TurnDetail {
229    pub turn_number: usize,
230    pub timestamp: DateTime<Utc>,
231    pub model: String,
232    pub input_tokens: u64,
233    pub output_tokens: u64,
234    pub cache_write_5m_tokens: u64, // 5分钟TTL缓存写入
235    pub cache_write_1h_tokens: u64, // 1小时TTL缓存写入
236    pub cache_read_tokens: u64,
237    pub context_size: u64,
238    pub cache_hit_rate: f64,
239    pub cost: f64,
240    pub cost_breakdown: TurnCostBreakdown, // 费用分拆
241    pub stop_reason: Option<String>,
242    pub is_agent: bool,
243    pub is_compaction: bool,            // 是否是 compaction 事件
244    pub context_delta: i64,             // 与上一 turn 的 context 变化
245    pub user_text: Option<String>,      // 用户消息文本
246    pub assistant_text: Option<String>, // 模型回复文本
247    pub tool_names: Vec<String>,        // 使用的工具名
248}
249
250#[derive(Debug, Default, Serialize)]
251pub struct AgentSummary {
252    pub total_agent_turns: usize,
253    pub agent_output_tokens: u64,
254    pub agent_cost: f64,
255    pub agents: Vec<AgentDetail>,
256}
257
258#[derive(Debug, Serialize)]
259pub struct AgentDetail {
260    pub agent_id: String,
261    pub agent_type: String,
262    pub description: String,
263    pub turns: usize,
264    pub output_tokens: u64,
265    pub cost: f64,
266}
267
268// ─── Session Summary ────────────────────────────────────────────────────────
269
270/// Session-level summary for overview reports and session ranking tables.
271#[derive(Debug, Serialize)]
272pub struct SessionSummary {
273    pub session_id: String,
274    pub project_display_name: String,
275    pub first_timestamp: Option<DateTime<Utc>>,
276    pub duration_minutes: f64,
277    pub model: String, // 主要使用的模型
278    pub turn_count: usize,
279    pub agent_turn_count: usize,
280    pub output_tokens: u64,
281    pub context_tokens: u64,
282    pub max_context: u64,
283    pub cache_hit_rate: f64,     // 平均
284    pub cache_write_5m_pct: f64, // 5m TTL 占比
285    pub compaction_count: usize,
286    pub cost: f64,
287    pub tool_use_count: usize,           // tool_use stop_reason 的次数
288    pub top_tools: Vec<(String, usize)>, // 工具名 -> 使用次数,前5
289    pub turn_details: Option<Vec<TurnDetail>>, // 仅 top sessions 有详情
290    // Efficiency metrics
291    pub output_ratio: f64,  // output / total context (as percentage)
292    pub cost_per_turn: f64, // $/turn
293    /// True for sessions reconstructed only from subagent files (parent
294    /// jsonl deleted). Totals still include these.
295    pub is_orphan: bool,
296}
297
298// ─── Trend ───────────────────────────────────────────────────────────────────
299
300#[derive(Debug, Serialize)]
301pub struct TrendResult {
302    pub entries: Vec<TrendEntry>,
303    pub group_label: String, // "Day" or "Month"
304}
305
306#[derive(Debug, Serialize)]
307pub struct TrendEntry {
308    pub label: String, // "2026-03-15" or "2026-03"
309    pub date: NaiveDate,
310    pub session_count: usize,
311    pub turn_count: usize,
312    pub tokens: AggregatedTokens,
313    pub cost: f64,
314    pub models: HashMap<String, u64>,
315    // 新增
316    pub cost_by_category: CostByCategory,
317}
318
319// Keep DailyStats as alias for internal use
320pub type DailyStats = TrendEntry;