//! Source file: `cc_token_usage/analysis/mod.rs`.

1pub mod heatmap;
2pub mod overview;
3pub mod project;
4pub mod session;
5pub mod trend;
6pub mod validate;
7pub mod wrapped;
8
9use crate::data::models::{
10    AttributionData, GlobalDataQuality, HookUsage, PluginUsage, PrLinkInfo, SkillUsage,
11    SubagentTypeAggregate, TokenUsage,
12};
13use chrono::{DateTime, NaiveDate, Utc};
14use serde::Serialize;
15use std::collections::HashMap;
16
// ─── Common Aggregation ──────────────────────────────────────────────────────

19#[derive(Debug, Default, Clone, Serialize)]
20pub struct AggregatedTokens {
21    pub input_tokens: u64,
22    pub output_tokens: u64,
23    pub cache_creation_tokens: u64, // 保留总量
24    pub cache_write_5m_tokens: u64, // 5分钟TTL缓存写入
25    pub cache_write_1h_tokens: u64, // 1小时TTL缓存写入
26    pub cache_read_tokens: u64,
27    pub turns: usize,
28}
29
30impl AggregatedTokens {
31    pub fn add_usage(&mut self, usage: &TokenUsage) {
32        self.input_tokens += usage.input_tokens.unwrap_or(0);
33        self.output_tokens += usage.output_tokens.unwrap_or(0);
34        self.cache_creation_tokens += usage.cache_creation_input_tokens.unwrap_or(0);
35        self.cache_read_tokens += usage.cache_read_input_tokens.unwrap_or(0);
36
37        // Extract 5m/1h TTL breakdown from cache_creation detail
38        if let Some(ref detail) = usage.cache_creation {
39            self.cache_write_5m_tokens += detail.ephemeral_5m_input_tokens.unwrap_or(0);
40            self.cache_write_1h_tokens += detail.ephemeral_1h_input_tokens.unwrap_or(0);
41        }
42
43        self.turns += 1;
44    }
45
46    pub fn context_tokens(&self) -> u64 {
47        self.input_tokens + self.cache_creation_tokens + self.cache_read_tokens
48    }
49}
50
// ─── Cost Breakdown ─────────────────────────────────────────────────────────

53#[derive(Debug, Clone, Default, Serialize)]
54pub struct TurnCostBreakdown {
55    pub input_cost: f64,
56    pub output_cost: f64,
57    pub cache_write_5m_cost: f64,
58    pub cache_write_1h_cost: f64,
59    pub cache_read_cost: f64,
60    pub total: f64,
61}
62
63#[derive(Debug, Default, Serialize)]
64pub struct CostByCategory {
65    pub input_cost: f64,
66    pub output_cost: f64,
67    pub cache_write_5m_cost: f64,
68    pub cache_write_1h_cost: f64,
69    pub cache_read_cost: f64,
70}
71
// ─── Overview ────────────────────────────────────────────────────────────────

74pub struct OverviewResult {
75    pub total_sessions: usize,
76    pub total_turns: usize,
77    pub total_agent_turns: usize,
78    pub tokens_by_model: HashMap<String, AggregatedTokens>,
79    pub cost_by_model: HashMap<String, f64>,
80    pub total_cost: f64,
81    pub hourly_distribution: [usize; 24],
82    pub quality: GlobalDataQuality,
83    pub subscription_value: Option<SubscriptionValue>,
84    // 新增
85    pub weekday_hour_matrix: [[usize; 24]; 7], // [weekday][hour] -> turn count
86    pub tool_counts: Vec<(String, usize)>,     // 工具名 -> 使用次数，排序
87    pub cost_by_category: CostByCategory,      // 费用按类别分拆
88    pub session_summaries: Vec<SessionSummary>, // 所有 session 的汇总
89    pub total_output_tokens: u64,
90    pub total_context_tokens: u64,
91    pub avg_cache_hit_rate: f64,
92    pub cache_savings: CacheSavings,
93    // Efficiency metrics
94    pub output_ratio: f64,           // output / total input (as percentage)
95    pub cost_per_turn: f64,          // $/turn
96    pub tokens_per_output_turn: u64, // avg output tokens per turn
97    /// Models whose pricing was estimated via fallback to the latest built-in
98    /// Claude. One entry per distinct unknown model name. Empty when every
99    /// observed model has explicit pricing.
100    pub pricing_warnings: Vec<PricingWarning>,
101}
102
103/// One unknown-model fallback aggregated across all sessions.
104#[derive(Debug, Clone, Serialize)]
105pub struct PricingWarning {
106    /// The model name as it appeared in the JSONL (e.g. "claude-opus-4-8").
107    pub unknown_model: String,
108    /// The built-in entry whose prices were used as a stand-in.
109    pub fallback_to: String,
110    /// How many turns from this model contributed to the totals.
111    pub turn_count: u64,
112    /// Total estimated cost charged at the fallback's rates.
113    pub fallback_cost: f64,
114}
115
116/// How much money was saved by cache hits vs paying full input price.
117#[derive(Debug, Default, Serialize)]
118pub struct CacheSavings {
119    pub total_saved: f64,             // $ saved by cache reads
120    pub without_cache_cost: f64,      // hypothetical cost if all cache_read charged at base_input
121    pub with_cache_cost: f64,         // actual cache_read cost
122    pub savings_pct: f64,             // percentage saved
123    pub by_model: Vec<(String, f64)>, // model -> savings, sorted desc
124}
125
126#[derive(Debug, Serialize)]
127pub struct SubscriptionValue {
128    pub monthly_price: f64,
129    pub api_equivalent: f64,
130    pub value_multiplier: f64,
131}
132
// ─── Project ─────────────────────────────────────────────────────────────────

135#[derive(Debug, Serialize)]
136pub struct ProjectResult {
137    pub projects: Vec<ProjectSummary>,
138}
139
140#[derive(Debug, Serialize)]
141pub struct ProjectSummary {
142    pub name: String,
143    pub display_name: String,
144    pub session_count: usize,
145    pub total_turns: usize,
146    pub agent_turns: usize,
147    pub tokens: AggregatedTokens,
148    pub cost: f64,
149    pub primary_model: String,
150}
151
// ─── Session ─────────────────────────────────────────────────────────────────

154#[derive(Debug, Serialize)]
155pub struct SessionResult {
156    pub session_id: String,
157    pub project: String,
158    pub turn_details: Vec<TurnDetail>,
159    pub agent_summary: AgentSummary,
160    pub total_tokens: AggregatedTokens,
161    pub total_cost: f64,
162    pub stop_reason_counts: HashMap<String, usize>,
163    // 新增
164    pub duration_minutes: f64,
165    pub max_context: u64,
166    pub compaction_count: usize,
167    pub cache_write_5m_pct: f64, // 5m TTL 占比
168    pub cache_write_1h_pct: f64, // 1h TTL 占比
169    pub model: String,           // 主力模型
170    // ── Phase 1: Data mining metadata ──
171    pub title: Option<String>,
172    pub tags: Vec<String>,
173    pub mode: Option<String>,
174    pub pr_links: Vec<PrLinkInfo>,
175    // Autonomy
176    pub user_prompt_count: usize,
177    pub autonomy_ratio: f64, // total_turns / user_prompt_count
178    // Errors
179    pub api_error_count: usize,
180    pub tool_error_count: usize,
181    pub truncated_count: usize, // stop_reason == "max_tokens"
182    // Speculation
183    pub speculation_accepts: usize,
184    pub speculation_time_saved_ms: f64,
185    // Service info
186    pub service_tiers: HashMap<String, usize>,
187    pub speeds: HashMap<String, usize>,
188    pub inference_geos: HashMap<String, usize>,
189    // Git
190    pub git_branches: HashMap<String, usize>,
191    // Context Collapse
192    pub collapse_count: usize,
193    pub collapse_summaries: Vec<String>,
194    pub collapse_avg_risk: f64,
195    pub collapse_max_risk: f64,
196    // Attribution
197    pub attribution: Option<AttributionData>,
198    // Phase 2: session-level capability inventory (Claude Code 2.1.104+/2.1.138+).
199    // All empty for older sessions; renderers skip the row when empty.
200    pub subagents: Vec<SubagentSummary>,
201    pub plugins: Vec<PluginUsage>,
202    pub skills: Vec<SkillUsage>,
203    pub hooks: Vec<HookUsage>,
204    /// Subagents grouped by `agent_type` for chip rendering. Always
205    /// derivable from `subagents` (per-agent_id) but exposed as a stable,
206    /// pre-aggregated structure for the frontend / text renderer.
207    pub subagent_types: Vec<SubagentTypeAggregate>,
208    /// Workflow runs (`agent()` orchestrations, Claude Code 2.1.159+) discovered
209    /// for this session under `<uuid>/subagents/workflows/wf_<runId>/`. Each
210    /// entry combines the run's `wf_<runId>.json` snapshot with the actually
211    /// parsed token/cost/agent totals (summed from `subagents` whose
212    /// `workflow_run_id` matches). Empty for sessions with no workflow runs.
213    pub workflows: Vec<WorkflowSummary>,
214    /// Orphan session: scanner picked up subagent jsonl files whose parent
215    /// main session jsonl was deleted. Totals still include this session.
216    pub is_orphan: bool,
217}
218
219/// Summary of one workflow run within a session.
220///
221/// Combines two data sources:
222/// 1. **Declared** (from the `wf_<runId>.json` snapshot): `workflow_name`,
223///    `status`, `snapshot_duration_ms`, `snapshot_agent_count`,
224///    `snapshot_total_tokens`, `phases`. These are what Claude Code itself
225///    recorded for the run; they may be absent if the snapshot is missing.
226/// 2. **Measured** (re-aggregated from the parsed `agent-*.jsonl` transcripts
227///    whose `workflow_run_id == run_id`): `parsed_agent_count`, `parsed_turns`,
228///    `parsed_output_tokens`, `parsed_cost`. These are the ground-truth numbers
229///    the rest of the tool charges into session/overview totals.
230///
231/// Comparing the snapshot's `snapshot_total_tokens` with `parsed_*` lets the
232/// validator confirm workflow tokens are not lost (see `validate.rs`).
233///
234/// Serializes to camelCase — this is part of the frontend data contract.
235#[derive(Debug, Clone, Serialize)]
236#[serde(rename_all = "camelCase")]
237pub struct WorkflowSummary {
238    /// The workflow run id, e.g. `wf_7c0e6255-566`.
239    pub run_id: String,
240    /// Human-readable workflow name from the snapshot (e.g. `code-review`).
241    pub workflow_name: Option<String>,
242    /// Run status from the snapshot, e.g. `completed`, `running`, `failed`.
243    pub status: Option<String>,
244    /// Wall-clock duration of the run in milliseconds, from the snapshot.
245    pub snapshot_duration_ms: Option<u64>,
246    /// Agent count as reported by the snapshot (`agentCount`).
247    pub snapshot_agent_count: Option<u64>,
248    /// Aggregate token count as reported by the snapshot (`totalTokens`).
249    pub snapshot_total_tokens: Option<u64>,
250    /// Declared phases of the workflow (title + detail), from the snapshot.
251    pub phases: Vec<WorkflowPhaseSummary>,
252    /// Number of agent transcripts actually parsed for this run.
253    pub parsed_agent_count: usize,
254    /// Total parsed assistant turns across this run's agents.
255    pub parsed_turns: usize,
256    /// Total parsed output tokens across this run's agents.
257    pub parsed_output_tokens: u64,
258    /// Total cost (USD) charged for this run's parsed turns.
259    pub parsed_cost: f64,
260}
261
262/// One declared workflow phase, surfaced for display.
263///
264/// Serializes to camelCase — part of the frontend data contract.
265#[derive(Debug, Clone, Serialize)]
266#[serde(rename_all = "camelCase")]
267pub struct WorkflowPhaseSummary {
268    pub title: Option<String>,
269    pub detail: Option<String>,
270}
271
272/// One subagent's roll-up for the session detail view.
273///
274/// Distinct from the previous `AgentDetail` (which keyed off `turn.agent_id`):
275/// this is keyed off the **file-level grouping** (one entry per agent JSONL).
276#[derive(Debug, Serialize, Clone)]
277pub struct SubagentSummary {
278    pub agent_id: String,
279    pub agent_type: Option<String>,
280    pub description: Option<String>,
281    pub turns: usize,
282    pub output_tokens: u64,
283    pub cost: f64,
284}
285
286#[derive(Debug, Serialize)]
287pub struct TurnDetail {
288    pub turn_number: usize,
289    pub timestamp: DateTime<Utc>,
290    pub model: String,
291    pub input_tokens: u64,
292    pub output_tokens: u64,
293    pub cache_write_5m_tokens: u64, // 5分钟TTL缓存写入
294    pub cache_write_1h_tokens: u64, // 1小时TTL缓存写入
295    pub cache_read_tokens: u64,
296    pub context_size: u64,
297    pub cache_hit_rate: f64,
298    pub cost: f64,
299    pub cost_breakdown: TurnCostBreakdown, // 费用分拆
300    pub stop_reason: Option<String>,
301    pub is_agent: bool,
302    pub is_compaction: bool,            // 是否是 compaction 事件
303    pub context_delta: i64,             // 与上一 turn 的 context 变化
304    pub user_text: Option<String>,      // 用户消息文本
305    pub assistant_text: Option<String>, // 模型回复文本
306    pub tool_names: Vec<String>,        // 使用的工具名
307}
308
309#[derive(Debug, Default, Serialize)]
310pub struct AgentSummary {
311    pub total_agent_turns: usize,
312    pub agent_output_tokens: u64,
313    pub agent_cost: f64,
314    pub agents: Vec<AgentDetail>,
315}
316
317#[derive(Debug, Serialize)]
318pub struct AgentDetail {
319    pub agent_id: String,
320    pub agent_type: String,
321    pub description: String,
322    pub turns: usize,
323    pub output_tokens: u64,
324    pub cost: f64,
325}
326
// ─── Session Summary ────────────────────────────────────────────────────────

329/// Session-level summary for overview reports and session ranking tables.
330#[derive(Debug, Serialize)]
331pub struct SessionSummary {
332    pub session_id: String,
333    pub project_display_name: String,
334    /// Human-readable title (`custom-title` > `ai-title`), if Claude Code
335    /// recorded one. Mirrors `HtmlSessionSummary.title` so the standalone JSON
336    /// export carries the same field as the HTML dashboard payload.
337    pub title: Option<String>,
338    pub first_timestamp: Option<DateTime<Utc>>,
339    pub duration_minutes: f64,
340    pub model: String, // 主要使用的模型
341    pub turn_count: usize,
342    pub agent_turn_count: usize,
343    pub output_tokens: u64,
344    pub context_tokens: u64,
345    pub max_context: u64,
346    pub cache_hit_rate: f64,     // 平均
347    pub cache_write_5m_pct: f64, // 5m TTL 占比
348    pub compaction_count: usize,
349    pub cost: f64,
350    pub tool_use_count: usize,           // tool_use stop_reason 的次数
351    pub top_tools: Vec<(String, usize)>, // 工具名 -> 使用次数，前5
352    pub turn_details: Option<Vec<TurnDetail>>, // 仅 top sessions 有详情
353    // Efficiency metrics
354    pub output_ratio: f64,  // output / total context (as percentage)
355    pub cost_per_turn: f64, // $/turn
356    /// True for sessions reconstructed only from subagent files (parent
357    /// jsonl deleted). Totals still include these.
358    pub is_orphan: bool,
359}
360
// ─── Trend ───────────────────────────────────────────────────────────────────

363#[derive(Debug, Serialize)]
364pub struct TrendResult {
365    pub entries: Vec<TrendEntry>,
366    pub group_label: String, // "Day" or "Month"
367}
368
369#[derive(Debug, Serialize)]
370pub struct TrendEntry {
371    pub label: String, // "2026-03-15" or "2026-03"
372    pub date: NaiveDate,
373    pub session_count: usize,
374    pub turn_count: usize,
375    pub tokens: AggregatedTokens,
376    pub cost: f64,
377    pub models: HashMap<String, u64>,
378    // 新增
379    pub cost_by_category: CostByCategory,
380}
381
382// Keep DailyStats as alias for internal use
383pub type DailyStats = TrendEntry;
// End of cc_token_usage/analysis/mod.rs