cc-token-usage 3.1.1

pub mod heatmap;
pub mod overview;
pub mod project;
pub mod session;
pub mod trend;
pub mod validate;
pub mod wrapped;

use crate::data::models::{
    AttributionData, GlobalDataQuality, HookUsage, PluginUsage, PrLinkInfo, SkillUsage,
    SubagentTypeAggregate, TokenUsage,
};
use chrono::{DateTime, NaiveDate, Utc};
use serde::Serialize;
use std::collections::HashMap;

// ─── Common Aggregation ──────────────────────────────────────────────────────

/// Running token totals accumulated over one or more turns.
///
/// Populated by [`AggregatedTokens::add_usage`], which folds in one
/// `TokenUsage` record at a time and increments `turns`.
#[derive(Debug, Default, Clone, Serialize)]
pub struct AggregatedTokens {
    pub input_tokens: u64,
    pub output_tokens: u64,
    pub cache_creation_tokens: u64, // total cache-creation tokens, kept alongside the TTL split
    pub cache_write_5m_tokens: u64, // cache writes with a 5-minute TTL
    pub cache_write_1h_tokens: u64, // cache writes with a 1-hour TTL
    pub cache_read_tokens: u64,
    pub turns: usize, // number of usage records folded in so far
}

impl AggregatedTokens {
    pub fn add_usage(&mut self, usage: &TokenUsage) {
        self.input_tokens += usage.input_tokens.unwrap_or(0);
        self.output_tokens += usage.output_tokens.unwrap_or(0);
        self.cache_creation_tokens += usage.cache_creation_input_tokens.unwrap_or(0);
        self.cache_read_tokens += usage.cache_read_input_tokens.unwrap_or(0);

        // Extract 5m/1h TTL breakdown from cache_creation detail
        if let Some(ref detail) = usage.cache_creation {
            self.cache_write_5m_tokens += detail.ephemeral_5m_input_tokens.unwrap_or(0);
            self.cache_write_1h_tokens += detail.ephemeral_1h_input_tokens.unwrap_or(0);
        }

        self.turns += 1;
    }

    pub fn context_tokens(&self) -> u64 {
        self.input_tokens + self.cache_creation_tokens + self.cache_read_tokens
    }
}

// ─── Cost Breakdown ─────────────────────────────────────────────────────────

/// Cost of one turn split by token category, plus a `total`.
///
/// Carried on each [`TurnDetail`] as `cost_breakdown`.
// NOTE(review): `total` is presumably the sum of the five components; the
// code that fills it in is not in this file — confirm at the producer.
#[derive(Debug, Clone, Default, Serialize)]
pub struct TurnCostBreakdown {
    pub input_cost: f64,
    pub output_cost: f64,
    pub cache_write_5m_cost: f64,
    pub cache_write_1h_cost: f64,
    pub cache_read_cost: f64,
    pub total: f64,
}

/// Cost split into the same five categories as [`TurnCostBreakdown`], but
/// without a `total` field.
///
/// Used for aggregate views: see `OverviewResult::cost_by_category` and
/// `TrendEntry::cost_by_category`.
#[derive(Debug, Default, Serialize)]
pub struct CostByCategory {
    pub input_cost: f64,
    pub output_cost: f64,
    pub cache_write_5m_cost: f64,
    pub cache_write_1h_cost: f64,
    pub cache_read_cost: f64,
}

// ─── Overview ────────────────────────────────────────────────────────────────

/// Aggregated figures backing the overview report.
///
/// Unlike most result types in this module, this struct carries no derives
/// (it is neither `Debug` nor `Serialize`).
pub struct OverviewResult {
    pub total_sessions: usize,
    pub total_turns: usize,
    pub total_agent_turns: usize,
    pub tokens_by_model: HashMap<String, AggregatedTokens>,
    pub cost_by_model: HashMap<String, f64>,
    pub total_cost: f64,
    pub hourly_distribution: [usize; 24], // 24 buckets; presumably turns per hour — confirm
    pub quality: GlobalDataQuality,
    pub subscription_value: Option<SubscriptionValue>,
    // Added later
    pub weekday_hour_matrix: [[usize; 24]; 7], // [weekday][hour] -> turn count
    pub tool_counts: Vec<(String, usize)>,     // tool name -> use count, sorted
    pub cost_by_category: CostByCategory,      // cost split by category
    pub session_summaries: Vec<SessionSummary>, // summaries of all sessions
    pub total_output_tokens: u64,
    pub total_context_tokens: u64,
    pub avg_cache_hit_rate: f64,
    pub cache_savings: CacheSavings,
    // Efficiency metrics
    pub output_ratio: f64,           // output / total input (as percentage)
    pub cost_per_turn: f64,          // $/turn
    pub tokens_per_output_turn: u64, // avg output tokens per turn
    /// Models whose pricing was estimated via fallback to the latest built-in
    /// Claude. One entry per distinct unknown model name. Empty when every
    /// observed model has explicit pricing.
    pub pricing_warnings: Vec<PricingWarning>,
}

/// One unknown-model pricing fallback, aggregated across all sessions.
///
/// Collected in `OverviewResult::pricing_warnings`, one entry per distinct
/// unknown model name.
#[derive(Debug, Clone, Serialize)]
pub struct PricingWarning {
    /// The model name as it appeared in the JSONL (e.g. "claude-opus-4-8").
    pub unknown_model: String,
    /// The built-in entry whose prices were used as a stand-in.
    pub fallback_to: String,
    /// How many turns from this model contributed to the totals.
    pub turn_count: u64,
    /// Total estimated cost charged at the fallback's rates.
    pub fallback_cost: f64,
}

/// How much money was saved by cache hits versus paying the full input price.
///
/// Exposed on the overview as `OverviewResult::cache_savings`.
#[derive(Debug, Default, Serialize)]
pub struct CacheSavings {
    pub total_saved: f64,             // $ saved by cache reads
    pub without_cache_cost: f64,      // hypothetical cost if all cache_read charged at base_input
    pub with_cache_cost: f64,         // actual cache_read cost
    pub savings_pct: f64,             // percentage saved
    pub by_model: Vec<(String, f64)>, // model -> savings, sorted desc
}

/// Comparison of a subscription price against API-equivalent cost.
///
/// Optional on the overview (`OverviewResult::subscription_value`).
// NOTE(review): `value_multiplier` is presumably `api_equivalent /
// monthly_price`; the computation is not in this file — confirm.
#[derive(Debug, Serialize)]
pub struct SubscriptionValue {
    pub monthly_price: f64,
    pub api_equivalent: f64,
    pub value_multiplier: f64,
}

// ─── Project ─────────────────────────────────────────────────────────────────

/// Result of the per-project analysis: one [`ProjectSummary`] per project.
#[derive(Debug, Serialize)]
pub struct ProjectResult {
    pub projects: Vec<ProjectSummary>,
}

/// Roll-up of sessions, turns, tokens and cost for a single project.
#[derive(Debug, Serialize)]
pub struct ProjectSummary {
    pub name: String,
    pub display_name: String, // presumably a human-friendly form of `name` — confirm
    pub session_count: usize,
    pub total_turns: usize,
    pub agent_turns: usize,
    pub tokens: AggregatedTokens,
    pub cost: f64,
    pub primary_model: String,
}

// ─── Session ─────────────────────────────────────────────────────────────────

/// Full detail for a single session: per-turn rows, totals, metadata,
/// error counters and the session's capability inventory.
#[derive(Debug, Serialize)]
pub struct SessionResult {
    pub session_id: String,
    pub project: String,
    pub turn_details: Vec<TurnDetail>,
    pub agent_summary: AgentSummary,
    pub total_tokens: AggregatedTokens,
    pub total_cost: f64,
    pub stop_reason_counts: HashMap<String, usize>,
    // Added later
    pub duration_minutes: f64,
    pub max_context: u64,
    pub compaction_count: usize,
    pub cache_write_5m_pct: f64, // share of 5m-TTL cache writes
    pub cache_write_1h_pct: f64, // share of 1h-TTL cache writes
    pub model: String,           // primary model
    // ── Phase 1: Data mining metadata ──
    pub title: Option<String>,
    pub tags: Vec<String>,
    pub mode: Option<String>,
    pub pr_links: Vec<PrLinkInfo>,
    // Autonomy
    pub user_prompt_count: usize,
    pub autonomy_ratio: f64, // total_turns / user_prompt_count
    // Errors
    pub api_error_count: usize,
    pub tool_error_count: usize,
    pub truncated_count: usize, // stop_reason == "max_tokens"
    // Speculation
    pub speculation_accepts: usize,
    pub speculation_time_saved_ms: f64,
    // Service info
    pub service_tiers: HashMap<String, usize>,
    pub speeds: HashMap<String, usize>,
    pub inference_geos: HashMap<String, usize>,
    // Git
    pub git_branches: HashMap<String, usize>,
    // Context Collapse
    pub collapse_count: usize,
    pub collapse_summaries: Vec<String>,
    pub collapse_avg_risk: f64,
    pub collapse_max_risk: f64,
    // Attribution
    pub attribution: Option<AttributionData>,
    // Phase 2: session-level capability inventory (Claude Code 2.1.104+/2.1.138+).
    // All empty for older sessions; renderers skip the row when empty.
    pub subagents: Vec<SubagentSummary>,
    pub plugins: Vec<PluginUsage>,
    pub skills: Vec<SkillUsage>,
    pub hooks: Vec<HookUsage>,
    /// Subagents grouped by `agent_type` for chip rendering. Always
    /// derivable from `subagents` (per-agent_id) but exposed as a stable,
    /// pre-aggregated structure for the frontend / text renderer.
    pub subagent_types: Vec<SubagentTypeAggregate>,
    /// Workflow runs (`agent()` orchestrations, Claude Code 2.1.159+) discovered
    /// for this session under `<uuid>/subagents/workflows/wf_<runId>/`. Each
    /// entry combines the run's `wf_<runId>.json` snapshot with the actually
    /// parsed token/cost/agent totals (summed from `subagents` whose
    /// `workflow_run_id` matches). Empty for sessions with no workflow runs.
    pub workflows: Vec<WorkflowSummary>,
    /// Orphan session: scanner picked up subagent jsonl files whose parent
    /// main session jsonl was deleted. Totals still include this session.
    pub is_orphan: bool,
}

/// Summary of one workflow run within a session.
///
/// Combines two data sources:
/// 1. **Declared** (from the `wf_<runId>.json` snapshot): `workflow_name`,
///    `status`, `snapshot_duration_ms`, `snapshot_agent_count`,
///    `snapshot_total_tokens`, `phases`. These are what Claude Code itself
///    recorded for the run; they may be absent if the snapshot is missing,
///    which is why the scalar ones are `Option`s.
/// 2. **Measured** (re-aggregated from the parsed `agent-*.jsonl` transcripts
///    whose `workflow_run_id == run_id`): `parsed_agent_count`, `parsed_turns`,
///    `parsed_output_tokens`, `parsed_cost`. These are the ground-truth numbers
///    the rest of the tool charges into session/overview totals.
///
/// Comparing the snapshot's `snapshot_total_tokens` with `parsed_*` lets the
/// validator confirm workflow tokens are not lost (see `validate.rs`).
///
/// Serializes to camelCase — this is part of the frontend data contract, so
/// field names must not be renamed casually.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct WorkflowSummary {
    /// The workflow run id, e.g. `wf_7c0e6255-566`.
    pub run_id: String,
    /// Human-readable workflow name from the snapshot (e.g. `code-review`).
    pub workflow_name: Option<String>,
    /// Run status from the snapshot, e.g. `completed`, `running`, `failed`.
    pub status: Option<String>,
    /// Wall-clock duration of the run in milliseconds, from the snapshot.
    pub snapshot_duration_ms: Option<u64>,
    /// Agent count as reported by the snapshot (`agentCount`).
    pub snapshot_agent_count: Option<u64>,
    /// Aggregate token count as reported by the snapshot (`totalTokens`).
    pub snapshot_total_tokens: Option<u64>,
    /// Declared phases of the workflow (title + detail), from the snapshot.
    pub phases: Vec<WorkflowPhaseSummary>,
    /// Number of agent transcripts actually parsed for this run.
    pub parsed_agent_count: usize,
    /// Total parsed assistant turns across this run's agents.
    pub parsed_turns: usize,
    /// Total parsed output tokens across this run's agents.
    pub parsed_output_tokens: u64,
    /// Total cost (USD) charged for this run's parsed turns.
    pub parsed_cost: f64,
}

/// One declared workflow phase, surfaced for display.
///
/// Held in `WorkflowSummary::phases`. Both fields are optional.
///
/// Serializes to camelCase — part of the frontend data contract.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct WorkflowPhaseSummary {
    pub title: Option<String>,
    pub detail: Option<String>,
}

/// One subagent's roll-up for the session detail view.
///
/// Distinct from the older [`AgentDetail`] (which keyed off `turn.agent_id`):
/// this is keyed off the **file-level grouping** (one entry per agent JSONL).
/// Unlike `AgentDetail`, `agent_type` and `description` are optional here.
#[derive(Debug, Serialize, Clone)]
pub struct SubagentSummary {
    pub agent_id: String,
    pub agent_type: Option<String>,
    pub description: Option<String>,
    pub turns: usize,
    pub output_tokens: u64,
    pub cost: f64,
}

/// Per-turn row of a session: token counts, cost, context size and the
/// text/tool information attached to the turn.
#[derive(Debug, Serialize)]
pub struct TurnDetail {
    pub turn_number: usize,
    pub timestamp: DateTime<Utc>,
    pub model: String,
    pub input_tokens: u64,
    pub output_tokens: u64,
    pub cache_write_5m_tokens: u64, // cache writes with a 5-minute TTL
    pub cache_write_1h_tokens: u64, // cache writes with a 1-hour TTL
    pub cache_read_tokens: u64,
    pub context_size: u64,
    pub cache_hit_rate: f64,
    pub cost: f64,
    pub cost_breakdown: TurnCostBreakdown, // cost split by category
    pub stop_reason: Option<String>,
    pub is_agent: bool,
    pub is_compaction: bool,            // whether this turn is a compaction event
    pub context_delta: i64,             // context change relative to the previous turn
    pub user_text: Option<String>,      // user message text
    pub assistant_text: Option<String>, // model reply text
    pub tool_names: Vec<String>,        // names of the tools used
}

/// Agent totals for a session, plus one [`AgentDetail`] per agent.
///
/// Stored on `SessionResult::agent_summary`; `Default` yields all-zero totals
/// and an empty `agents` list.
#[derive(Debug, Default, Serialize)]
pub struct AgentSummary {
    pub total_agent_turns: usize,
    pub agent_output_tokens: u64,
    pub agent_cost: f64,
    pub agents: Vec<AgentDetail>,
}

/// Turn, output-token and cost totals for one agent, listed in
/// `AgentSummary::agents`.
///
/// Compare [`SubagentSummary`], which has the same fields but with optional
/// `agent_type` / `description`.
#[derive(Debug, Serialize)]
pub struct AgentDetail {
    pub agent_id: String,
    pub agent_type: String,
    pub description: String,
    pub turns: usize,
    pub output_tokens: u64,
    pub cost: f64,
}

// ─── Session Summary ────────────────────────────────────────────────────────

/// Session-level summary for overview reports and session ranking tables.
///
/// Collected for every session in `OverviewResult::session_summaries`.
#[derive(Debug, Serialize)]
pub struct SessionSummary {
    pub session_id: String,
    pub project_display_name: String,
    /// Human-readable title (`custom-title` > `ai-title`), if Claude Code
    /// recorded one. Mirrors `HtmlSessionSummary.title` so the standalone JSON
    /// export carries the same field as the HTML dashboard payload.
    pub title: Option<String>,
    pub first_timestamp: Option<DateTime<Utc>>,
    pub duration_minutes: f64,
    pub model: String, // primarily used model
    pub turn_count: usize,
    pub agent_turn_count: usize,
    pub output_tokens: u64,
    pub context_tokens: u64,
    pub max_context: u64,
    pub cache_hit_rate: f64,     // average
    pub cache_write_5m_pct: f64, // share of 5m-TTL cache writes
    pub compaction_count: usize,
    pub cost: f64,
    pub tool_use_count: usize,           // number of turns with stop_reason tool_use
    pub top_tools: Vec<(String, usize)>, // tool name -> use count, top 5
    pub turn_details: Option<Vec<TurnDetail>>, // details only present for top sessions
    // Efficiency metrics
    pub output_ratio: f64,  // output / total context (as percentage)
    pub cost_per_turn: f64, // $/turn
    /// True for sessions reconstructed only from subagent files (parent
    /// jsonl deleted). Totals still include these.
    pub is_orphan: bool,
}

// ─── Trend ───────────────────────────────────────────────────────────────────

/// Result of the trend analysis: a list of time-bucketed entries plus the
/// label describing the bucket granularity.
#[derive(Debug, Serialize)]
pub struct TrendResult {
    pub entries: Vec<TrendEntry>,
    pub group_label: String, // "Day" or "Month"
}

/// One time bucket (a day or a month) of the trend report.
#[derive(Debug, Serialize)]
pub struct TrendEntry {
    pub label: String, // "2026-03-15" or "2026-03"
    pub date: NaiveDate,
    pub session_count: usize,
    pub turn_count: usize,
    pub tokens: AggregatedTokens,
    pub cost: f64,
    // Keyed by model name; NOTE(review): the counted unit (turns vs. tokens)
    // is not visible in this file — confirm at the producer.
    pub models: HashMap<String, u64>,
    // Added later
    pub cost_by_category: CostByCategory,
}

/// Alias of [`TrendEntry`], kept so internal code can keep using the
/// `DailyStats` name.
pub type DailyStats = TrendEntry;