cc-token-usage 1.5.0

Analyze Claude Code session token usage, costs, and efficiency
Documentation
use chrono::{DateTime, Utc};
use serde::Deserialize;
use std::path::PathBuf;

// ─── JSONL Deserialization Layer ─────────────────────────────────────────────

/// Top-level tagged union for each line in the JSONL session file.
#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
#[allow(clippy::large_enum_variant)]
pub enum JournalEntry {
    #[serde(rename = "user")]
    User(UserMessage),
    #[serde(rename = "assistant")]
    Assistant(AssistantMessage),
    #[serde(rename = "queue-operation")]
    QueueOperation(serde_json::Value),
    #[serde(rename = "progress")]
    Progress(serde_json::Value),
    #[serde(rename = "system")]
    System(serde_json::Value),
    #[serde(rename = "last-prompt")]
    LastPrompt(serde_json::Value),
    #[serde(rename = "file-history-snapshot")]
    FileHistorySnapshot(serde_json::Value),
    #[serde(other)]
    Unknown,
}

/// A user-authored message entry.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UserMessage {
    pub uuid: Option<String>,
    pub session_id: Option<String>,
    pub timestamp: Option<String>,
    pub cwd: Option<String>,
    pub version: Option<String>,
    pub git_branch: Option<String>,
    pub message: Option<serde_json::Value>,
    pub parent_uuid: Option<String>,
    pub is_sidechain: Option<bool>,
    pub user_type: Option<String>,
}

/// An assistant response entry.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AssistantMessage {
    pub uuid: Option<String>,
    pub session_id: Option<String>,
    pub timestamp: Option<String>,
    pub request_id: Option<String>,
    pub agent_id: Option<String>,
    pub message: Option<ApiMessage>,
    pub cwd: Option<String>,
    pub version: Option<String>,
    pub git_branch: Option<String>,
    pub parent_uuid: Option<String>,
    pub is_sidechain: Option<bool>,
    pub user_type: Option<String>,
}

/// The inner API message returned by Claude.
#[derive(Debug, Deserialize)]
pub struct ApiMessage {
    pub model: Option<String>,
    pub role: Option<String>,
    pub stop_reason: Option<String>,
    pub usage: Option<TokenUsage>,
    pub content: Option<Vec<ContentBlock>>,
}

/// Token usage statistics for a single API call.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct TokenUsage {
    pub input_tokens: Option<u64>,
    pub output_tokens: Option<u64>,
    pub cache_creation_input_tokens: Option<u64>,
    pub cache_read_input_tokens: Option<u64>,
    pub cache_creation: Option<CacheCreationDetail>,
    pub server_tool_use: Option<ServerToolUse>,
    pub service_tier: Option<String>,
    pub speed: Option<String>,
}

/// Breakdown of cache creation tokens by TTL bucket.
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct CacheCreationDetail {
    pub ephemeral_5m_input_tokens: Option<u64>,
    pub ephemeral_1h_input_tokens: Option<u64>,
}

/// Server-side tool usage counters.
#[derive(Debug, Clone, Deserialize)]
pub struct ServerToolUse {
    pub web_search_requests: Option<u64>,
    pub web_fetch_requests: Option<u64>,
}

/// A content block inside a message. Only `text` and `tool_use` are parsed;
/// everything else is captured as `Other`.
#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
pub enum ContentBlock {
    #[serde(rename = "text")]
    Text {
        text: Option<String>,
    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: Option<String>,
        name: Option<String>,
        input: Option<serde_json::Value>,
    },
    #[serde(rename = "thinking")]
    Thinking {
        thinking: Option<String>,
        signature: Option<String>,
    },
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: Option<String>,
        content: Option<serde_json::Value>,
        is_error: Option<bool>,
    },
    #[serde(other)]
    Other,
}

// ─── Validated Data Layer ────────────────────────────────────────────────────

/// A single validated assistant turn, ready for analysis.
#[derive(Debug)]
pub struct ValidatedTurn {
    pub uuid: String,
    pub request_id: Option<String>,
    pub timestamp: DateTime<Utc>,
    pub model: String,
    pub usage: TokenUsage,
    pub stop_reason: Option<String>,
    pub content_types: Vec<String>,
    pub is_agent: bool,
    pub agent_id: Option<String>,
    pub user_text: Option<String>,       // 对应的用户消息文本(截断)
    pub assistant_text: Option<String>,  // assistant 回复文本(截断)
    pub tool_names: Vec<String>,         // 使用的工具名列表
}

/// Metadata about a session JSONL file on disk.
#[derive(Debug)]
pub struct SessionFile {
    pub session_id: String,
    pub project: Option<String>,
    pub file_path: PathBuf,
    pub is_agent: bool,
    pub parent_session_id: Option<String>,
}

/// Aggregated data from a single session.
#[derive(Debug)]
pub struct SessionData {
    pub session_id: String,
    pub project: Option<String>,
    pub turns: Vec<ValidatedTurn>,
    pub agent_turns: Vec<ValidatedTurn>,
    pub first_timestamp: Option<DateTime<Utc>>,
    pub last_timestamp: Option<DateTime<Utc>>,
    pub version: Option<String>,
    pub quality: DataQuality,
}

impl SessionData {
    /// All API responses (main + agent), sorted by timestamp.
    pub fn all_responses(&self) -> Vec<&ValidatedTurn> {
        let mut all: Vec<&ValidatedTurn> = self.turns.iter()
            .chain(self.agent_turns.iter())
            .collect();
        all.sort_by_key(|r| r.timestamp);
        all
    }

    /// Total number of API responses (main + agent).
    pub fn total_turn_count(&self) -> usize {
        self.turns.len() + self.agent_turns.len()
    }

    /// Number of agent API responses.
    pub fn agent_turn_count(&self) -> usize {
        self.agent_turns.len()
    }
}

/// Quality metrics for a single session file.
#[derive(Debug, Default)]
pub struct DataQuality {
    pub total_lines: usize,
    pub valid_turns: usize,
    pub skipped_synthetic: usize,
    pub skipped_sidechain: usize,
    pub skipped_invalid: usize,
    pub skipped_parse_error: usize,
    pub duplicate_turns: usize,
}

/// Quality metrics aggregated across all session files.
#[derive(Debug, Default, Clone)]
pub struct GlobalDataQuality {
    pub total_session_files: usize,
    pub total_agent_files: usize,
    pub orphan_agents: usize,
    pub total_valid_turns: usize,
    pub total_skipped: usize,
    pub time_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
}

// ─── Tests ───────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_assistant_message() {
        let json = r#"{"parentUuid":"abc","isSidechain":false,"type":"assistant","uuid":"def","timestamp":"2026-03-16T13:51:35.912Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"cache_creation_input_tokens":1281,"cache_read_input_tokens":15204,"cache_creation":{"ephemeral_5m_input_tokens":1281,"ephemeral_1h_input_tokens":0},"output_tokens":108,"service_tier":"standard"},"content":[{"type":"text","text":"Hello"}]},"sessionId":"abc-123","version":"2.0.77","cwd":"/tmp","gitBranch":"main","userType":"external","requestId":"req_1"}"#;

        let entry: JournalEntry = serde_json::from_str(json).unwrap();

        match entry {
            JournalEntry::Assistant(msg) => {
                assert_eq!(msg.uuid.as_deref(), Some("def"));
                assert_eq!(msg.session_id.as_deref(), Some("abc-123"));
                assert_eq!(msg.request_id.as_deref(), Some("req_1"));
                assert_eq!(msg.parent_uuid.as_deref(), Some("abc"));
                assert_eq!(msg.is_sidechain, Some(false));

                let api = msg.message.unwrap();
                assert_eq!(api.model.as_deref(), Some("claude-opus-4-6"));
                assert_eq!(api.stop_reason.as_deref(), Some("end_turn"));

                let usage = api.usage.unwrap();
                assert_eq!(usage.input_tokens, Some(3));
                assert_eq!(usage.output_tokens, Some(108));
                assert_eq!(usage.cache_creation_input_tokens, Some(1281));
                assert_eq!(usage.cache_read_input_tokens, Some(15204));
                assert_eq!(usage.service_tier.as_deref(), Some("standard"));

                let cache = usage.cache_creation.unwrap();
                assert_eq!(cache.ephemeral_5m_input_tokens, Some(1281));
                assert_eq!(cache.ephemeral_1h_input_tokens, Some(0));

                let content = api.content.unwrap();
                assert_eq!(content.len(), 1);
                match &content[0] {
                    ContentBlock::Text { text } => {
                        assert_eq!(text.as_deref(), Some("Hello"));
                    }
                    _ => panic!("expected Text content block"),
                }
            }
            _ => panic!("expected Assistant variant"),
        }
    }

    #[test]
    fn test_parse_user_message() {
        let json = r#"{"parentUuid":null,"isSidechain":false,"type":"user","message":{"role":"user","content":[{"type":"text","text":"hello"}]},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1","version":"2.1.80","cwd":"/tmp","gitBranch":"main","userType":"external"}"#;

        let entry: JournalEntry = serde_json::from_str(json).unwrap();

        match entry {
            JournalEntry::User(msg) => {
                assert_eq!(msg.uuid.as_deref(), Some("u1"));
                assert_eq!(msg.session_id.as_deref(), Some("s1"));
                assert_eq!(msg.version.as_deref(), Some("2.1.80"));
                assert_eq!(msg.cwd.as_deref(), Some("/tmp"));
                assert_eq!(msg.git_branch.as_deref(), Some("main"));
                assert!(msg.parent_uuid.is_none());
            }
            _ => panic!("expected User variant"),
        }
    }

    #[test]
    fn test_parse_queue_operation() {
        let json = r#"{"type":"queue-operation","operation":"dequeue","timestamp":"2026-03-16T13:51:19.041Z","sessionId":"abc"}"#;

        let entry: JournalEntry = serde_json::from_str(json).unwrap();

        match entry {
            JournalEntry::QueueOperation(val) => {
                assert_eq!(val.get("operation").and_then(|v| v.as_str()), Some("dequeue"));
                assert_eq!(val.get("sessionId").and_then(|v| v.as_str()), Some("abc"));
            }
            _ => panic!("expected QueueOperation variant"),
        }
    }

    #[test]
    fn test_parse_progress_entry() {
        let json = r#"{"type":"progress","data":{"type":"hook_progress"},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        let entry: JournalEntry = serde_json::from_str(json).unwrap();
        assert!(matches!(entry, JournalEntry::Progress(_)));
    }

    #[test]
    fn test_parse_system_entry() {
        let json = r#"{"type":"system","subtype":"turn_duration","durationMs":1234,"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        let entry: JournalEntry = serde_json::from_str(json).unwrap();
        assert!(matches!(entry, JournalEntry::System(_)));
    }

    #[test]
    fn test_parse_unknown_entry_type() {
        let json = r#"{"type":"some-future-type","data":"whatever","uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z"}"#;
        let entry: JournalEntry = serde_json::from_str(json).unwrap();
        assert!(matches!(entry, JournalEntry::Unknown));
    }

    #[test]
    fn test_parse_thinking_content_block() {
        let json = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"thinking","thinking":"Let me analyze this...","signature":"abc123"},{"type":"text","text":"Here is my answer."}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
        let entry: JournalEntry = serde_json::from_str(json).unwrap();
        match entry {
            JournalEntry::Assistant(msg) => {
                let content = msg.message.unwrap().content.unwrap();
                assert_eq!(content.len(), 2);
                assert!(matches!(&content[0], ContentBlock::Thinking { thinking: Some(t), .. } if t.contains("analyze")));
                assert!(matches!(&content[1], ContentBlock::Text { .. }));
            }
            _ => panic!("expected Assistant variant"),
        }
    }

    #[test]
    fn test_parse_synthetic_message() {
        let json = r#"{"type":"assistant","uuid":"x","timestamp":"2026-03-16T00:00:00Z","message":{"model":"<synthetic>","role":"assistant","stop_reason":"stop_sequence","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"error"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null}"#;

        let entry: JournalEntry = serde_json::from_str(json).unwrap();

        match entry {
            JournalEntry::Assistant(msg) => {
                let api = msg.message.unwrap();
                assert_eq!(api.model.as_deref(), Some("<synthetic>"));
                assert_eq!(api.stop_reason.as_deref(), Some("stop_sequence"));

                let usage = api.usage.unwrap();
                assert_eq!(usage.input_tokens, Some(0));
                assert_eq!(usage.output_tokens, Some(0));

                // synthetic messages typically lack cache_creation detail
                assert!(usage.cache_creation.is_none());
            }
            _ => panic!("expected Assistant variant"),
        }
    }
}