Skip to main content

bamboo_engine/
token_usage_log.rs

//! Append-only, per-LLM-call token-usage log.
//!
//! Each agent LLM call appends one [`TokenUsageRecord`] (as a JSON line) to the
//! session's `token-usage.jsonl`, written into the session's own directory next
//! to `session.json`. Unlike `session.token_usage` — a single snapshot that is
//! overwritten on every call — this log keeps the full per-round history so
//! cache effectiveness and cost can be analyzed offline: confirming the 1-hour
//! prompt-cache TTL keeps hitting across pauses, or measuring the cold round
//! right after a context compression.
//!
//! The record bridges two sources: the prompt-side budget snapshot
//! ([`TokenBudgetUsage`], already on the session) and the server-returned stream
//! stats — notably `cache_creation_input_tokens`, which is NOT part of
//! `TokenBudgetUsage` and would otherwise only exist in the logs.
15
16use bamboo_domain::TokenBudgetUsage;
17use serde::Serialize;
18
19/// One per-LLM-call usage record. Flattened for line-oriented analysis
20/// (jq / DuckDB / pandas over the JSONL).
21#[derive(Debug, Clone, Serialize)]
22pub struct TokenUsageRecord {
23    /// RFC3339 wall-clock timestamp captured when the call completed.
24    pub ts: String,
25    pub session_id: String,
26    pub model: String,
27    pub provider: String,
28    /// Conversation length (message count) at emit time — a monotonic-ish
29    /// ordinal to correlate records with conversation growth.
30    pub message_count: usize,
31
32    // --- server-returned usage (this call) ---
33    pub cache_creation_input_tokens: u64,
34    pub cache_read_input_tokens: u64,
35    /// Non-cached "fresh" input tokens (server-reported), disjoint from the two
36    /// cache counts. The precise prompt size is
37    /// `input_tokens + cache_read + cache_creation`, and the exact cache-hit
38    /// ratio is `cache_read / that_sum`.
39    pub input_tokens: u64,
40    pub output_tokens: u64,
41    pub thinking_tokens: u64,
42
43    // --- prompt-side budget snapshot (this call) ---
44    pub system_tokens: u32,
45    pub summary_tokens: u32,
46    pub window_tokens: u32,
47    pub total_tokens: u32,
48    pub max_context_tokens: u32,
49    pub budget_limit: u32,
50    pub prompt_cached_tool_outputs: usize,
51    pub prompt_cached_tool_tokens_saved: u32,
52    pub truncation_occurred: bool,
53    pub segments_removed: usize,
54}
55
56impl TokenUsageRecord {
57    /// Build a record from the prompt-side budget snapshot (`usage`) and the
58    /// server-side stream stats. The cache-creation count lives only on the
59    /// stream output — it is not part of [`TokenBudgetUsage`] — so it is passed
60    /// in explicitly.
61    #[allow(clippy::too_many_arguments)]
62    pub fn new(
63        ts: String,
64        session_id: &str,
65        model: &str,
66        provider: &str,
67        message_count: usize,
68        usage: Option<&TokenBudgetUsage>,
69        cache_creation_input_tokens: u64,
70        cache_read_input_tokens: u64,
71        input_tokens: u64,
72        output_tokens: u64,
73        thinking_tokens: u64,
74    ) -> Self {
75        Self {
76            ts,
77            session_id: session_id.to_string(),
78            model: model.to_string(),
79            provider: provider.to_string(),
80            message_count,
81            cache_creation_input_tokens,
82            cache_read_input_tokens,
83            input_tokens,
84            output_tokens,
85            thinking_tokens,
86            system_tokens: usage.map(|u| u.system_tokens).unwrap_or(0),
87            summary_tokens: usage.map(|u| u.summary_tokens).unwrap_or(0),
88            window_tokens: usage.map(|u| u.window_tokens).unwrap_or(0),
89            total_tokens: usage.map(|u| u.total_tokens).unwrap_or(0),
90            max_context_tokens: usage.map(|u| u.max_context_tokens).unwrap_or(0),
91            budget_limit: usage.map(|u| u.budget_limit).unwrap_or(0),
92            prompt_cached_tool_outputs: usage.map(|u| u.prompt_cached_tool_outputs).unwrap_or(0),
93            prompt_cached_tool_tokens_saved: usage
94                .map(|u| u.prompt_cached_tool_tokens_saved)
95                .unwrap_or(0),
96            truncation_occurred: usage.map(|u| u.truncation_occurred).unwrap_or(false),
97            segments_removed: usage.map(|u| u.segments_removed).unwrap_or(0),
98        }
99    }
100
101    /// Serialize to a single-line JSON string (no trailing newline; the storage
102    /// layer frames the line).
103    pub fn to_json_line(&self) -> Result<String, serde_json::Error> {
104        serde_json::to_string(self)
105    }
106}
107
#[cfg(test)]
mod tests {
    use super::*;

    /// With a budget snapshot present, the record must serialize to a single
    /// JSON line that carries both the server-side counters (including the
    /// stream-only cache-creation count) and the copied budget fields.
    #[test]
    fn record_serializes_to_single_json_line_with_cache_creation() {
        let usage = TokenBudgetUsage {
            system_tokens: 5000,
            summary_tokens: 2000,
            window_tokens: 3000,
            total_tokens: 10000,
            max_context_tokens: 200_000,
            budget_limit: 180_000,
            truncation_occurred: false,
            segments_removed: 0,
            prompt_cached_tool_outputs: 1,
            prompt_cached_tool_tokens_saved: 42,
            thinking_tokens: 7,
            cache_read_input_tokens: 12_000,
        };
        let record = TokenUsageRecord::new(
            "2026-06-15T00:00:00Z".to_string(),
            "sess-1",
            "claude-opus-4-8",
            "anthropic",
            24,
            Some(&usage),
            1500, // cache_creation — only present on the stream output
            12_000,
            800, // input_tokens (fresh, non-cached)
            300,
            7,
        );

        // Budget-side fields are copied straight from the snapshot.
        assert_eq!(record.system_tokens, 5000);
        assert_eq!(record.summary_tokens, 2000);
        assert_eq!(record.window_tokens, 3000);
        assert_eq!(record.total_tokens, 10000);
        assert_eq!(record.max_context_tokens, 200_000);
        assert_eq!(record.budget_limit, 180_000);
        assert_eq!(record.prompt_cached_tool_outputs, 1);
        assert_eq!(record.prompt_cached_tool_tokens_saved, 42);
        assert!(!record.truncation_occurred);
        assert_eq!(record.segments_removed, 0);

        let line = record.to_json_line().expect("serializes");
        assert!(!line.contains('\n'), "must be a single line");
        assert!(line.contains("\"cache_creation_input_tokens\":1500"));
        assert!(line.contains("\"cache_read_input_tokens\":12000"));
        assert!(line.contains("\"input_tokens\":800"));
        assert!(line.contains("\"session_id\":\"sess-1\""));
        assert!(line.contains("\"total_tokens\":10000"));
        assert!(line.contains("\"prompt_cached_tool_tokens_saved\":42"));
    }

    /// Without a budget snapshot, every budget-derived field falls back to
    /// zero / `false` while the server-side counters are still recorded.
    #[test]
    fn missing_budget_snapshot_falls_back_to_zeroes() {
        let record = TokenUsageRecord::new(
            "2026-06-15T00:00:00Z".to_string(),
            "sess-2",
            "claude-opus-4-8",
            "anthropic",
            3,
            None,
            0,
            0,
            120,
            45,
            0,
        );

        assert_eq!(record.system_tokens, 0);
        assert_eq!(record.summary_tokens, 0);
        assert_eq!(record.window_tokens, 0);
        assert_eq!(record.total_tokens, 0);
        assert_eq!(record.max_context_tokens, 0);
        assert_eq!(record.budget_limit, 0);
        assert_eq!(record.prompt_cached_tool_outputs, 0);
        assert_eq!(record.prompt_cached_tool_tokens_saved, 0);
        assert!(!record.truncation_occurred);
        assert_eq!(record.segments_removed, 0);

        // Server-side values and identity fields are unaffected by the
        // missing snapshot.
        assert_eq!(record.session_id, "sess-2");
        assert_eq!(record.message_count, 3);
        assert_eq!(record.input_tokens, 120);
        assert_eq!(record.output_tokens, 45);

        let line = record.to_json_line().expect("serializes");
        assert!(!line.contains('\n'), "must be a single line");
        assert!(line.contains("\"input_tokens\":120"));
        assert!(line.contains("\"truncation_occurred\":false"));
        assert!(line.contains("\"budget_limit\":0"));
    }
}