// codetether_agent/telemetry/tokens/counter/record_model.rs
1//! Per-model recording helpers for [`super::AtomicTokenCounter`].
2//!
3//! Split from `counter.rs` to keep both files under the 50-line limit.
4
5use super::AtomicTokenCounter;
6
7impl AtomicTokenCounter {
8 /// Record per-model usage without prompt-cache data.
9 pub fn record_model_usage(&self, model: &str, prompt: u64, completion: u64) {
10 self.record_model_usage_with_cache(model, prompt, completion, 0, 0);
11 }
12
13 /// Record a completion's usage including prompt-cache read/write tokens.
14 ///
15 /// `prompt` is the *non-cached* input tokens billed at full price.
16 /// `cache_read` is billed at 10% of input price; `cache_write` at 125%
17 /// on Anthropic / Bedrock. See [`crate::provider::pricing`].
18 ///
19 /// # Examples
20 ///
21 /// ```rust
22 /// use codetether_agent::telemetry::AtomicTokenCounter;
23 ///
24 /// let c = AtomicTokenCounter::new();
25 /// c.record_model_usage_with_cache("claude-sonnet-4", 500, 200, 1_000, 0);
26 /// assert_eq!(c.cache_usage_for("claude-sonnet-4"), (1_000, 0));
27 /// assert_eq!(c.last_prompt_tokens_for("claude-sonnet-4"), Some(1_500));
28 /// ```
29 pub fn record_model_usage_with_cache(
30 &self,
31 model: &str,
32 prompt: u64,
33 completion: u64,
34 cache_read: u64,
35 cache_write: u64,
36 ) {
37 tracing::debug!(
38 model,
39 prompt,
40 completion,
41 cache_read,
42 cache_write,
43 "Recording model usage"
44 );
45 self.record(prompt, completion);
46
47 if let Ok(mut usage) = self.model_usage.try_lock() {
48 let entry = usage.entry(model.to_string()).or_insert((0, 0));
49 entry.0 += prompt;
50 entry.1 += completion;
51 }
52 if let Ok(mut last) = self.model_last_prompt_tokens.try_lock() {
53 // `prompt` is the *current turn's* full context window
54 // (all prior messages re-sent to the provider), which is the
55 // signal the TUI wants for its context-% badge.
56 last.insert(model.to_string(), prompt + cache_read + cache_write);
57 }
58 if (cache_read > 0 || cache_write > 0)
59 && let Ok(mut cache) = self.model_cache_usage.try_lock()
60 {
61 let entry = cache.entry(model.to_string()).or_insert((0, 0));
62 entry.0 += cache_read;
63 entry.1 += cache_write;
64 }
65 }
66
67 /// Cumulative `(cache_read, cache_write)` token counts for a model.
68 /// Returns `(0, 0)` if the map is contended or the model is unknown.
69 pub fn cache_usage_for(&self, model: &str) -> (u64, u64) {
70 self.model_cache_usage
71 .try_lock()
72 .ok()
73 .and_then(|m| m.get(model).copied())
74 .unwrap_or((0, 0))
75 }
76
77 /// Most recent turn's full prompt token count (including cache) for a model.
78 ///
79 /// This reflects the *current* in-flight context size (what the next
80 /// request will send), not cumulative lifetime tokens. Returns `None`
81 /// if no completion has been recorded for the model yet.
82 pub fn last_prompt_tokens_for(&self, model: &str) -> Option<u64> {
83 self.model_last_prompt_tokens
84 .try_lock()
85 .ok()
86 .and_then(|m| m.get(model).copied())
87 }
88}