Skip to main content

codetether_agent/telemetry/tokens/counter/
record_model.rs

//! Per-model recording helpers for [`super::AtomicTokenCounter`].
//!
//! Split from `counter.rs` to keep both files under the 50-line limit.

use super::AtomicTokenCounter;

7impl AtomicTokenCounter {
8    /// Record per-model usage without prompt-cache data.
9    pub fn record_model_usage(&self, model: &str, prompt: u64, completion: u64) {
10        self.record_model_usage_with_cache(model, prompt, completion, 0, 0);
11    }
12
13    /// Record a completion's usage including prompt-cache read/write tokens.
14    ///
15    /// `prompt` is the *non-cached* input tokens billed at full price.
16    /// `cache_read` is billed at 10% of input price; `cache_write` at 125%
17    /// on Anthropic / Bedrock. See [`crate::provider::pricing`].
18    ///
19    /// # Examples
20    ///
21    /// ```rust
22    /// use codetether_agent::telemetry::AtomicTokenCounter;
23    ///
24    /// let c = AtomicTokenCounter::new();
25    /// c.record_model_usage_with_cache("claude-sonnet-4", 500, 200, 1_000, 0);
26    /// assert_eq!(c.cache_usage_for("claude-sonnet-4"), (1_000, 0));
27    /// assert_eq!(c.last_prompt_tokens_for("claude-sonnet-4"), Some(1_500));
28    /// ```
29    pub fn record_model_usage_with_cache(
30        &self,
31        model: &str,
32        prompt: u64,
33        completion: u64,
34        cache_read: u64,
35        cache_write: u64,
36    ) {
37        tracing::debug!(
38            model,
39            prompt,
40            completion,
41            cache_read,
42            cache_write,
43            "Recording model usage"
44        );
45        self.record(prompt, completion);
46
47        if let Ok(mut usage) = self.model_usage.try_lock() {
48            let entry = usage.entry(model.to_string()).or_insert((0, 0));
49            entry.0 += prompt;
50            entry.1 += completion;
51        }
52        if let Ok(mut last) = self.model_last_prompt_tokens.try_lock() {
53            // `prompt` is the *current turn's* full context window
54            // (all prior messages re-sent to the provider), which is the
55            // signal the TUI wants for its context-% badge.
56            last.insert(model.to_string(), prompt + cache_read + cache_write);
57        }
58        if (cache_read > 0 || cache_write > 0)
59            && let Ok(mut cache) = self.model_cache_usage.try_lock()
60        {
61            let entry = cache.entry(model.to_string()).or_insert((0, 0));
62            entry.0 += cache_read;
63            entry.1 += cache_write;
64        }
65    }
66
67    /// Cumulative `(cache_read, cache_write)` token counts for a model.
68    /// Returns `(0, 0)` if the map is contended or the model is unknown.
69    pub fn cache_usage_for(&self, model: &str) -> (u64, u64) {
70        self.model_cache_usage
71            .try_lock()
72            .ok()
73            .and_then(|m| m.get(model).copied())
74            .unwrap_or((0, 0))
75    }
76
77    /// Most recent turn's full prompt token count (including cache) for a model.
78    ///
79    /// This reflects the *current* in-flight context size (what the next
80    /// request will send), not cumulative lifetime tokens. Returns `None`
81    /// if no completion has been recorded for the model yet.
82    pub fn last_prompt_tokens_for(&self, model: &str) -> Option<u64> {
83        self.model_last_prompt_tokens
84            .try_lock()
85            .ok()
86            .and_then(|m| m.get(model).copied())
87    }
88}