// codetether_agent/session/event_token.rs
//! Token-accounting payloads carried by [`SessionEvent`].
//!
//! These types back the [`SessionEvent::TokenUsage`] and
//! [`SessionEvent::TokenEstimate`] variants. They are intentionally
//! newtype-wrapped so their wire format is stable independently of the
//! enum itself — the durable JSONL sink consumes them directly, and the
//! TUI status line renders [`TokenEstimate`] against the current model
//! budget on every request.
//!
//! [`SessionEvent`]: crate::session::SessionEvent
//! [`SessionEvent::TokenUsage`]: crate::session::SessionEvent::TokenUsage
//! [`SessionEvent::TokenEstimate`]: crate::session::SessionEvent::TokenEstimate

use serde::{Deserialize, Serialize};

16/// Origin of a token accounting delta.
17///
18/// Distinguishes the root chat round-trip from tokens spent inside
19/// Recursive Language Model (RLM) sub-processing so cost dashboards can
20/// break down spend by source instead of aggregating everything under the
21/// chat model.
22///
23/// # Examples
24///
25/// ```rust
26/// use codetether_agent::session::TokenSource;
27///
28/// let src = TokenSource::RlmSubcall;
29/// assert_eq!(src.as_str(), "rlm_subcall");
30/// assert!(src.is_rlm());
31/// ```
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum TokenSource {
35    /// A primary chat completion for the user-facing session.
36    Root,
37    /// A single iteration of the RLM analysis loop (root RLM model).
38    RlmIteration,
39    /// A sub-LLM call dispatched from inside the RLM loop (subcall model).
40    RlmSubcall,
41    /// An LLM call issued from inside a tool implementation.
42    ToolEmbedded,
43}
44
45impl TokenSource {
46    /// Stable string identifier suitable for logs and JSONL records.
47    ///
48    /// # Examples
49    ///
50    /// ```rust
51    /// use codetether_agent::session::TokenSource;
52    ///
53    /// assert_eq!(TokenSource::Root.as_str(), "root");
54    /// assert_eq!(TokenSource::ToolEmbedded.as_str(), "tool_embedded");
55    /// ```
56    pub const fn as_str(self) -> &'static str {
57        match self {
58            Self::Root => "root",
59            Self::RlmIteration => "rlm_iteration",
60            Self::RlmSubcall => "rlm_subcall",
61            Self::ToolEmbedded => "tool_embedded",
62        }
63    }
64
65    /// Returns `true` when the source is any RLM-attributable variant.
66    ///
67    /// Useful for telemetry splits like "chat tokens vs RLM overhead".
68    ///
69    /// # Examples
70    ///
71    /// ```rust
72    /// use codetether_agent::session::TokenSource;
73    ///
74    /// assert!(TokenSource::RlmIteration.is_rlm());
75    /// assert!(TokenSource::RlmSubcall.is_rlm());
76    /// assert!(!TokenSource::Root.is_rlm());
77    /// ```
78    pub const fn is_rlm(self) -> bool {
79        matches!(self, Self::RlmIteration | Self::RlmSubcall)
80    }
81}
82
83/// A single token-consumption delta observed during session processing.
84///
85/// One `TokenDelta` is emitted per LLM round-trip (root or RLM), allowing
86/// downstream consumers to maintain per-model and per-source counters
87/// without re-reading the full session history.
88///
89/// # Examples
90///
91/// ```rust
92/// use codetether_agent::session::{TokenDelta, TokenSource};
93///
94/// let delta = TokenDelta {
95///     source: TokenSource::Root,
96///     model: "gpt-4o".to_string(),
97///     prompt_tokens: 1_200,
98///     completion_tokens: 340,
99///     duration_ms: 820,
100/// };
101/// assert_eq!(delta.total(), 1_540);
102/// ```
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct TokenDelta {
105    /// What kind of call produced this delta.
106    pub source: TokenSource,
107    /// Provider-qualified model identifier (e.g. `"anthropic/claude-opus-4-7"`).
108    pub model: String,
109    /// Prompt tokens consumed.
110    pub prompt_tokens: usize,
111    /// Completion tokens produced.
112    pub completion_tokens: usize,
113    /// Wall-clock round-trip in milliseconds.
114    pub duration_ms: u64,
115}
116
117impl TokenDelta {
118    /// Sum of prompt and completion tokens.
119    ///
120    /// # Examples
121    ///
122    /// ```rust
123    /// use codetether_agent::session::{TokenDelta, TokenSource};
124    ///
125    /// let d = TokenDelta {
126    ///     source: TokenSource::Root,
127    ///     model: "m".into(),
128    ///     prompt_tokens: 10,
129    ///     completion_tokens: 5,
130    ///     duration_ms: 0,
131    /// };
132    /// assert_eq!(d.total(), 15);
133    /// ```
134    pub fn total(&self) -> usize {
135        self.prompt_tokens.saturating_add(self.completion_tokens)
136    }
137}
138
139/// Pre-flight estimate of the next request's token footprint.
140///
141/// Emitted before the provider is contacted so the TUI can render
142/// budget-aware warnings ("73 % of 128k window") and the compaction
143/// pipeline can decide whether to run a pre-emptive summary pass.
144///
145/// The `budget` field is the usable window after the implementation has
146/// subtracted its completion reserve and protocol overhead — it is **not**
147/// the raw context window from the model card.
148///
149/// # Examples
150///
151/// ```rust
152/// use codetether_agent::session::TokenEstimate;
153///
154/// let est = TokenEstimate {
155///     model: "anthropic/claude-opus-4-7".into(),
156///     request_tokens: 94_000,
157///     budget: 128_000,
158/// };
159/// assert!(est.fraction() > 0.7);
160/// assert!(!est.is_over_budget());
161/// ```
162#[derive(Debug, Clone, Serialize, Deserialize)]
163pub struct TokenEstimate {
164    /// Provider-qualified model identifier the estimate targets.
165    pub model: String,
166    /// Estimated total request tokens (system + messages + tools).
167    pub request_tokens: usize,
168    /// Usable token budget for the request after reserves are subtracted.
169    pub budget: usize,
170}
171
172impl TokenEstimate {
173    /// Fraction of the budget the request is projected to consume.
174    ///
175    /// Returns `0.0` when `budget == 0` rather than panicking, so callers
176    /// may feed the value directly into UI colour ramps.
177    ///
178    /// # Examples
179    ///
180    /// ```rust
181    /// use codetether_agent::session::TokenEstimate;
182    ///
183    /// let e = TokenEstimate { model: "m".into(), request_tokens: 50, budget: 100 };
184    /// assert!((e.fraction() - 0.5).abs() < 1e-9);
185    ///
186    /// let z = TokenEstimate { model: "m".into(), request_tokens: 10, budget: 0 };
187    /// assert_eq!(z.fraction(), 0.0);
188    /// ```
189    pub fn fraction(&self) -> f64 {
190        if self.budget == 0 {
191            0.0
192        } else {
193            self.request_tokens as f64 / self.budget as f64
194        }
195    }
196
197    /// Returns `true` when the estimate strictly exceeds the usable budget.
198    ///
199    /// # Examples
200    ///
201    /// ```rust
202    /// use codetether_agent::session::TokenEstimate;
203    ///
204    /// let e = TokenEstimate { model: "m".into(), request_tokens: 129_000, budget: 128_000 };
205    /// assert!(e.is_over_budget());
206    /// ```
207    pub fn is_over_budget(&self) -> bool {
208        self.request_tokens > self.budget
209    }
210}