codetether_agent/session/event_token.rs
1//! Token-accounting payloads carried by [`SessionEvent`].
2//!
3//! These types back the [`SessionEvent::TokenUsage`] and
4//! [`SessionEvent::TokenEstimate`] variants. They are intentionally
5//! newtype-wrapped so their wire format is stable independently of the
6//! enum itself — the durable JSONL sink consumes them directly, and the
7//! TUI status line renders [`TokenEstimate`] against the current model
8//! budget on every request.
9//!
10//! [`SessionEvent`]: crate::session::SessionEvent
11//! [`SessionEvent::TokenUsage`]: crate::session::SessionEvent::TokenUsage
12//! [`SessionEvent::TokenEstimate`]: crate::session::SessionEvent::TokenEstimate
13
14use serde::{Deserialize, Serialize};
15
/// Origin of a token accounting delta.
///
/// Distinguishes the root chat round-trip from tokens spent inside
/// Recursive Language Model (RLM) sub-processing so cost dashboards can
/// break down spend by source instead of aggregating everything under the
/// chat model.
///
/// Serialized in `snake_case` (e.g. `"rlm_subcall"`), which matches the
/// strings returned by [`TokenSource::as_str`] — log lines and JSONL
/// records therefore agree on the identifier.
///
/// # Examples
///
/// ```rust
/// use codetether_agent::session::TokenSource;
///
/// let src = TokenSource::RlmSubcall;
/// assert_eq!(src.as_str(), "rlm_subcall");
/// assert!(src.is_rlm());
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TokenSource {
    /// A primary chat completion for the user-facing session.
    Root,
    /// A single iteration of the RLM analysis loop (root RLM model).
    RlmIteration,
    /// A sub-LLM call dispatched from inside the RLM loop (subcall model).
    RlmSubcall,
    /// An LLM call issued from inside a tool implementation.
    ToolEmbedded,
}
44
45impl TokenSource {
46 /// Stable string identifier suitable for logs and JSONL records.
47 ///
48 /// # Examples
49 ///
50 /// ```rust
51 /// use codetether_agent::session::TokenSource;
52 ///
53 /// assert_eq!(TokenSource::Root.as_str(), "root");
54 /// assert_eq!(TokenSource::ToolEmbedded.as_str(), "tool_embedded");
55 /// ```
56 pub const fn as_str(self) -> &'static str {
57 match self {
58 Self::Root => "root",
59 Self::RlmIteration => "rlm_iteration",
60 Self::RlmSubcall => "rlm_subcall",
61 Self::ToolEmbedded => "tool_embedded",
62 }
63 }
64
65 /// Returns `true` when the source is any RLM-attributable variant.
66 ///
67 /// Useful for telemetry splits like "chat tokens vs RLM overhead".
68 ///
69 /// # Examples
70 ///
71 /// ```rust
72 /// use codetether_agent::session::TokenSource;
73 ///
74 /// assert!(TokenSource::RlmIteration.is_rlm());
75 /// assert!(TokenSource::RlmSubcall.is_rlm());
76 /// assert!(!TokenSource::Root.is_rlm());
77 /// ```
78 pub const fn is_rlm(self) -> bool {
79 matches!(self, Self::RlmIteration | Self::RlmSubcall)
80 }
81}
82
/// A single token-consumption delta observed during session processing.
///
/// One `TokenDelta` is emitted per LLM round-trip (root or RLM), allowing
/// downstream consumers to maintain per-model and per-source counters
/// without re-reading the full session history.
///
/// # Examples
///
/// ```rust
/// use codetether_agent::session::{TokenDelta, TokenSource};
///
/// let delta = TokenDelta {
///     source: TokenSource::Root,
///     model: "gpt-4o".to_string(),
///     prompt_tokens: 1_200,
///     completion_tokens: 340,
///     duration_ms: 820,
/// };
/// assert_eq!(delta.total(), 1_540);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenDelta {
    /// What kind of call produced this delta.
    pub source: TokenSource,
    /// Provider-qualified model identifier (e.g. `"anthropic/claude-opus-4-7"`).
    pub model: String,
    /// Prompt tokens consumed by this single round-trip.
    pub prompt_tokens: usize,
    /// Completion tokens produced by this single round-trip.
    pub completion_tokens: usize,
    /// Wall-clock round-trip latency in milliseconds.
    pub duration_ms: u64,
}
116
117impl TokenDelta {
118 /// Sum of prompt and completion tokens.
119 ///
120 /// # Examples
121 ///
122 /// ```rust
123 /// use codetether_agent::session::{TokenDelta, TokenSource};
124 ///
125 /// let d = TokenDelta {
126 /// source: TokenSource::Root,
127 /// model: "m".into(),
128 /// prompt_tokens: 10,
129 /// completion_tokens: 5,
130 /// duration_ms: 0,
131 /// };
132 /// assert_eq!(d.total(), 15);
133 /// ```
134 pub fn total(&self) -> usize {
135 self.prompt_tokens.saturating_add(self.completion_tokens)
136 }
137}
138
/// Pre-flight estimate of the next request's token footprint.
///
/// Emitted before the provider is contacted so the TUI can render
/// budget-aware warnings ("73 % of 128k window") and the compaction
/// pipeline can decide whether to run a pre-emptive summary pass.
///
/// The `budget` field is the usable window after the implementation has
/// subtracted its completion reserve and protocol overhead — it is **not**
/// the raw context window from the model card.
///
/// # Examples
///
/// ```rust
/// use codetether_agent::session::TokenEstimate;
///
/// let est = TokenEstimate {
///     model: "anthropic/claude-opus-4-7".into(),
///     request_tokens: 94_000,
///     budget: 128_000,
/// };
/// assert!(est.fraction() > 0.7);
/// assert!(!est.is_over_budget());
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenEstimate {
    /// Provider-qualified model identifier the estimate targets.
    pub model: String,
    /// Estimated total request tokens (system + messages + tools).
    pub request_tokens: usize,
    /// Usable token budget for the request after reserves are subtracted.
    pub budget: usize,
}
171
172impl TokenEstimate {
173 /// Fraction of the budget the request is projected to consume.
174 ///
175 /// Returns `0.0` when `budget == 0` rather than panicking, so callers
176 /// may feed the value directly into UI colour ramps.
177 ///
178 /// # Examples
179 ///
180 /// ```rust
181 /// use codetether_agent::session::TokenEstimate;
182 ///
183 /// let e = TokenEstimate { model: "m".into(), request_tokens: 50, budget: 100 };
184 /// assert!((e.fraction() - 0.5).abs() < 1e-9);
185 ///
186 /// let z = TokenEstimate { model: "m".into(), request_tokens: 10, budget: 0 };
187 /// assert_eq!(z.fraction(), 0.0);
188 /// ```
189 pub fn fraction(&self) -> f64 {
190 if self.budget == 0 {
191 0.0
192 } else {
193 self.request_tokens as f64 / self.budget as f64
194 }
195 }
196
197 /// Returns `true` when the estimate strictly exceeds the usable budget.
198 ///
199 /// # Examples
200 ///
201 /// ```rust
202 /// use codetether_agent::session::TokenEstimate;
203 ///
204 /// let e = TokenEstimate { model: "m".into(), request_tokens: 129_000, budget: 128_000 };
205 /// assert!(e.is_over_budget());
206 /// ```
207 pub fn is_over_budget(&self) -> bool {
208 self.request_tokens > self.budget
209 }
210}