use crate::repl::conversation::ConversationHistory;
use crate::session::DisplayMessage;

/// Manages the state of a single REPL session
#[derive(Clone)]
pub struct SessionState {
    /// Unique identifier for this session
    pub session_id: String,
    /// Conversation history for API
    pub conversation: ConversationHistory,
    /// Display-friendly message history for UI
    pub display_messages: Vec<DisplayMessage>,
    /// Total input tokens consumed in this session.
    /// Provider semantics differ:
    ///
    /// - OpenAI Responses API: this is the **total** count, of which
    ///   `total_cache_read_tokens` is a subset.
    /// - Anthropic Messages API: this counts **uncached** new tokens only;
    ///   cache reads and cache creations are tracked separately and are
    ///   disjoint from it.
    ///
    /// `calculate_cost` normalizes this difference when computing the bill
    /// (see the illustrative sketch below this struct).
    pub total_input_tokens: u32,
    /// Total output tokens generated in this session
    pub total_output_tokens: u32,
    /// Tokens served from the provider prompt cache (charged at a
    /// reduced rate). Both providers report this; semantics relative to
    /// `total_input_tokens` differ as documented above.
    pub total_cache_read_tokens: u32,
    /// Tokens written to the Anthropic prompt cache (charged at a
    /// premium). OpenAI does not surface a creation counter, so this
    /// stays at 0 for OpenAI sessions.
    pub total_cache_creation_tokens: u32,
    /// Largest input-token count observed on any single API call in this
    /// session. Used to detect tiered-pricing cliffs (gpt-5.4/5.5 flip the
    /// entire session to premium rates once any prompt crosses 272K input
    /// tokens). Compared against `ModelInfo::premium_tier.input_threshold`
    /// in `calculate_cost` so the displayed cost reflects what the provider
    /// actually bills, not the standard-tier rate.
    ///
    /// All five counters above are persisted through
    /// [`SessionTokenCounters`](crate::session::SessionTokenCounters)
    /// so a `--resume` keeps the cost summary accurate and the cliff
    /// detector remembers whether the threshold had already been
    /// crossed. Session files written before persistence was added
    /// default every counter to 0 via `#[serde(default)]`.
    pub peak_single_turn_input_tokens: u32,
}
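
// Illustrative sketch only, not the real `calculate_cost`: it shows how the
// per-provider input semantics documented above can be put on one common
// basis, and how the peak-input cliff check works. The `Provider` enum, the
// 272_000 threshold, and the assertions are assumptions made for this
// example; the real pricing data lives in `ModelInfo`.
#[cfg(test)]
mod token_accounting_sketch {
    enum Provider {
        OpenAi,
        Anthropic,
    }

    /// Returns `(uncached_input, cache_read)` on a common basis where cache
    /// reads are disjoint from uncached input for both providers.
    fn normalize_input(provider: Provider, total_input: u32, cache_read: u32) -> (u32, u32) {
        match provider {
            // OpenAI reports cache reads as a subset of total input, so the
            // uncached portion is the difference.
            Provider::OpenAi => (total_input.saturating_sub(cache_read), cache_read),
            // Anthropic already reports uncached input and cache reads as
            // disjoint counters.
            Provider::Anthropic => (total_input, cache_read),
        }
    }

    #[test]
    fn uncached_input_is_normalized_per_provider() {
        assert_eq!(normalize_input(Provider::OpenAi, 10_000, 4_000), (6_000, 4_000));
        assert_eq!(normalize_input(Provider::Anthropic, 10_000, 4_000), (10_000, 4_000));
    }

    #[test]
    fn premium_tier_triggers_once_any_single_prompt_crosses_the_threshold() {
        // Hypothetical threshold standing in for
        // `ModelInfo::premium_tier.input_threshold`.
        let input_threshold: u32 = 272_000;
        // A session whose largest single prompt crossed the cliff stays on
        // premium rates even if later prompts are small.
        let peak_single_turn_input_tokens: u32 = 280_000;
        assert!(peak_single_turn_input_tokens > input_threshold);
    }
}
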
impl SessionState {
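    /// Creates a fresh session with an empty display history and every
    /// token counter zeroed.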
    pub fn new(session_id: String, conversation: ConversationHistory) -> Self {
        Self {
            session_id,
            conversation,
            display_messages: Vec::new(),
            total_input_tokens: 0,
            total_output_tokens: 0,
            total_cache_read_tokens: 0,
            total_cache_creation_tokens: 0,
            peak_single_turn_input_tokens: 0,
        }
    }

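    /// Resets the session in place: wipes the conversation and display
    /// history, zeroes every token counter, and adopts the new session id.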
    pub fn clear(&mut self, new_session_id: String) {
        self.session_id = new_session_id;
        self.conversation.clear();
        self.display_messages.clear();
        self.total_input_tokens = 0;
        self.total_output_tokens = 0;
        self.total_cache_read_tokens = 0;
        self.total_cache_creation_tokens = 0;
        self.peak_single_turn_input_tokens = 0;
    }

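    /// Folds one API call's `Usage` into the running session totals and
    /// updates the per-call input high-water mark used for cliff detection.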
    pub fn add_usage(&mut self, usage: &crate::api::Usage) {
        self.total_input_tokens += usage.input_tokens;
        self.total_output_tokens += usage.output_tokens;
        self.total_cache_read_tokens += usage.cache_read_input_tokens.unwrap_or(0);
        self.total_cache_creation_tokens += usage.cache_creation_input_tokens.unwrap_or(0);
        // Per-call high-water mark on input tokens. For OpenAI, the figure
        // already includes cached input (the provider's documented basis for
        // the 272K premium cliff); for Anthropic, cache reads come on a
        // separate counter, so this is uncached input only. No Anthropic
        // model currently has a documented cliff, so the asymmetry doesn't
        // matter today.
        if usage.input_tokens > self.peak_single_turn_input_tokens {
            self.peak_single_turn_input_tokens = usage.input_tokens;
        }
    }
}