defect_agent/session/history.rs
1//! Concrete implementation of [`History`] as [`VecHistory`]: `Vec<Message>` + token
2//! accounting.
3//!
4//! Pure storage, no compression — compression is orchestrated in the turn main loop
5//! (`session/turn/compact.rs`).
//!
//! The rest of this module documentation covers the design tradeoffs of the
//! conversation-history representation.
7//!
8//! ## Token estimation
9//!
//! No tokenizer dependency (aligned with opencode: the trigger uses real usage, while
//! internal estimation uses character heuristics). The estimate combines two segments:
13//! - **Baseline**: the real input token count from the last LLM call
14//! (`record_input_tokens`),
15//! fed in by the turn main loop after each call. This is the most accurate segment.
16//! - **Delta**: messages `append`ed after the baseline are estimated as `chars/4` and
17//! accumulated — these
18//! have not yet been sent to the LLM, so no real token count is available.
19//!
//! `replace` (write-back after compression) clears the baseline: the new list's token
//! count must wait for the next real call report. `splice_prefix` and a `truncate` that
//! actually drops messages clear it the same way. When the baseline is missing (session
//! just created, or just after one of these operations), the entire snapshot falls back
//! to character heuristics.
25
26use std::sync::Mutex;
27
28use crate::llm::{Message, MessageContent};
29use crate::session::History;
30
/// Flat token cost charged for an image content part.
///
/// An image cannot be sized by counting characters, so a conservative constant is used
/// instead (aligned with how Claude Code microcompact counts images).
const IMAGE_TOKEN_ESTIMATE: usize = 2_000;
35
/// Divisor of the length-to-token heuristic: an estimated token is `length / 4`
/// (aligned with codex / opencode).
const CHARS_PER_TOKEN: usize = 4;
38
39/// A [`History`] implementation backed by `Vec<Message>` + `Mutex`, with token
40/// accounting.
41#[derive(Default)]
42pub struct VecHistory {
43 inner: Mutex<Inner>,
44}
45
46#[derive(Default)]
47struct Inner {
48 messages: Vec<Message>,
49 /// Real input tokens reported by the last LLM call. `None` means no real baseline
50 /// exists yet (freshly created or just replaced), so `token_estimate` falls back
51 /// entirely to character heuristics.
52 last_real_input: Option<u64>,
53 /// Accumulated character-heuristic token estimate for messages `append`ed after the
54 /// last real baseline.
55 est_since_baseline: u64,
56}
57
58impl VecHistory {
59 pub fn new() -> Self {
60 Self::default()
61 }
62
63 pub fn from_messages(messages: Vec<Message>) -> Self {
64 Self {
65 inner: Mutex::new(Inner {
66 messages,
67 last_real_input: None,
68 est_since_baseline: 0,
69 }),
70 }
71 }
72}
73
74impl History for VecHistory {
75 fn append(&self, msg: Message) {
76 let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
77 // When a baseline exists, the estimate for new messages is accumulated separately
78 // into the delta; when no baseline is set, no accumulation is needed (the entire
79 // `token_estimate` will be recomputed).
80 if inner.last_real_input.is_some() {
81 inner.est_since_baseline = inner
82 .est_since_baseline
83 .saturating_add(estimate_message_tokens(&msg));
84 }
85 inner.messages.push(msg);
86 }
87
88 fn snapshot(&self) -> Vec<Message> {
89 self.inner
90 .lock()
91 .expect("VecHistory mutex poisoned")
92 .messages
93 .clone()
94 }
95
96 fn replace(&self, messages: Vec<Message>) {
97 let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
98 inner.messages = messages;
99 // The true token count of the new list is unknown; it will be reported on the
100 // next LLM call.
101 inner.last_real_input = None;
102 inner.est_since_baseline = 0;
103 }
104
105 fn splice_prefix(&self, drop_count: usize, summary: Message) -> usize {
106 let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
107 // Invariant check: `drop_count` was computed from a snapshot at some earlier
108 // point; by the time it is applied, the list should only have grown (via append)
109 // or been replaced in place — it must not be **shorter**. If the current length
110 // is less than `drop_count`, it means a mid-list deletion happened in flight,
111 // violating the single-flight invariant (see `session.rs` docs). In debug builds
112 // this assertion catches the bug; in release builds the `clamp` below prevents a
113 // panic.
114 debug_assert!(
115 drop_count <= inner.messages.len(),
116 "splice_prefix invariant violated: drop_count={drop_count} > current len={}; \
117 history shrank mid-flight (concurrent mid-list deletion?)",
118 inner.messages.len()
119 );
120 // Clamp to current length — concurrent tail insertion only grows the list, so
121 // `drop_count` should never exceed it, but clamping is a cheap safety net (even
122 // if an old snapshot is longer than the current list under extreme races, this
123 // won't panic).
124 let drop_count = drop_count.min(inner.messages.len());
125 let tail = inner.messages.split_off(drop_count);
126 inner.messages = Vec::with_capacity(tail.len() + 1);
127 inner.messages.push(summary);
128 inner.messages.extend(tail);
129 // Same as `replace`: the true token count of the new prefix is unknown; it will
130 // be reported by the next LLM call.
131 inner.last_real_input = None;
132 inner.est_since_baseline = 0;
133 drop_count
134 }
135
136 fn len(&self) -> usize {
137 self.inner
138 .lock()
139 .expect("VecHistory mutex poisoned")
140 .messages
141 .len()
142 }
143
144 fn truncate(&self, len: usize) {
145 let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
146 if len >= inner.messages.len() {
147 return;
148 }
149 inner.messages.truncate(len);
150 // The dropped messages may have fed into `est_since_baseline`; the cheapest correct
151 // fix is to reset the baseline, same as `replace` — the next LLM call reports the
152 // true count.
153 inner.last_real_input = None;
154 inner.est_since_baseline = 0;
155 }
156
157 fn record_input_tokens(&self, tokens: u64) {
158 let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
159 inner.last_real_input = Some(tokens);
160 // Baseline reset — subsequent appends count their delta from zero.
161 inner.est_since_baseline = 0;
162 }
163
164 fn token_estimate(&self) -> Option<u64> {
165 let inner = self.inner.lock().expect("VecHistory mutex poisoned");
166 match inner.last_real_input {
167 // With a real baseline: baseline + character-heuristic increment for messages
168 // added after it.
169 Some(real) => Some(real.saturating_add(inner.est_since_baseline)),
170 // No baseline: fall back to character heuristics for the entire history.
171 // Returns `None` if history is empty.
172 None => {
173 if inner.messages.is_empty() {
174 return None;
175 }
176 Some(
177 inner
178 .messages
179 .iter()
180 .map(estimate_message_tokens)
181 .fold(0u64, u64::saturating_add),
182 )
183 }
184 }
185 }
186}
187
188/// Character-based heuristic token estimate for a single message (`chars/4`, images count
189/// as a constant).
190///
191/// `pub(crate)`: the compaction module (`session/turn/compact.rs`) reuses the same ruler
192/// when selecting retention boundaries, preventing drift between two estimation sites.
193pub(crate) fn estimate_message_tokens(msg: &Message) -> u64 {
194 let chars: usize = msg
195 .content
196 .iter()
197 .map(|c| match c {
198 MessageContent::Text { text } => text.len() / CHARS_PER_TOKEN,
199 MessageContent::Thinking { text, signature } => {
200 (text.len() + signature.as_ref().map_or(0, |s| s.len())) / CHARS_PER_TOKEN
201 }
202 MessageContent::ToolUse { name, args, .. } => {
203 (name.len() + args.to_string().len()) / CHARS_PER_TOKEN
204 }
205 MessageContent::ToolResult { output, .. } => {
206 tool_result_chars(output) / CHARS_PER_TOKEN
207 }
208 MessageContent::Image { .. } => IMAGE_TOKEN_ESTIMATE,
209 // The payload of a hosted activity is not persisted across processes, so it
210 // is ignored in the estimate.
211 MessageContent::ProviderActivity { .. } => 0,
212 })
213 .sum();
214 chars as u64
215}
216
217fn tool_result_chars(output: &crate::llm::ToolResultBody) -> usize {
218 use crate::llm::{ToolResultBody, ToolResultContent};
219 match output {
220 ToolResultBody::Text { text } => text.len(),
221 ToolResultBody::Json { value } => value.to_string().len(),
222 ToolResultBody::Content { blocks } => blocks
223 .iter()
224 .map(|b| match b {
225 ToolResultContent::Text { text } => text.len(),
226 ToolResultContent::Image { data, .. } => image_data_chars(data),
227 })
228 .sum(),
229 }
230}
231
232/// Approximate character count for an image block: base64 string length or URL length.
233/// Used for estimation and compression decisions; exact precision is not required.
234fn image_data_chars(data: &crate::llm::ImageData) -> usize {
235 match data {
236 crate::llm::ImageData::Base64 { encoded } => encoded.len(),
237 crate::llm::ImageData::Url { url } => url.len(),
238 }
239}
240
241#[cfg(test)]
242mod tests;