Skip to main content

defect_agent/session/
history.rs

1//! Concrete implementation of [`History`] as [`VecHistory`]: `Vec<Message>` + token
2//! accounting.
3//!
4//! Pure storage, no compression — compression is orchestrated in the turn main loop
5//! (`session/turn/compact.rs`).
6//! History — design tradeoffs for conversation history representation.
7//!
8//! ## Token estimation
9//!
10//! No tokenizer dependency (aligned with opencode: trigger uses real usage, internal
11//! estimation uses
12//! character heuristics). Two segments are combined:
13//! - **Baseline**: the real input token count from the last LLM call
14//!   (`record_input_tokens`),
15//!   fed in by the turn main loop after each call. This is the most accurate segment.
16//! - **Delta**: messages `append`ed after the baseline are estimated as `chars/4` and
17//!   accumulated — these
18//!   have not yet been sent to the LLM, so no real token count is available.
19//!
20//! `replace` (write-back after compression) clears the baseline: the new list's token
21//! count must wait
22//! for the next real call report. When the baseline is missing (session just created, or
23//! just after
24//! `replace`), the entire snapshot falls back to character heuristics.
25
26use std::sync::Mutex;
27
28use crate::llm::{Message, MessageContent};
29use crate::session::History;
30
/// Multimodal images are counted as a fixed token cost in character estimation, aligning
/// with Claude Code microcompact's image counting (cannot estimate by characters, so a
/// conservative constant is used).
///
/// Unit: **tokens**, not characters — `estimate_message_tokens` adds this value to the
/// total as-is, without dividing it by [`CHARS_PER_TOKEN`].
const IMAGE_TOKEN_ESTIMATE: usize = 2_000;

/// Heuristic character-to-token ratio: `chars / 4` (aligned with codex / opencode).
///
/// Lengths fed into this ratio are obtained with `.len()`; for `String`/`str` values that
/// is the UTF-8 byte length rather than the number of Unicode scalar values.
const CHARS_PER_TOKEN: usize = 4;
38
/// A [`History`] implementation backed by `Vec<Message>` + `Mutex`, with token
/// accounting.
///
/// All state sits behind a single [`Mutex`], which is why every [`History`] method can
/// take `&self`. A poisoned mutex is treated as a bug: each method panics with
/// "VecHistory mutex poisoned" instead of trying to recover.
#[derive(Default)]
pub struct VecHistory {
    /// Message list and token bookkeeping, guarded together by one lock.
    inner: Mutex<Inner>,
}
45
/// Lock-protected state of [`VecHistory`]: the message list plus the two counters that
/// together form the token estimate (real baseline + heuristic delta).
#[derive(Default)]
struct Inner {
    /// The stored conversation messages; `append` pushes onto the end.
    messages: Vec<Message>,
    /// Real input tokens reported by the last LLM call. `None` means no real baseline
    /// exists yet (freshly created or just replaced), so `token_estimate` falls back
    /// entirely to character heuristics.
    last_real_input: Option<u64>,
    /// Accumulated character-heuristic token estimate for messages `append`ed after the
    /// last real baseline. Reset to 0 whenever the baseline is recorded or cleared.
    est_since_baseline: u64,
}
57
58impl VecHistory {
59    pub fn new() -> Self {
60        Self::default()
61    }
62
63    pub fn from_messages(messages: Vec<Message>) -> Self {
64        Self {
65            inner: Mutex::new(Inner {
66                messages,
67                last_real_input: None,
68                est_since_baseline: 0,
69            }),
70        }
71    }
72}
73
74impl History for VecHistory {
75    fn append(&self, msg: Message) {
76        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
77        // When a baseline exists, the estimate for new messages is accumulated separately
78        // into the delta; when no baseline is set, no accumulation is needed (the entire
79        // `token_estimate` will be recomputed).
80        if inner.last_real_input.is_some() {
81            inner.est_since_baseline = inner
82                .est_since_baseline
83                .saturating_add(estimate_message_tokens(&msg));
84        }
85        inner.messages.push(msg);
86    }
87
88    fn snapshot(&self) -> Vec<Message> {
89        self.inner
90            .lock()
91            .expect("VecHistory mutex poisoned")
92            .messages
93            .clone()
94    }
95
96    fn replace(&self, messages: Vec<Message>) {
97        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
98        inner.messages = messages;
99        // The true token count of the new list is unknown; it will be reported on the
100        // next LLM call.
101        inner.last_real_input = None;
102        inner.est_since_baseline = 0;
103    }
104
105    fn splice_prefix(&self, drop_count: usize, summary: Message) -> usize {
106        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
107        // Invariant check: `drop_count` was computed from a snapshot at some earlier
108        // point; by the time it is applied, the list should only have grown (via append)
109        // or been replaced in place — it must not be **shorter**. If the current length
110        // is less than `drop_count`, it means a mid-list deletion happened in flight,
111        // violating the single-flight invariant (see `session.rs` docs). In debug builds
112        // this assertion catches the bug; in release builds the `clamp` below prevents a
113        // panic.
114        debug_assert!(
115            drop_count <= inner.messages.len(),
116            "splice_prefix invariant violated: drop_count={drop_count} > current len={}; \
117             history shrank mid-flight (concurrent mid-list deletion?)",
118            inner.messages.len()
119        );
120        // Clamp to current length — concurrent tail insertion only grows the list, so
121        // `drop_count` should never exceed it, but clamping is a cheap safety net (even
122        // if an old snapshot is longer than the current list under extreme races, this
123        // won't panic).
124        let drop_count = drop_count.min(inner.messages.len());
125        let tail = inner.messages.split_off(drop_count);
126        inner.messages = Vec::with_capacity(tail.len() + 1);
127        inner.messages.push(summary);
128        inner.messages.extend(tail);
129        // Same as `replace`: the true token count of the new prefix is unknown; it will
130        // be reported by the next LLM call.
131        inner.last_real_input = None;
132        inner.est_since_baseline = 0;
133        drop_count
134    }
135
136    fn len(&self) -> usize {
137        self.inner
138            .lock()
139            .expect("VecHistory mutex poisoned")
140            .messages
141            .len()
142    }
143
144    fn truncate(&self, len: usize) {
145        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
146        if len >= inner.messages.len() {
147            return;
148        }
149        inner.messages.truncate(len);
150        // The dropped messages may have fed into `est_since_baseline`; the cheapest correct
151        // fix is to reset the baseline, same as `replace` — the next LLM call reports the
152        // true count.
153        inner.last_real_input = None;
154        inner.est_since_baseline = 0;
155    }
156
157    fn record_input_tokens(&self, tokens: u64) {
158        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
159        inner.last_real_input = Some(tokens);
160        // Baseline reset — subsequent appends count their delta from zero.
161        inner.est_since_baseline = 0;
162    }
163
164    fn token_estimate(&self) -> Option<u64> {
165        let inner = self.inner.lock().expect("VecHistory mutex poisoned");
166        match inner.last_real_input {
167            // With a real baseline: baseline + character-heuristic increment for messages
168            // added after it.
169            Some(real) => Some(real.saturating_add(inner.est_since_baseline)),
170            // No baseline: fall back to character heuristics for the entire history.
171            // Returns `None` if history is empty.
172            None => {
173                if inner.messages.is_empty() {
174                    return None;
175                }
176                Some(
177                    inner
178                        .messages
179                        .iter()
180                        .map(estimate_message_tokens)
181                        .fold(0u64, u64::saturating_add),
182                )
183            }
184        }
185    }
186}
187
188/// Character-based heuristic token estimate for a single message (`chars/4`, images count
189/// as a constant).
190///
191/// `pub(crate)`: the compaction module (`session/turn/compact.rs`) reuses the same ruler
192/// when selecting retention boundaries, preventing drift between two estimation sites.
193pub(crate) fn estimate_message_tokens(msg: &Message) -> u64 {
194    let chars: usize = msg
195        .content
196        .iter()
197        .map(|c| match c {
198            MessageContent::Text { text } => text.len() / CHARS_PER_TOKEN,
199            MessageContent::Thinking { text, signature } => {
200                (text.len() + signature.as_ref().map_or(0, |s| s.len())) / CHARS_PER_TOKEN
201            }
202            MessageContent::ToolUse { name, args, .. } => {
203                (name.len() + args.to_string().len()) / CHARS_PER_TOKEN
204            }
205            MessageContent::ToolResult { output, .. } => {
206                tool_result_chars(output) / CHARS_PER_TOKEN
207            }
208            MessageContent::Image { .. } => IMAGE_TOKEN_ESTIMATE,
209            // The payload of a hosted activity is not persisted across processes, so it
210            // is ignored in the estimate.
211            MessageContent::ProviderActivity { .. } => 0,
212        })
213        .sum();
214    chars as u64
215}
216
217fn tool_result_chars(output: &crate::llm::ToolResultBody) -> usize {
218    use crate::llm::{ToolResultBody, ToolResultContent};
219    match output {
220        ToolResultBody::Text { text } => text.len(),
221        ToolResultBody::Json { value } => value.to_string().len(),
222        ToolResultBody::Content { blocks } => blocks
223            .iter()
224            .map(|b| match b {
225                ToolResultContent::Text { text } => text.len(),
226                ToolResultContent::Image { data, .. } => image_data_chars(data),
227            })
228            .sum(),
229    }
230}
231
232/// Approximate character count for an image block: base64 string length or URL length.
233/// Used for estimation and compression decisions; exact precision is not required.
234fn image_data_chars(data: &crate::llm::ImageData) -> usize {
235    match data {
236        crate::llm::ImageData::Base64 { encoded } => encoded.len(),
237        crate::llm::ImageData::Url { url } => url.len(),
238    }
239}
240
241#[cfg(test)]
242mod tests;