Skip to main content

defect_agent/session/
history.rs

1//! Concrete implementation of [`History`] as [`VecHistory`]: `Vec<Message>` + token
2//! accounting.
3//!
4//! Pure storage, no compression — compression is orchestrated in the turn main loop
5//! (`session/turn/compact.rs`).
6//! History — design tradeoffs for conversation history representation.
7//!
8//! ## Token estimation
9//!
//! No tokenizer dependency (aligned with opencode: the trigger uses real usage, while
//! internal estimation uses character heuristics). Two segments are combined:
//! - **Baseline**: the real input token count from the last LLM call
//!   (`record_input_tokens`), fed in by the turn main loop after each call. This is the
//!   most accurate segment.
//! - **Delta**: messages `append`ed after the baseline are estimated as `chars/4` and
//!   accumulated — they have not yet been sent to the LLM, so no real token count is
//!   available for them.
//!
//! `replace` (write-back after compression) clears the baseline, and so does
//! `splice_prefix`: the new list's token count must wait for the next real call report.
//! When the baseline is missing (session just created, or just after `replace` /
//! `splice_prefix`), the entire snapshot falls back to character heuristics.
25
26use std::sync::Mutex;
27
28use crate::llm::{Message, MessageContent};
29use crate::session::History;
30
/// Flat token cost charged for each multimodal image block in the character estimate.
///
/// An image cannot be estimated from a character count, so a conservative constant is
/// used instead, aligning with Claude Code microcompact's image counting.
const IMAGE_TOKEN_ESTIMATE: usize = 2_000;

/// Divisor of the character heuristic: one token per four characters (`chars / 4`,
/// aligned with codex / opencode).
const CHARS_PER_TOKEN: usize = 4;
38
39/// A [`History`] implementation backed by `Vec<Message>` + `Mutex`, with token
40/// accounting.
41#[derive(Default)]
42pub struct VecHistory {
43    inner: Mutex<Inner>,
44}
45
46#[derive(Default)]
47struct Inner {
48    messages: Vec<Message>,
49    /// Real input tokens reported by the last LLM call. `None` means no real baseline
50    /// exists yet (freshly created or just replaced), so `token_estimate` falls back
51    /// entirely to character heuristics.
52    last_real_input: Option<u64>,
53    /// Accumulated character-heuristic token estimate for messages `append`ed after the
54    /// last real baseline.
55    est_since_baseline: u64,
56}
57
58impl VecHistory {
59    pub fn new() -> Self {
60        Self::default()
61    }
62
63    pub fn from_messages(messages: Vec<Message>) -> Self {
64        Self {
65            inner: Mutex::new(Inner {
66                messages,
67                last_real_input: None,
68                est_since_baseline: 0,
69            }),
70        }
71    }
72}
73
74impl History for VecHistory {
75    fn append(&self, msg: Message) {
76        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
77        // When a baseline exists, the estimate for new messages is accumulated separately
78        // into the delta; when no baseline is set, no accumulation is needed (the entire
79        // `token_estimate` will be recomputed).
80        if inner.last_real_input.is_some() {
81            inner.est_since_baseline = inner
82                .est_since_baseline
83                .saturating_add(estimate_message_tokens(&msg));
84        }
85        inner.messages.push(msg);
86    }
87
88    fn snapshot(&self) -> Vec<Message> {
89        self.inner
90            .lock()
91            .expect("VecHistory mutex poisoned")
92            .messages
93            .clone()
94    }
95
96    fn replace(&self, messages: Vec<Message>) {
97        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
98        inner.messages = messages;
99        // The true token count of the new list is unknown; it will be reported on the
100        // next LLM call.
101        inner.last_real_input = None;
102        inner.est_since_baseline = 0;
103    }
104
105    fn splice_prefix(&self, drop_count: usize, summary: Message) -> usize {
106        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
107        // Invariant check: `drop_count` was computed from a snapshot at some earlier
108        // point; by the time it is applied, the list should only have grown (via append)
109        // or been replaced in place — it must not be **shorter**. If the current length
110        // is less than `drop_count`, it means a mid-list deletion happened in flight,
111        // violating the single-flight invariant (see `session.rs` docs). In debug builds
112        // this assertion catches the bug; in release builds the `clamp` below prevents a
113        // panic.
114        debug_assert!(
115            drop_count <= inner.messages.len(),
116            "splice_prefix invariant violated: drop_count={drop_count} > current len={}; \
117             history shrank mid-flight (concurrent mid-list deletion?)",
118            inner.messages.len()
119        );
120        // Clamp to current length — concurrent tail insertion only grows the list, so
121        // `drop_count` should never exceed it, but clamping is a cheap safety net (even
122        // if an old snapshot is longer than the current list under extreme races, this
123        // won't panic).
124        let drop_count = drop_count.min(inner.messages.len());
125        let tail = inner.messages.split_off(drop_count);
126        inner.messages = Vec::with_capacity(tail.len() + 1);
127        inner.messages.push(summary);
128        inner.messages.extend(tail);
129        // Same as `replace`: the true token count of the new prefix is unknown; it will
130        // be reported by the next LLM call.
131        inner.last_real_input = None;
132        inner.est_since_baseline = 0;
133        drop_count
134    }
135
136    fn record_input_tokens(&self, tokens: u64) {
137        let mut inner = self.inner.lock().expect("VecHistory mutex poisoned");
138        inner.last_real_input = Some(tokens);
139        // Baseline reset — subsequent appends count their delta from zero.
140        inner.est_since_baseline = 0;
141    }
142
143    fn token_estimate(&self) -> Option<u64> {
144        let inner = self.inner.lock().expect("VecHistory mutex poisoned");
145        match inner.last_real_input {
146            // With a real baseline: baseline + character-heuristic increment for messages
147            // added after it.
148            Some(real) => Some(real.saturating_add(inner.est_since_baseline)),
149            // No baseline: fall back to character heuristics for the entire history.
150            // Returns `None` if history is empty.
151            None => {
152                if inner.messages.is_empty() {
153                    return None;
154                }
155                Some(
156                    inner
157                        .messages
158                        .iter()
159                        .map(estimate_message_tokens)
160                        .fold(0u64, u64::saturating_add),
161                )
162            }
163        }
164    }
165}
166
167/// Character-based heuristic token estimate for a single message (`chars/4`, images count
168/// as a constant).
169///
170/// `pub(crate)`: the compaction module (`session/turn/compact.rs`) reuses the same ruler
171/// when selecting retention boundaries, preventing drift between two estimation sites.
172pub(crate) fn estimate_message_tokens(msg: &Message) -> u64 {
173    let chars: usize = msg
174        .content
175        .iter()
176        .map(|c| match c {
177            MessageContent::Text { text } => text.len() / CHARS_PER_TOKEN,
178            MessageContent::Thinking { text, signature } => {
179                (text.len() + signature.as_ref().map_or(0, |s| s.len())) / CHARS_PER_TOKEN
180            }
181            MessageContent::ToolUse { name, args, .. } => {
182                (name.len() + args.to_string().len()) / CHARS_PER_TOKEN
183            }
184            MessageContent::ToolResult { output, .. } => {
185                tool_result_chars(output) / CHARS_PER_TOKEN
186            }
187            MessageContent::Image { .. } => IMAGE_TOKEN_ESTIMATE,
188            // The payload of a hosted activity is not persisted across processes, so it
189            // is ignored in the estimate.
190            MessageContent::ProviderActivity { .. } => 0,
191        })
192        .sum();
193    chars as u64
194}
195
196fn tool_result_chars(output: &crate::llm::ToolResultBody) -> usize {
197    use crate::llm::{ToolResultBody, ToolResultContent};
198    match output {
199        ToolResultBody::Text { text } => text.len(),
200        ToolResultBody::Json { value } => value.to_string().len(),
201        ToolResultBody::Content { blocks } => blocks
202            .iter()
203            .map(|b| match b {
204                ToolResultContent::Text { text } => text.len(),
205                ToolResultContent::Image { data, .. } => image_data_chars(data),
206            })
207            .sum(),
208    }
209}
210
211/// Approximate character count for an image block: base64 string length or URL length.
212/// Used for estimation and compression decisions; exact precision is not required.
213fn image_data_chars(data: &crate::llm::ImageData) -> usize {
214    match data {
215        crate::llm::ImageData::Base64 { encoded } => encoded.len(),
216        crate::llm::ImageData::Url { url } => url.len(),
217    }
218}
219
220#[cfg(test)]
221mod tests;