ailoop_context/context_manager.rs
//! [`ContextManager`], its [`ContextManagerBuilder`], and
//! [`CompactionReport`], the report type that
//! [`compact_if_needed`](ContextManager::compact_if_needed) returns.
3
4use ailoop_core::{AssistantBlock, CharTokenizer, Message, Tokenizer, UserBlock};
5
6use crate::{
7 compaction::{CompactionStrategy, TruncateStrategy},
8 errors::{CompactionError, FromMessagesError},
9};
10
/// Summary of one compaction pass performed by
/// [`ContextManager::compact_if_needed`].
///
/// `compact_if_needed` returns this wrapped in `Option`: `None` means
/// compaction did not run because the estimated token count was still
/// below `max_tokens`; `Some(report)` means the configured strategy ran
/// and its output replaced the history.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate can
/// read the fields but cannot build a value with a struct literal.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct CompactionReport {
    /// Message count before compaction ran.
    pub before: usize,
    /// Message count after compaction ran, i.e. the length of the
    /// message vector the strategy returned. `after < before` whenever
    /// the strategy actually dropped or replaced messages.
    pub after: usize,
    /// Stable, machine-readable strategy name from
    /// [`CompactionStrategy::name`] (e.g. `"truncate"`,
    /// `"summarize"`). Used as the `strategy` field of
    /// [`StreamChunk::HistoryCompacted`].
    ///
    /// [`CompactionStrategy::name`]: crate::CompactionStrategy::name
    /// [`StreamChunk::HistoryCompacted`]: ailoop_core::StreamChunk::HistoryCompacted
    pub strategy: &'static str,
}
31
/// Owns the message vector backing a single conversation, plus the
/// budget and pin mask that govern how compaction reduces it.
///
/// Lifecycle: append turns with [`add_message`](Self::add_message) /
/// [`extend`](Self::extend), inspect with [`messages`](Self::messages),
/// pin survivors with [`pin_last`](Self::pin_last) /
/// [`pin_at`](Self::pin_at) / [`pin_with_tool_result`](Self::pin_with_tool_result),
/// and call [`compact_if_needed`](Self::compact_if_needed) before each
/// outgoing [`ChatRequest`] to hold the history under the configured
/// budget. Restore from a [`ConversationSnapshot`] via
/// [`from_messages`](Self::from_messages).
///
/// The façade [`Conversation`](https://docs.rs/ailoop) wires one of
/// these automatically — touch this type directly only when driving
/// [`advanced::run_chat`](https://docs.rs/ailoop) or building tests.
///
/// [`ChatRequest`]: ailoop_core::ChatRequest
/// [`ConversationSnapshot`]: crate::ConversationSnapshot
pub struct ContextManager {
    /// Conversation history. New messages are appended at the end;
    /// a compaction pass replaces the whole vector with the strategy's
    /// output.
    messages: Vec<Message>,
    /// Parallel to `messages`: `pinned[i] == true` marks `messages[i]`
    /// as "must survive compaction". The two vectors are kept the same
    /// length by every internal mutation; new messages default to
    /// `false` and only the explicit pin API flips the flag. After a
    /// compaction the mask is whatever the strategy returned — its
    /// length is only checked with a debug assertion.
    pinned: Vec<bool>,
    /// Token budget. `compact_if_needed` runs the strategy once
    /// `estimated_tokens()` is no longer below this value.
    max_tokens: usize,
    /// Tail size handed to the strategy on every compaction call.
    preserve_n_last: usize,
    /// Strategy invoked by `compact_if_needed` to produce the reduced
    /// history and its pin mask; also supplies the name reported in
    /// [`CompactionReport::strategy`].
    strategy: Box<dyn CompactionStrategy>,
    /// Tokenizer used to size [`Self::messages`] against
    /// [`Self::max_tokens`] in [`Self::compact_if_needed`]. Defaults
    /// to [`CharTokenizer`] (`len() / 4`) when the builder is not
    /// given one — a coarse fallback fine for tests and bring-up but
    /// explicitly not recommended for production budgeting; wire up a
    /// provider-specific [`Tokenizer`] (e.g.
    /// `ailoop_anthropic::OnlineCalibratedTokenizer`) when correctness
    /// matters.
    tokenizer: Box<dyn Tokenizer>,
}
70
71impl ContextManager {
72 /// Begin configuring a new manager with `max_tokens` as the
73 /// budget [`compact_if_needed`](Self::compact_if_needed) will
74 /// hold the history under. Defaults: [`TruncateStrategy`] for
75 /// reduction, [`CharTokenizer`] (`len() / 4`) for sizing, four
76 /// preserved tail messages.
77 ///
78 /// [`TruncateStrategy`]: crate::TruncateStrategy
79 pub fn builder(max_tokens: usize) -> ContextManagerBuilder {
80 ContextManagerBuilder::new(max_tokens)
81 }
82
83 /// Restore a `ContextManager` whose history is `messages` and whose
84 /// pin mask is `pinned`. The two vectors must have the same length;
85 /// otherwise a [`FromMessagesError::LengthMismatch`] is returned.
86 /// All other configuration (budget, strategy, tokenizer, preserved
87 /// tail size) comes from `builder` — pass the same configuration
88 /// the original conversation used so compaction behaves
89 /// consistently across resumes.
90 pub fn from_messages(
91 builder: ContextManagerBuilder,
92 messages: Vec<Message>,
93 pinned: Vec<bool>,
94 ) -> Result<Self, FromMessagesError> {
95 if messages.len() != pinned.len() {
96 return Err(FromMessagesError::LengthMismatch {
97 messages: messages.len(),
98 pinned: pinned.len(),
99 });
100 }
101 let mut cm = builder.build();
102 cm.messages = messages;
103 cm.pinned = pinned;
104 Ok(cm)
105 }
106}
107
108impl ContextManager {
109 /// Append `message` to the history with `pinned = false`. Use
110 /// [`pin_last`](Self::pin_last) immediately after the call to mark
111 /// it as a survivor.
112 pub fn add_message(&mut self, message: Message) {
113 self.messages.push(message);
114 self.pinned.push(false);
115 }
116
117 /// Approximate token cost of the entire history under the
118 /// configured [`Tokenizer`]. The accuracy of this number is only
119 /// as good as the tokenizer wired into the builder — under the
120 /// default [`CharTokenizer`] it is a `len() / 4` ballpark.
121 pub fn estimated_tokens(&self) -> usize {
122 self.tokenizer.count_messages(&self.messages)
123 }
124
125 /// Borrow the current history slice. Indices into this slice align
126 /// with [`pinned`](Self::pinned).
127 pub fn messages(&self) -> &[Message] {
128 &self.messages
129 }
130
131 /// Append every message in `new_messages`, all with `pinned =
132 /// false`. The engine uses this after a turn to fold the
133 /// assistant reply (and any tool results) back into the history.
134 pub fn extend(&mut self, new_messages: Vec<Message>) {
135 let added = new_messages.len();
136 self.messages.extend(new_messages);
137 self.pinned.extend(std::iter::repeat_n(false, added));
138 }
139
140 /// Pinned-state slice, parallel to [`Self::messages`]. Useful for
141 /// tests; production callers normally interact via the `pin_*`
142 /// helpers and inspect [`Self::is_pinned`].
143 pub fn pinned(&self) -> &[bool] {
144 &self.pinned
145 }
146
147 /// Whether `messages[idx]` is currently pinned. Out-of-bounds
148 /// indices return `false` rather than panicking.
149 pub fn is_pinned(&self, idx: usize) -> bool {
150 self.pinned.get(idx).copied().unwrap_or(false)
151 }
152
153 /// Pin the most recently added message so it survives every future
154 /// compaction. No-op when the history is empty.
155 ///
156 /// Indices are not stable across compactions: a `pin_last()` made
157 /// before compaction stays pinned (the strategy keeps it), but its
158 /// numeric index in the new history may shift. Re-derive indices
159 /// (or rely on `pin_last`) after compaction runs.
160 pub fn pin_last(&mut self) {
161 if let Some(last) = self.pinned.last_mut() {
162 *last = true;
163 }
164 }
165
166 /// Pin the message at `idx`. Panics on out-of-bounds, matching the
167 /// convention of `Vec::index`.
168 pub fn pin_at(&mut self, idx: usize) {
169 assert!(
170 idx < self.messages.len(),
171 "pin_at: index {idx} out of bounds"
172 );
173 self.pinned[idx] = true;
174 }
175
176 /// Clear the pin on the message at `idx`. Panics on out-of-bounds.
177 pub fn unpin_at(&mut self, idx: usize) {
178 assert!(
179 idx < self.messages.len(),
180 "unpin_at: index {idx} out of bounds"
181 );
182 self.pinned[idx] = false;
183 }
184
185 /// Pin `idx` together with every message that pairs with it via a
186 /// `tool_call` ↔ `tool_result` link. Without this, pinning a lone
187 /// `Assistant` `ToolCall` (or a lone `User` `ToolResult`) and then
188 /// letting compaction run would strand the partner — most providers
189 /// reject that as a malformed history.
190 ///
191 /// Resolution rules:
192 /// - If `messages[idx]` is an `Assistant` with `ToolCall` blocks,
193 /// any `User` message containing a `ToolResult` whose `call_id`
194 /// matches one of those calls is also pinned.
195 /// - If `messages[idx]` is a `User` with `ToolResult` blocks, any
196 /// `Assistant` message containing a `ToolCall` whose `id` matches
197 /// is also pinned.
198 /// - Messages without tool blocks are pinned alone (effectively a
199 /// `pin_at`).
200 ///
201 /// Panics on out-of-bounds. The lookup scans the full history but
202 /// each message is examined at most once.
203 pub fn pin_with_tool_result(&mut self, idx: usize) {
204 assert!(
205 idx < self.messages.len(),
206 "pin_with_tool_result: index {idx} out of bounds"
207 );
208
209 self.pinned[idx] = true;
210
211 let target_ids: Vec<String> = match &self.messages[idx] {
212 Message::Assistant { blocks } => blocks
213 .iter()
214 .filter_map(|b| match b {
215 AssistantBlock::ToolCall { id, .. } => Some(id.clone()),
216 _ => None,
217 })
218 .collect(),
219 Message::User { blocks } => blocks
220 .iter()
221 .filter_map(|b| match b {
222 UserBlock::ToolResult { call_id, .. } => Some(call_id.clone()),
223 _ => None,
224 })
225 .collect(),
226 _ => Vec::new(),
227 };
228
229 if target_ids.is_empty() {
230 return;
231 }
232
233 let is_assistant_target = matches!(self.messages[idx], Message::Assistant { .. });
234
235 for (i, msg) in self.messages.iter().enumerate() {
236 if i == idx || self.pinned[i] {
237 continue;
238 }
239 let matches = match (is_assistant_target, msg) {
240 (true, Message::User { blocks }) => blocks.iter().any(|b| matches!(b,
241 UserBlock::ToolResult { call_id, .. } if target_ids.iter().any(|t| t == call_id))),
242 (false, Message::Assistant { blocks }) => blocks.iter().any(|b| matches!(b,
243 AssistantBlock::ToolCall { id, .. } if target_ids.iter().any(|t| t == id))),
244 _ => false,
245 };
246 if matches {
247 self.pinned[i] = true;
248 }
249 }
250 }
251
252 /// Run the configured [`CompactionStrategy`] when
253 /// [`estimated_tokens`](Self::estimated_tokens) reaches the
254 /// budget; otherwise return `Ok(None)` and leave the history
255 /// untouched.
256 ///
257 /// On success returns `Ok(Some(report))` describing the
258 /// before/after counts and the strategy name. The engine emits
259 /// [`StreamChunk::HistoryCompacted`] carrying the same fields so
260 /// observability middlewares can correlate the compaction with the
261 /// run that triggered it.
262 ///
263 /// Errors propagate from the strategy (commonly
264 /// [`CompactionError::SummarizationFailed`] when the summarizer
265 /// model itself fails). [`CompactionError::NotEnoughHistory`]
266 /// surfaces when the history has fewer messages than
267 /// `preserve_n_last` and there is nothing to drop.
268 ///
269 /// [`CompactionStrategy`]: crate::CompactionStrategy
270 /// [`StreamChunk::HistoryCompacted`]: ailoop_core::StreamChunk::HistoryCompacted
271 pub async fn compact_if_needed(&mut self) -> Result<Option<CompactionReport>, CompactionError> {
272 if self.estimated_tokens() < self.max_tokens {
273 return Ok(None);
274 }
275
276 let before = self.messages.len();
277 let output = self
278 .strategy
279 .compact(&self.messages, &self.pinned, self.preserve_n_last)
280 .await?;
281 debug_assert_eq!(
282 output.messages.len(),
283 output.pinned.len(),
284 "strategy must return a pinned mask matching the message vector",
285 );
286 let after = output.messages.len();
287 let strategy = self.strategy.name();
288 self.messages = output.messages;
289 self.pinned = output.pinned;
290 Ok(Some(CompactionReport {
291 before,
292 after,
293 strategy,
294 }))
295 }
296}
297
/// Configuration for a [`ContextManager`].
///
/// Construct via [`ContextManager::builder`]. Setters return
/// `Self` so calls chain; [`build`](Self::build) is infallible.
pub struct ContextManagerBuilder {
    /// Token budget, fixed when the builder is created.
    max_tokens: usize,
    /// Number of trailing messages the strategy is asked to preserve;
    /// starts at 4.
    preserve_n_last: usize,
    /// Tokenizer the built manager uses for sizing; starts as
    /// [`CharTokenizer`].
    tokenizer: Box<dyn Tokenizer>,
    /// Compaction strategy the built manager runs; starts as
    /// [`TruncateStrategy`].
    strategy: Box<dyn CompactionStrategy>,
}
308
309impl ContextManagerBuilder {
310 fn new(max_tokens: usize) -> Self {
311 Self {
312 max_tokens,
313 preserve_n_last: 4,
314 // Fallback default — see the doc on `ContextManager::tokenizer`.
315 // Production code should override via `Self::tokenizer`.
316 tokenizer: Box::new(CharTokenizer),
317 strategy: Box::new(TruncateStrategy),
318 }
319 }
320}
321
322impl ContextManagerBuilder {
323 /// Number of trailing messages the strategy must preserve verbatim
324 /// (after walking back to a safe `User`-without-`ToolResult`
325 /// boundary). Default: 4. Lowering it lets compaction reclaim more
326 /// budget at the cost of dropping more recent context; raising it
327 /// keeps recent turns at the cost of compacting sooner.
328 pub fn preserve_n_last(mut self, n: usize) -> Self {
329 self.preserve_n_last = n;
330 self
331 }
332
333 /// Wire a [`Tokenizer`] into the manager. Replaces the default
334 /// [`CharTokenizer`] fallback so [`ContextManager::compact_if_needed`]
335 /// measures the budget in real tokens rather than `len() / 4`.
336 pub fn tokenizer(self, tokenizer: Box<dyn Tokenizer>) -> ContextManagerBuilder {
337 ContextManagerBuilder {
338 max_tokens: self.max_tokens,
339 preserve_n_last: self.preserve_n_last,
340 tokenizer,
341 strategy: self.strategy,
342 }
343 }
344
345 /// Wire a [`CompactionStrategy`] into the manager. Replaces the
346 /// default [`TruncateStrategy`]. Use
347 /// `Box::new(SummarizeStrategy::new(model))` to compress dropped
348 /// history into a model-generated summary instead of losing it.
349 ///
350 /// [`CompactionStrategy`]: crate::CompactionStrategy
351 /// [`TruncateStrategy`]: crate::TruncateStrategy
352 pub fn strategy(self, strategy: Box<dyn CompactionStrategy>) -> ContextManagerBuilder {
353 ContextManagerBuilder {
354 max_tokens: self.max_tokens,
355 preserve_n_last: self.preserve_n_last,
356 tokenizer: self.tokenizer,
357 strategy,
358 }
359 }
360
361 /// Finalize the configuration and build the [`ContextManager`].
362 /// Infallible — every error case is caught by the typed setters.
363 pub fn build(self) -> ContextManager {
364 ContextManager {
365 messages: Vec::new(),
366 pinned: Vec::new(),
367 max_tokens: self.max_tokens,
368 preserve_n_last: self.preserve_n_last,
369 strategy: self.strategy,
370 tokenizer: self.tokenizer,
371 }
372 }
373}
374
375#[cfg(test)]
376mod tests {
377 use super::*;
378 use ailoop_core::{AssistantBlock, ToolResultContent, UserBlock};
379 use serde_json::json;
380
381 fn tool_call_msg(id: &str) -> Message {
382 Message::Assistant {
383 blocks: vec![AssistantBlock::tool_call(id, "t", json!({}))],
384 }
385 }
386
387 fn tool_result_msg(call_id: &str) -> Message {
388 Message::User {
389 blocks: vec![UserBlock::tool_result(
390 call_id,
391 ToolResultContent::text("ok"),
392 )],
393 }
394 }
395
396 #[tokio::test]
397 async fn compact_if_needed_returns_none_when_under_budget() {
398 let mut mgr = ContextManager::builder(10_000).build();
399 mgr.add_message(Message::user("hi"));
400 mgr.add_message(Message::assistant_text("hello"));
401
402 let report = mgr
403 .compact_if_needed()
404 .await
405 .expect("compaction should succeed");
406 assert!(report.is_none(), "no compaction expected when under budget");
407 }
408
409 #[tokio::test]
410 async fn compact_if_needed_returns_report_when_over_budget() {
411 // CharTokenizer fallback is len()/4. Use a tiny budget so a
412 // couple of small messages already trip the limit.
413 let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
414 mgr.add_message(Message::user("first turn"));
415 mgr.add_message(Message::assistant_text("first reply"));
416 mgr.add_message(Message::user("second turn"));
417 mgr.add_message(Message::assistant_text("second reply"));
418 mgr.add_message(Message::user("third turn"));
419
420 let report = mgr
421 .compact_if_needed()
422 .await
423 .expect("compaction should succeed")
424 .expect("expected compaction to run");
425
426 assert_eq!(report.strategy, "truncate");
427 assert!(
428 report.after < report.before,
429 "compaction must drop messages"
430 );
431 }
432
433 #[tokio::test]
434 async fn pin_last_survives_compaction() {
435 let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
436 mgr.add_message(Message::user("pinned anchor"));
437 mgr.pin_last();
438 for i in 0..5 {
439 mgr.add_message(Message::user(format!("turn {i} q")));
440 mgr.add_message(Message::assistant_text(format!("turn {i} a")));
441 }
442
443 let report = mgr
444 .compact_if_needed()
445 .await
446 .expect("compaction should succeed")
447 .expect("expected compaction to run");
448 assert!(report.after < report.before);
449
450 // The pinned anchor must still be the first message and still pinned.
451 let first = mgr.messages().first().expect("history should be non-empty");
452 match first {
453 Message::User { blocks } => match &blocks[0] {
454 UserBlock::Text { text, .. } => assert_eq!(text, "pinned anchor"),
455 other => panic!("expected pinned text block, got {other:?}"),
456 },
457 other => panic!("expected pinned user message, got {other:?}"),
458 }
459 assert!(
460 mgr.is_pinned(0),
461 "pinned mask must be preserved across compaction"
462 );
463 }
464
465 #[tokio::test]
466 async fn pin_with_tool_result_keeps_pair_intact() {
467 let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
468 mgr.add_message(Message::user("task"));
469 mgr.add_message(tool_call_msg("c1"));
470 mgr.add_message(tool_result_msg("c1"));
471 mgr.add_message(Message::assistant_text("result"));
472 // Pin the tool_call (idx 1). The helper should also pin the
473 // tool_result (idx 2) so the pair survives compaction together.
474 mgr.pin_with_tool_result(1);
475 assert!(mgr.is_pinned(1));
476 assert!(mgr.is_pinned(2), "partner tool_result must be pinned too");
477
478 // Add filler so we overflow the budget.
479 for i in 0..6 {
480 mgr.add_message(Message::user(format!("filler {i}")));
481 mgr.add_message(Message::assistant_text(format!("ack {i}")));
482 }
483
484 mgr.compact_if_needed()
485 .await
486 .expect("compaction should succeed")
487 .expect("expected compaction to run");
488
489 // The pinned pair should still be present in the same relative order.
490 let mut saw_call = false;
491 let mut saw_result = false;
492 for msg in mgr.messages() {
493 match msg {
494 Message::Assistant { blocks } => {
495 if blocks
496 .iter()
497 .any(|b| matches!(b, AssistantBlock::ToolCall { id, .. } if id == "c1"))
498 {
499 saw_call = true;
500 }
501 }
502 Message::User { blocks } => {
503 if blocks.iter().any(
504 |b| matches!(b, UserBlock::ToolResult { call_id, .. } if call_id == "c1"),
505 ) {
506 assert!(saw_call, "tool_result must follow its tool_call");
507 saw_result = true;
508 }
509 }
510 _ => {}
511 }
512 }
513 assert!(
514 saw_call && saw_result,
515 "pinned pair must survive compaction"
516 );
517 }
518
519 #[tokio::test]
520 async fn pin_with_tool_result_on_result_pins_the_call() {
521 let mut mgr = ContextManager::builder(10).preserve_n_last(1).build();
522 mgr.add_message(tool_call_msg("c1"));
523 mgr.add_message(tool_result_msg("c1"));
524 mgr.add_message(Message::user("later"));
525
526 mgr.pin_with_tool_result(1);
527 assert!(mgr.is_pinned(0), "tool_call partner must be pinned");
528 assert!(mgr.is_pinned(1));
529 }
530
531 /// `compact_if_needed` measures the budget in real tokens via the
532 /// configured [`Tokenizer`], not in characters. A tokenizer that
533 /// bills every message at a fixed cost lets us drive compaction by
534 /// message count alone, independently of the underlying
535 /// `text.len()`.
536 #[tokio::test]
537 async fn compact_uses_tokenizer_budget_not_character_count() {
538 struct PerMessageTokenizer;
539 impl Tokenizer for PerMessageTokenizer {
540 fn count_text(&self, _text: &str) -> usize {
541 10
542 }
543 }
544
545 // Budget: 35 tokens. Five 1-text-block messages cost 50 tokens
546 // (5 * 10), so compaction must run. Under the `CharTokenizer`
547 // fallback the same content (under ~50 chars total) would fit
548 // comfortably under 35 — proving the budget is sourced from
549 // the supplied tokenizer.
550 let mut mgr = ContextManager::builder(35)
551 .tokenizer(Box::new(PerMessageTokenizer))
552 .preserve_n_last(2)
553 .build();
554 for i in 0..5 {
555 mgr.add_message(Message::user(format!("q{i}")));
556 }
557 assert_eq!(mgr.estimated_tokens(), 50);
558
559 let report = mgr
560 .compact_if_needed()
561 .await
562 .expect("compaction should succeed")
563 .expect("over-budget history must compact");
564 assert_eq!(report.before, 5);
565 assert!(report.after < report.before);
566 // After compaction the tail is still bound by `preserve_n_last`.
567 assert_eq!(report.after, 2);
568 }
569
570 #[test]
571 fn from_messages_restores_history_and_pin_mask() {
572 let messages = vec![
573 Message::user("first"),
574 Message::assistant_text("ack"),
575 Message::user("second"),
576 ];
577 let pinned = vec![true, false, true];
578 let mgr = ContextManager::from_messages(ContextManager::builder(10_000), messages, pinned)
579 .expect("equal lengths");
580 assert_eq!(mgr.messages().len(), 3);
581 assert!(mgr.is_pinned(0));
582 assert!(!mgr.is_pinned(1));
583 assert!(mgr.is_pinned(2));
584 }
585
586 #[test]
587 fn from_messages_rejects_length_mismatch() {
588 let result = ContextManager::from_messages(
589 ContextManager::builder(10_000),
590 vec![Message::user("solo")],
591 vec![],
592 );
593 match result {
594 Err(FromMessagesError::LengthMismatch {
595 messages: 1,
596 pinned: 0,
597 }) => {}
598 Ok(_) => panic!("length mismatch must error, not panic"),
599 Err(other) => panic!("unexpected error: {other:?}"),
600 }
601 }
602}