Skip to main content

ailoop_context/
context_manager.rs

1//! [`ContextManager`] + [`ContextManagerBuilder`] + the report type
2//! [`compact_if_needed`](ContextManager::compact_if_needed) returns.
3
4use ailoop_core::{AssistantBlock, CharTokenizer, Message, Tokenizer, UserBlock};
5
6use crate::{
7    compaction::{CompactionStrategy, TruncateStrategy},
8    errors::{CompactionError, FromMessagesError},
9};
10
/// Summary of one compaction pass performed by
/// [`ContextManager::compact_if_needed`].
///
/// That method hands the report back inside an `Option`; a `None`
/// there means the history already fit within `max_tokens` and no pass
/// was run.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct CompactionReport {
    /// Number of messages in the history before the pass.
    pub before: usize,
    /// Number of messages in the history after the pass. Whenever the
    /// strategy actually dropped or replaced messages,
    /// `after < before`.
    pub after: usize,
    /// Stable, machine-readable name of the strategy that ran, as
    /// reported by [`CompactionStrategy::name`] (for example
    /// `"truncate"` or `"summarize"`). It doubles as the `strategy`
    /// field of [`StreamChunk::HistoryCompacted`].
    ///
    /// [`CompactionStrategy::name`]: crate::CompactionStrategy::name
    /// [`StreamChunk::HistoryCompacted`]: ailoop_core::StreamChunk::HistoryCompacted
    pub strategy: &'static str,
}
31
32/// Owns the message vector backing a single conversation, plus the
33/// budget and pin mask that govern how compaction reduces it.
34///
35/// Lifecycle: append turns with [`add_message`](Self::add_message) /
36/// [`extend`](Self::extend), inspect with [`messages`](Self::messages),
37/// pin survivors with [`pin_last`](Self::pin_last) /
38/// [`pin_at`](Self::pin_at) / [`pin_with_tool_result`](Self::pin_with_tool_result),
39/// and call [`compact_if_needed`](Self::compact_if_needed) before each
40/// outgoing [`ChatRequest`] to hold the history under the configured
41/// budget. Restore from a [`ConversationSnapshot`] via
42/// [`from_messages`](Self::from_messages).
43///
44/// The façade [`Conversation`](https://docs.rs/ailoop) wires one of
45/// these automatically — touch this type directly only when driving
46/// [`advanced::run_chat`](https://docs.rs/ailoop) or building tests.
47///
48/// [`ChatRequest`]: ailoop_core::ChatRequest
49/// [`ConversationSnapshot`]: crate::ConversationSnapshot
50pub struct ContextManager {
51    messages: Vec<Message>,
52    /// Parallel to `messages`: `pinned[i] == true` marks `messages[i]`
53    /// as "must survive compaction". The two vectors are kept the same
54    /// length by every internal mutation; new messages default to
55    /// `false` and only the explicit pin API flips the flag.
56    pinned: Vec<bool>,
57    max_tokens: usize,
58    preserve_n_last: usize,
59    strategy: Box<dyn CompactionStrategy>,
60    /// Tokenizer used to size [`Self::messages`] against
61    /// [`Self::max_tokens`] in [`Self::compact_if_needed`]. Defaults
62    /// to [`CharTokenizer`] (`len() / 4`) when the builder is not
63    /// given one — a coarse fallback fine for tests and bring-up but
64    /// explicitly not recommended for production budgeting; wire up a
65    /// provider-specific [`Tokenizer`] (e.g.
66    /// `ailoop_anthropic::OnlineCalibratedTokenizer`) when correctness
67    /// matters.
68    tokenizer: Box<dyn Tokenizer>,
69}
70
71impl ContextManager {
72    /// Begin configuring a new manager with `max_tokens` as the
73    /// budget [`compact_if_needed`](Self::compact_if_needed) will
74    /// hold the history under. Defaults: [`TruncateStrategy`] for
75    /// reduction, [`CharTokenizer`] (`len() / 4`) for sizing, four
76    /// preserved tail messages.
77    ///
78    /// [`TruncateStrategy`]: crate::TruncateStrategy
79    pub fn builder(max_tokens: usize) -> ContextManagerBuilder {
80        ContextManagerBuilder::new(max_tokens)
81    }
82
83    /// Restore a `ContextManager` whose history is `messages` and whose
84    /// pin mask is `pinned`. The two vectors must have the same length;
85    /// otherwise a [`FromMessagesError::LengthMismatch`] is returned.
86    /// All other configuration (budget, strategy, tokenizer, preserved
87    /// tail size) comes from `builder` — pass the same configuration
88    /// the original conversation used so compaction behaves
89    /// consistently across resumes.
90    pub fn from_messages(
91        builder: ContextManagerBuilder,
92        messages: Vec<Message>,
93        pinned: Vec<bool>,
94    ) -> Result<Self, FromMessagesError> {
95        if messages.len() != pinned.len() {
96            return Err(FromMessagesError::LengthMismatch {
97                messages: messages.len(),
98                pinned: pinned.len(),
99            });
100        }
101        let mut cm = builder.build();
102        cm.messages = messages;
103        cm.pinned = pinned;
104        Ok(cm)
105    }
106}
107
108impl ContextManager {
109    /// Append `message` to the history with `pinned = false`. Use
110    /// [`pin_last`](Self::pin_last) immediately after the call to mark
111    /// it as a survivor.
112    pub fn add_message(&mut self, message: Message) {
113        self.messages.push(message);
114        self.pinned.push(false);
115    }
116
117    /// Approximate token cost of the entire history under the
118    /// configured [`Tokenizer`]. The accuracy of this number is only
119    /// as good as the tokenizer wired into the builder — under the
120    /// default [`CharTokenizer`] it is a `len() / 4` ballpark.
121    pub fn estimated_tokens(&self) -> usize {
122        self.tokenizer.count_messages(&self.messages)
123    }
124
125    /// Borrow the current history slice. Indices into this slice align
126    /// with [`pinned`](Self::pinned).
127    pub fn messages(&self) -> &[Message] {
128        &self.messages
129    }
130
131    /// Append every message in `new_messages`, all with `pinned =
132    /// false`. The engine uses this after a turn to fold the
133    /// assistant reply (and any tool results) back into the history.
134    pub fn extend(&mut self, new_messages: Vec<Message>) {
135        let added = new_messages.len();
136        self.messages.extend(new_messages);
137        self.pinned.extend(std::iter::repeat_n(false, added));
138    }
139
140    /// Pinned-state slice, parallel to [`Self::messages`]. Useful for
141    /// tests; production callers normally interact via the `pin_*`
142    /// helpers and inspect [`Self::is_pinned`].
143    pub fn pinned(&self) -> &[bool] {
144        &self.pinned
145    }
146
147    /// Whether `messages[idx]` is currently pinned. Out-of-bounds
148    /// indices return `false` rather than panicking.
149    pub fn is_pinned(&self, idx: usize) -> bool {
150        self.pinned.get(idx).copied().unwrap_or(false)
151    }
152
153    /// Pin the most recently added message so it survives every future
154    /// compaction. No-op when the history is empty.
155    ///
156    /// Indices are not stable across compactions: a `pin_last()` made
157    /// before compaction stays pinned (the strategy keeps it), but its
158    /// numeric index in the new history may shift. Re-derive indices
159    /// (or rely on `pin_last`) after compaction runs.
160    pub fn pin_last(&mut self) {
161        if let Some(last) = self.pinned.last_mut() {
162            *last = true;
163        }
164    }
165
166    /// Pin the message at `idx`. Panics on out-of-bounds, matching the
167    /// convention of `Vec::index`.
168    pub fn pin_at(&mut self, idx: usize) {
169        assert!(
170            idx < self.messages.len(),
171            "pin_at: index {idx} out of bounds"
172        );
173        self.pinned[idx] = true;
174    }
175
176    /// Clear the pin on the message at `idx`. Panics on out-of-bounds.
177    pub fn unpin_at(&mut self, idx: usize) {
178        assert!(
179            idx < self.messages.len(),
180            "unpin_at: index {idx} out of bounds"
181        );
182        self.pinned[idx] = false;
183    }
184
185    /// Pin `idx` together with every message that pairs with it via a
186    /// `tool_call` ↔ `tool_result` link. Without this, pinning a lone
187    /// `Assistant` `ToolCall` (or a lone `User` `ToolResult`) and then
188    /// letting compaction run would strand the partner — most providers
189    /// reject that as a malformed history.
190    ///
191    /// Resolution rules:
192    /// - If `messages[idx]` is an `Assistant` with `ToolCall` blocks,
193    ///   any `User` message containing a `ToolResult` whose `call_id`
194    ///   matches one of those calls is also pinned.
195    /// - If `messages[idx]` is a `User` with `ToolResult` blocks, any
196    ///   `Assistant` message containing a `ToolCall` whose `id` matches
197    ///   is also pinned.
198    /// - Messages without tool blocks are pinned alone (effectively a
199    ///   `pin_at`).
200    ///
201    /// Panics on out-of-bounds. The lookup scans the full history but
202    /// each message is examined at most once.
203    pub fn pin_with_tool_result(&mut self, idx: usize) {
204        assert!(
205            idx < self.messages.len(),
206            "pin_with_tool_result: index {idx} out of bounds"
207        );
208
209        self.pinned[idx] = true;
210
211        let target_ids: Vec<String> = match &self.messages[idx] {
212            Message::Assistant { blocks } => blocks
213                .iter()
214                .filter_map(|b| match b {
215                    AssistantBlock::ToolCall { id, .. } => Some(id.clone()),
216                    _ => None,
217                })
218                .collect(),
219            Message::User { blocks } => blocks
220                .iter()
221                .filter_map(|b| match b {
222                    UserBlock::ToolResult { call_id, .. } => Some(call_id.clone()),
223                    _ => None,
224                })
225                .collect(),
226            _ => Vec::new(),
227        };
228
229        if target_ids.is_empty() {
230            return;
231        }
232
233        let is_assistant_target = matches!(self.messages[idx], Message::Assistant { .. });
234
235        for (i, msg) in self.messages.iter().enumerate() {
236            if i == idx || self.pinned[i] {
237                continue;
238            }
239            let matches = match (is_assistant_target, msg) {
240                (true, Message::User { blocks }) => blocks.iter().any(|b| matches!(b,
241                    UserBlock::ToolResult { call_id, .. } if target_ids.iter().any(|t| t == call_id))),
242                (false, Message::Assistant { blocks }) => blocks.iter().any(|b| matches!(b,
243                    AssistantBlock::ToolCall { id, .. } if target_ids.iter().any(|t| t == id))),
244                _ => false,
245            };
246            if matches {
247                self.pinned[i] = true;
248            }
249        }
250    }
251
252    /// Run the configured [`CompactionStrategy`] when
253    /// [`estimated_tokens`](Self::estimated_tokens) reaches the
254    /// budget; otherwise return `Ok(None)` and leave the history
255    /// untouched.
256    ///
257    /// On success returns `Ok(Some(report))` describing the
258    /// before/after counts and the strategy name. The engine emits
259    /// [`StreamChunk::HistoryCompacted`] carrying the same fields so
260    /// observability middlewares can correlate the compaction with the
261    /// run that triggered it.
262    ///
263    /// Errors propagate from the strategy (commonly
264    /// [`CompactionError::SummarizationFailed`] when the summarizer
265    /// model itself fails). [`CompactionError::NotEnoughHistory`]
266    /// surfaces when the history has fewer messages than
267    /// `preserve_n_last` and there is nothing to drop.
268    ///
269    /// [`CompactionStrategy`]: crate::CompactionStrategy
270    /// [`StreamChunk::HistoryCompacted`]: ailoop_core::StreamChunk::HistoryCompacted
271    pub async fn compact_if_needed(&mut self) -> Result<Option<CompactionReport>, CompactionError> {
272        if self.estimated_tokens() < self.max_tokens {
273            return Ok(None);
274        }
275
276        let before = self.messages.len();
277        let output = self
278            .strategy
279            .compact(&self.messages, &self.pinned, self.preserve_n_last)
280            .await?;
281        debug_assert_eq!(
282            output.messages.len(),
283            output.pinned.len(),
284            "strategy must return a pinned mask matching the message vector",
285        );
286        let after = output.messages.len();
287        let strategy = self.strategy.name();
288        self.messages = output.messages;
289        self.pinned = output.pinned;
290        Ok(Some(CompactionReport {
291            before,
292            after,
293            strategy,
294        }))
295    }
296}
297
298/// Configuration for a [`ContextManager`].
299///
300/// Construct via [`ContextManager::builder`]. Setters return
301/// `Self` so calls chain; [`build`](Self::build) is infallible.
302pub struct ContextManagerBuilder {
303    max_tokens: usize,
304    preserve_n_last: usize,
305    tokenizer: Box<dyn Tokenizer>,
306    strategy: Box<dyn CompactionStrategy>,
307}
308
309impl ContextManagerBuilder {
310    fn new(max_tokens: usize) -> Self {
311        Self {
312            max_tokens,
313            preserve_n_last: 4,
314            // Fallback default — see the doc on `ContextManager::tokenizer`.
315            // Production code should override via `Self::tokenizer`.
316            tokenizer: Box::new(CharTokenizer),
317            strategy: Box::new(TruncateStrategy),
318        }
319    }
320}
321
322impl ContextManagerBuilder {
323    /// Number of trailing messages the strategy must preserve verbatim
324    /// (after walking back to a safe `User`-without-`ToolResult`
325    /// boundary). Default: 4. Lowering it lets compaction reclaim more
326    /// budget at the cost of dropping more recent context; raising it
327    /// keeps recent turns at the cost of compacting sooner.
328    pub fn preserve_n_last(mut self, n: usize) -> Self {
329        self.preserve_n_last = n;
330        self
331    }
332
333    /// Wire a [`Tokenizer`] into the manager. Replaces the default
334    /// [`CharTokenizer`] fallback so [`ContextManager::compact_if_needed`]
335    /// measures the budget in real tokens rather than `len() / 4`.
336    pub fn tokenizer(self, tokenizer: Box<dyn Tokenizer>) -> ContextManagerBuilder {
337        ContextManagerBuilder {
338            max_tokens: self.max_tokens,
339            preserve_n_last: self.preserve_n_last,
340            tokenizer,
341            strategy: self.strategy,
342        }
343    }
344
345    /// Wire a [`CompactionStrategy`] into the manager. Replaces the
346    /// default [`TruncateStrategy`]. Use
347    /// `Box::new(SummarizeStrategy::new(model))` to compress dropped
348    /// history into a model-generated summary instead of losing it.
349    ///
350    /// [`CompactionStrategy`]: crate::CompactionStrategy
351    /// [`TruncateStrategy`]: crate::TruncateStrategy
352    pub fn strategy(self, strategy: Box<dyn CompactionStrategy>) -> ContextManagerBuilder {
353        ContextManagerBuilder {
354            max_tokens: self.max_tokens,
355            preserve_n_last: self.preserve_n_last,
356            tokenizer: self.tokenizer,
357            strategy,
358        }
359    }
360
361    /// Finalize the configuration and build the [`ContextManager`].
362    /// Infallible — every error case is caught by the typed setters.
363    pub fn build(self) -> ContextManager {
364        ContextManager {
365            messages: Vec::new(),
366            pinned: Vec::new(),
367            max_tokens: self.max_tokens,
368            preserve_n_last: self.preserve_n_last,
369            strategy: self.strategy,
370            tokenizer: self.tokenizer,
371        }
372    }
373}
374
375#[cfg(test)]
376mod tests {
377    use super::*;
378    use ailoop_core::{AssistantBlock, ToolResultContent, UserBlock};
379    use serde_json::json;
380
381    fn tool_call_msg(id: &str) -> Message {
382        Message::Assistant {
383            blocks: vec![AssistantBlock::tool_call(id, "t", json!({}))],
384        }
385    }
386
387    fn tool_result_msg(call_id: &str) -> Message {
388        Message::User {
389            blocks: vec![UserBlock::tool_result(
390                call_id,
391                ToolResultContent::text("ok"),
392            )],
393        }
394    }
395
396    #[tokio::test]
397    async fn compact_if_needed_returns_none_when_under_budget() {
398        let mut mgr = ContextManager::builder(10_000).build();
399        mgr.add_message(Message::user("hi"));
400        mgr.add_message(Message::assistant_text("hello"));
401
402        let report = mgr
403            .compact_if_needed()
404            .await
405            .expect("compaction should succeed");
406        assert!(report.is_none(), "no compaction expected when under budget");
407    }
408
409    #[tokio::test]
410    async fn compact_if_needed_returns_report_when_over_budget() {
411        // CharTokenizer fallback is len()/4. Use a tiny budget so a
412        // couple of small messages already trip the limit.
413        let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
414        mgr.add_message(Message::user("first turn"));
415        mgr.add_message(Message::assistant_text("first reply"));
416        mgr.add_message(Message::user("second turn"));
417        mgr.add_message(Message::assistant_text("second reply"));
418        mgr.add_message(Message::user("third turn"));
419
420        let report = mgr
421            .compact_if_needed()
422            .await
423            .expect("compaction should succeed")
424            .expect("expected compaction to run");
425
426        assert_eq!(report.strategy, "truncate");
427        assert!(
428            report.after < report.before,
429            "compaction must drop messages"
430        );
431    }
432
433    #[tokio::test]
434    async fn pin_last_survives_compaction() {
435        let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
436        mgr.add_message(Message::user("pinned anchor"));
437        mgr.pin_last();
438        for i in 0..5 {
439            mgr.add_message(Message::user(format!("turn {i} q")));
440            mgr.add_message(Message::assistant_text(format!("turn {i} a")));
441        }
442
443        let report = mgr
444            .compact_if_needed()
445            .await
446            .expect("compaction should succeed")
447            .expect("expected compaction to run");
448        assert!(report.after < report.before);
449
450        // The pinned anchor must still be the first message and still pinned.
451        let first = mgr.messages().first().expect("history should be non-empty");
452        match first {
453            Message::User { blocks } => match &blocks[0] {
454                UserBlock::Text { text, .. } => assert_eq!(text, "pinned anchor"),
455                other => panic!("expected pinned text block, got {other:?}"),
456            },
457            other => panic!("expected pinned user message, got {other:?}"),
458        }
459        assert!(
460            mgr.is_pinned(0),
461            "pinned mask must be preserved across compaction"
462        );
463    }
464
465    #[tokio::test]
466    async fn pin_with_tool_result_keeps_pair_intact() {
467        let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
468        mgr.add_message(Message::user("task"));
469        mgr.add_message(tool_call_msg("c1"));
470        mgr.add_message(tool_result_msg("c1"));
471        mgr.add_message(Message::assistant_text("result"));
472        // Pin the tool_call (idx 1). The helper should also pin the
473        // tool_result (idx 2) so the pair survives compaction together.
474        mgr.pin_with_tool_result(1);
475        assert!(mgr.is_pinned(1));
476        assert!(mgr.is_pinned(2), "partner tool_result must be pinned too");
477
478        // Add filler so we overflow the budget.
479        for i in 0..6 {
480            mgr.add_message(Message::user(format!("filler {i}")));
481            mgr.add_message(Message::assistant_text(format!("ack {i}")));
482        }
483
484        mgr.compact_if_needed()
485            .await
486            .expect("compaction should succeed")
487            .expect("expected compaction to run");
488
489        // The pinned pair should still be present in the same relative order.
490        let mut saw_call = false;
491        let mut saw_result = false;
492        for msg in mgr.messages() {
493            match msg {
494                Message::Assistant { blocks } => {
495                    if blocks
496                        .iter()
497                        .any(|b| matches!(b, AssistantBlock::ToolCall { id, .. } if id == "c1"))
498                    {
499                        saw_call = true;
500                    }
501                }
502                Message::User { blocks } => {
503                    if blocks.iter().any(
504                        |b| matches!(b, UserBlock::ToolResult { call_id, .. } if call_id == "c1"),
505                    ) {
506                        assert!(saw_call, "tool_result must follow its tool_call");
507                        saw_result = true;
508                    }
509                }
510                _ => {}
511            }
512        }
513        assert!(
514            saw_call && saw_result,
515            "pinned pair must survive compaction"
516        );
517    }
518
519    #[tokio::test]
520    async fn pin_with_tool_result_on_result_pins_the_call() {
521        let mut mgr = ContextManager::builder(10).preserve_n_last(1).build();
522        mgr.add_message(tool_call_msg("c1"));
523        mgr.add_message(tool_result_msg("c1"));
524        mgr.add_message(Message::user("later"));
525
526        mgr.pin_with_tool_result(1);
527        assert!(mgr.is_pinned(0), "tool_call partner must be pinned");
528        assert!(mgr.is_pinned(1));
529    }
530
531    /// `compact_if_needed` measures the budget in real tokens via the
532    /// configured [`Tokenizer`], not in characters. A tokenizer that
533    /// bills every message at a fixed cost lets us drive compaction by
534    /// message count alone, independently of the underlying
535    /// `text.len()`.
536    #[tokio::test]
537    async fn compact_uses_tokenizer_budget_not_character_count() {
538        struct PerMessageTokenizer;
539        impl Tokenizer for PerMessageTokenizer {
540            fn count_text(&self, _text: &str) -> usize {
541                10
542            }
543        }
544
545        // Budget: 35 tokens. Five 1-text-block messages cost 50 tokens
546        // (5 * 10), so compaction must run. Under the `CharTokenizer`
547        // fallback the same content (under ~50 chars total) would fit
548        // comfortably under 35 — proving the budget is sourced from
549        // the supplied tokenizer.
550        let mut mgr = ContextManager::builder(35)
551            .tokenizer(Box::new(PerMessageTokenizer))
552            .preserve_n_last(2)
553            .build();
554        for i in 0..5 {
555            mgr.add_message(Message::user(format!("q{i}")));
556        }
557        assert_eq!(mgr.estimated_tokens(), 50);
558
559        let report = mgr
560            .compact_if_needed()
561            .await
562            .expect("compaction should succeed")
563            .expect("over-budget history must compact");
564        assert_eq!(report.before, 5);
565        assert!(report.after < report.before);
566        // After compaction the tail is still bound by `preserve_n_last`.
567        assert_eq!(report.after, 2);
568    }
569
570    #[test]
571    fn from_messages_restores_history_and_pin_mask() {
572        let messages = vec![
573            Message::user("first"),
574            Message::assistant_text("ack"),
575            Message::user("second"),
576        ];
577        let pinned = vec![true, false, true];
578        let mgr = ContextManager::from_messages(ContextManager::builder(10_000), messages, pinned)
579            .expect("equal lengths");
580        assert_eq!(mgr.messages().len(), 3);
581        assert!(mgr.is_pinned(0));
582        assert!(!mgr.is_pinned(1));
583        assert!(mgr.is_pinned(2));
584    }
585
586    #[test]
587    fn from_messages_rejects_length_mismatch() {
588        let result = ContextManager::from_messages(
589            ContextManager::builder(10_000),
590            vec![Message::user("solo")],
591            vec![],
592        );
593        match result {
594            Err(FromMessagesError::LengthMismatch {
595                messages: 1,
596                pinned: 0,
597            }) => {}
598            Ok(_) => panic!("length mismatch must error, not panic"),
599            Err(other) => panic!("unexpected error: {other:?}"),
600        }
601    }
602}