ailoop-context 1.0.0-rc.2

Conversation history management and compaction for ailoop
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
//! [`ContextManager`] + [`ContextManagerBuilder`] + the report type
//! [`compact_if_needed`](ContextManager::compact_if_needed) returns.

use ailoop_core::{AssistantBlock, CharTokenizer, Message, Tokenizer, UserBlock};

use crate::{
    compaction::{CompactionStrategy, TruncateStrategy},
    errors::{CompactionError, FromMessagesError},
};

/// Reports what [`ContextManager::compact_if_needed`] did when it ran.
/// Returned wrapped in `Option`: `None` means compaction was not needed
/// (history fits within `max_tokens`).
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct CompactionReport {
    /// Message count before compaction ran.
    pub before: usize,
    /// Message count after compaction ran. `after < before` whenever
    /// the strategy actually dropped or replaced messages.
    pub after: usize,
    /// Stable, machine-readable strategy name from
    /// [`CompactionStrategy::name`] (e.g. `"truncate"`,
    /// `"summarize"`). Used as the `strategy` field of
    /// [`StreamChunk::HistoryCompacted`].
    ///
    /// [`CompactionStrategy::name`]: crate::CompactionStrategy::name
    /// [`StreamChunk::HistoryCompacted`]: ailoop_core::StreamChunk::HistoryCompacted
    pub strategy: &'static str,
}

/// Owns the message vector backing a single conversation, plus the
/// budget and pin mask that govern how compaction reduces it.
///
/// Lifecycle: append turns with [`add_message`](Self::add_message) /
/// [`extend`](Self::extend), inspect with [`messages`](Self::messages),
/// pin survivors with [`pin_last`](Self::pin_last) /
/// [`pin_at`](Self::pin_at) / [`pin_with_tool_result`](Self::pin_with_tool_result),
/// and call [`compact_if_needed`](Self::compact_if_needed) before each
/// outgoing [`ChatRequest`] to hold the history under the configured
/// budget. Restore from a [`ConversationSnapshot`] via
/// [`from_messages`](Self::from_messages).
///
/// The façade [`Conversation`](https://docs.rs/ailoop) wires one of
/// these automatically — touch this type directly only when driving
/// [`advanced::run_chat`](https://docs.rs/ailoop) or building tests.
///
/// [`ChatRequest`]: ailoop_core::ChatRequest
/// [`ConversationSnapshot`]: crate::ConversationSnapshot
pub struct ContextManager {
    messages: Vec<Message>,
    /// Parallel to `messages`: `pinned[i] == true` marks `messages[i]`
    /// as "must survive compaction". The two vectors are kept the same
    /// length by every internal mutation; new messages default to
    /// `false` and only the explicit pin API flips the flag.
    pinned: Vec<bool>,
    max_tokens: usize,
    preserve_n_last: usize,
    strategy: Box<dyn CompactionStrategy>,
    /// Tokenizer used to size [`Self::messages`] against
    /// [`Self::max_tokens`] in [`Self::compact_if_needed`]. Defaults
    /// to [`CharTokenizer`] (`len() / 4`) when the builder is not
    /// given one — a coarse fallback fine for tests and bring-up but
    /// explicitly not recommended for production budgeting; wire up a
    /// provider-specific [`Tokenizer`] (e.g.
    /// `ailoop_anthropic::OnlineCalibratedTokenizer`) when correctness
    /// matters.
    tokenizer: Box<dyn Tokenizer>,
}

impl ContextManager {
    /// Begin configuring a new manager with `max_tokens` as the
    /// budget [`compact_if_needed`](Self::compact_if_needed) will
    /// hold the history under. Defaults: [`TruncateStrategy`] for
    /// reduction, [`CharTokenizer`] (`len() / 4`) for sizing, four
    /// preserved tail messages.
    ///
    /// [`TruncateStrategy`]: crate::TruncateStrategy
    pub fn builder(max_tokens: usize) -> ContextManagerBuilder {
        ContextManagerBuilder::new(max_tokens)
    }

    /// Restore a `ContextManager` whose history is `messages` and whose
    /// pin mask is `pinned`. The two vectors must have the same length;
    /// otherwise a [`FromMessagesError::LengthMismatch`] is returned.
    /// All other configuration (budget, strategy, tokenizer, preserved
    /// tail size) comes from `builder` — pass the same configuration
    /// the original conversation used so compaction behaves
    /// consistently across resumes.
    pub fn from_messages(
        builder: ContextManagerBuilder,
        messages: Vec<Message>,
        pinned: Vec<bool>,
    ) -> Result<Self, FromMessagesError> {
        if messages.len() != pinned.len() {
            return Err(FromMessagesError::LengthMismatch {
                messages: messages.len(),
                pinned: pinned.len(),
            });
        }
        let mut cm = builder.build();
        cm.messages = messages;
        cm.pinned = pinned;
        Ok(cm)
    }
}

impl ContextManager {
    /// Append `message` to the history with `pinned = false`. Use
    /// [`pin_last`](Self::pin_last) immediately after the call to mark
    /// it as a survivor.
    pub fn add_message(&mut self, message: Message) {
        self.messages.push(message);
        self.pinned.push(false);
    }

    /// Approximate token cost of the entire history under the
    /// configured [`Tokenizer`]. The accuracy of this number is only
    /// as good as the tokenizer wired into the builder — under the
    /// default [`CharTokenizer`] it is a `len() / 4` ballpark.
    pub fn estimated_tokens(&self) -> usize {
        self.tokenizer.count_messages(&self.messages)
    }

    /// Borrow the current history slice. Indices into this slice align
    /// with [`pinned`](Self::pinned).
    pub fn messages(&self) -> &[Message] {
        &self.messages
    }

    /// Append every message in `new_messages`, all with `pinned =
    /// false`. The engine uses this after a turn to fold the
    /// assistant reply (and any tool results) back into the history.
    pub fn extend(&mut self, new_messages: Vec<Message>) {
        let added = new_messages.len();
        self.messages.extend(new_messages);
        self.pinned.extend(std::iter::repeat_n(false, added));
    }

    /// Pinned-state slice, parallel to [`Self::messages`]. Useful for
    /// tests; production callers normally interact via the `pin_*`
    /// helpers and inspect [`Self::is_pinned`].
    pub fn pinned(&self) -> &[bool] {
        &self.pinned
    }

    /// Whether `messages[idx]` is currently pinned. Out-of-bounds
    /// indices return `false` rather than panicking.
    pub fn is_pinned(&self, idx: usize) -> bool {
        self.pinned.get(idx).copied().unwrap_or(false)
    }

    /// Pin the most recently added message so it survives every future
    /// compaction. No-op when the history is empty.
    ///
    /// Indices are not stable across compactions: a `pin_last()` made
    /// before compaction stays pinned (the strategy keeps it), but its
    /// numeric index in the new history may shift. Re-derive indices
    /// (or rely on `pin_last`) after compaction runs.
    pub fn pin_last(&mut self) {
        if let Some(last) = self.pinned.last_mut() {
            *last = true;
        }
    }

    /// Pin the message at `idx`. Panics on out-of-bounds, matching the
    /// convention of `Vec::index`.
    pub fn pin_at(&mut self, idx: usize) {
        assert!(
            idx < self.messages.len(),
            "pin_at: index {idx} out of bounds"
        );
        self.pinned[idx] = true;
    }

    /// Clear the pin on the message at `idx`. Panics on out-of-bounds.
    pub fn unpin_at(&mut self, idx: usize) {
        assert!(
            idx < self.messages.len(),
            "unpin_at: index {idx} out of bounds"
        );
        self.pinned[idx] = false;
    }

    /// Pin `idx` together with every message that pairs with it via a
    /// `tool_call` ↔ `tool_result` link. Without this, pinning a lone
    /// `Assistant` `ToolCall` (or a lone `User` `ToolResult`) and then
    /// letting compaction run would strand the partner — most providers
    /// reject that as a malformed history.
    ///
    /// Resolution rules:
    /// - If `messages[idx]` is an `Assistant` with `ToolCall` blocks,
    ///   any `User` message containing a `ToolResult` whose `call_id`
    ///   matches one of those calls is also pinned.
    /// - If `messages[idx]` is a `User` with `ToolResult` blocks, any
    ///   `Assistant` message containing a `ToolCall` whose `id` matches
    ///   is also pinned.
    /// - Messages without tool blocks are pinned alone (effectively a
    ///   `pin_at`).
    ///
    /// Panics on out-of-bounds. The lookup scans the full history but
    /// each message is examined at most once.
    pub fn pin_with_tool_result(&mut self, idx: usize) {
        assert!(
            idx < self.messages.len(),
            "pin_with_tool_result: index {idx} out of bounds"
        );

        self.pinned[idx] = true;

        let target_ids: Vec<String> = match &self.messages[idx] {
            Message::Assistant { blocks } => blocks
                .iter()
                .filter_map(|b| match b {
                    AssistantBlock::ToolCall { id, .. } => Some(id.clone()),
                    _ => None,
                })
                .collect(),
            Message::User { blocks } => blocks
                .iter()
                .filter_map(|b| match b {
                    UserBlock::ToolResult { call_id, .. } => Some(call_id.clone()),
                    _ => None,
                })
                .collect(),
            _ => Vec::new(),
        };

        if target_ids.is_empty() {
            return;
        }

        let is_assistant_target = matches!(self.messages[idx], Message::Assistant { .. });

        for (i, msg) in self.messages.iter().enumerate() {
            if i == idx || self.pinned[i] {
                continue;
            }
            let matches = match (is_assistant_target, msg) {
                (true, Message::User { blocks }) => blocks.iter().any(|b| matches!(b,
                    UserBlock::ToolResult { call_id, .. } if target_ids.iter().any(|t| t == call_id))),
                (false, Message::Assistant { blocks }) => blocks.iter().any(|b| matches!(b,
                    AssistantBlock::ToolCall { id, .. } if target_ids.iter().any(|t| t == id))),
                _ => false,
            };
            if matches {
                self.pinned[i] = true;
            }
        }
    }

    /// Run the configured [`CompactionStrategy`] when
    /// [`estimated_tokens`](Self::estimated_tokens) reaches the
    /// budget; otherwise return `Ok(None)` and leave the history
    /// untouched.
    ///
    /// On success returns `Ok(Some(report))` describing the
    /// before/after counts and the strategy name. The engine emits
    /// [`StreamChunk::HistoryCompacted`] carrying the same fields so
    /// observability middlewares can correlate the compaction with the
    /// run that triggered it.
    ///
    /// Errors propagate from the strategy (commonly
    /// [`CompactionError::SummarizationFailed`] when the summarizer
    /// model itself fails). [`CompactionError::NotEnoughHistory`]
    /// surfaces when the history has fewer messages than
    /// `preserve_n_last` and there is nothing to drop.
    ///
    /// [`CompactionStrategy`]: crate::CompactionStrategy
    /// [`StreamChunk::HistoryCompacted`]: ailoop_core::StreamChunk::HistoryCompacted
    pub async fn compact_if_needed(&mut self) -> Result<Option<CompactionReport>, CompactionError> {
        if self.estimated_tokens() < self.max_tokens {
            return Ok(None);
        }

        let before = self.messages.len();
        let output = self
            .strategy
            .compact(&self.messages, &self.pinned, self.preserve_n_last)
            .await?;
        debug_assert_eq!(
            output.messages.len(),
            output.pinned.len(),
            "strategy must return a pinned mask matching the message vector",
        );
        let after = output.messages.len();
        let strategy = self.strategy.name();
        self.messages = output.messages;
        self.pinned = output.pinned;
        Ok(Some(CompactionReport {
            before,
            after,
            strategy,
        }))
    }
}

/// Configuration for a [`ContextManager`].
///
/// Construct via [`ContextManager::builder`]. Setters return
/// `Self` so calls chain; [`build`](Self::build) is infallible.
pub struct ContextManagerBuilder {
    max_tokens: usize,
    preserve_n_last: usize,
    tokenizer: Box<dyn Tokenizer>,
    strategy: Box<dyn CompactionStrategy>,
}

impl ContextManagerBuilder {
    fn new(max_tokens: usize) -> Self {
        Self {
            max_tokens,
            preserve_n_last: 4,
            // Fallback default — see the doc on `ContextManager::tokenizer`.
            // Production code should override via `Self::tokenizer`.
            tokenizer: Box::new(CharTokenizer),
            strategy: Box::new(TruncateStrategy),
        }
    }
}

impl ContextManagerBuilder {
    /// Number of trailing messages the strategy must preserve verbatim
    /// (after walking back to a safe `User`-without-`ToolResult`
    /// boundary). Default: 4. Lowering it lets compaction reclaim more
    /// budget at the cost of dropping more recent context; raising it
    /// keeps recent turns at the cost of compacting sooner.
    pub fn preserve_n_last(mut self, n: usize) -> Self {
        self.preserve_n_last = n;
        self
    }

    /// Wire a [`Tokenizer`] into the manager. Replaces the default
    /// [`CharTokenizer`] fallback so [`ContextManager::compact_if_needed`]
    /// measures the budget in real tokens rather than `len() / 4`.
    pub fn tokenizer(self, tokenizer: Box<dyn Tokenizer>) -> ContextManagerBuilder {
        ContextManagerBuilder {
            max_tokens: self.max_tokens,
            preserve_n_last: self.preserve_n_last,
            tokenizer,
            strategy: self.strategy,
        }
    }

    /// Wire a [`CompactionStrategy`] into the manager. Replaces the
    /// default [`TruncateStrategy`]. Use
    /// `Box::new(SummarizeStrategy::new(model))` to compress dropped
    /// history into a model-generated summary instead of losing it.
    ///
    /// [`CompactionStrategy`]: crate::CompactionStrategy
    /// [`TruncateStrategy`]: crate::TruncateStrategy
    pub fn strategy(self, strategy: Box<dyn CompactionStrategy>) -> ContextManagerBuilder {
        ContextManagerBuilder {
            max_tokens: self.max_tokens,
            preserve_n_last: self.preserve_n_last,
            tokenizer: self.tokenizer,
            strategy,
        }
    }

    /// Finalize the configuration and build the [`ContextManager`].
    /// Infallible — every error case is caught by the typed setters.
    pub fn build(self) -> ContextManager {
        ContextManager {
            messages: Vec::new(),
            pinned: Vec::new(),
            max_tokens: self.max_tokens,
            preserve_n_last: self.preserve_n_last,
            strategy: self.strategy,
            tokenizer: self.tokenizer,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ailoop_core::{AssistantBlock, ToolResultContent, UserBlock};
    use serde_json::json;

    fn tool_call_msg(id: &str) -> Message {
        Message::Assistant {
            blocks: vec![AssistantBlock::tool_call(id, "t", json!({}))],
        }
    }

    fn tool_result_msg(call_id: &str) -> Message {
        Message::User {
            blocks: vec![UserBlock::tool_result(
                call_id,
                ToolResultContent::text("ok"),
            )],
        }
    }

    #[tokio::test]
    async fn compact_if_needed_returns_none_when_under_budget() {
        let mut mgr = ContextManager::builder(10_000).build();
        mgr.add_message(Message::user("hi"));
        mgr.add_message(Message::assistant_text("hello"));

        let report = mgr
            .compact_if_needed()
            .await
            .expect("compaction should succeed");
        assert!(report.is_none(), "no compaction expected when under budget");
    }

    #[tokio::test]
    async fn compact_if_needed_returns_report_when_over_budget() {
        // CharTokenizer fallback is len()/4. Use a tiny budget so a
        // couple of small messages already trip the limit.
        let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
        mgr.add_message(Message::user("first turn"));
        mgr.add_message(Message::assistant_text("first reply"));
        mgr.add_message(Message::user("second turn"));
        mgr.add_message(Message::assistant_text("second reply"));
        mgr.add_message(Message::user("third turn"));

        let report = mgr
            .compact_if_needed()
            .await
            .expect("compaction should succeed")
            .expect("expected compaction to run");

        assert_eq!(report.strategy, "truncate");
        assert!(
            report.after < report.before,
            "compaction must drop messages"
        );
    }

    #[tokio::test]
    async fn pin_last_survives_compaction() {
        let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
        mgr.add_message(Message::user("pinned anchor"));
        mgr.pin_last();
        for i in 0..5 {
            mgr.add_message(Message::user(format!("turn {i} q")));
            mgr.add_message(Message::assistant_text(format!("turn {i} a")));
        }

        let report = mgr
            .compact_if_needed()
            .await
            .expect("compaction should succeed")
            .expect("expected compaction to run");
        assert!(report.after < report.before);

        // The pinned anchor must still be the first message and still pinned.
        let first = mgr.messages().first().expect("history should be non-empty");
        match first {
            Message::User { blocks } => match &blocks[0] {
                UserBlock::Text { text, .. } => assert_eq!(text, "pinned anchor"),
                other => panic!("expected pinned text block, got {other:?}"),
            },
            other => panic!("expected pinned user message, got {other:?}"),
        }
        assert!(
            mgr.is_pinned(0),
            "pinned mask must be preserved across compaction"
        );
    }

    #[tokio::test]
    async fn pin_with_tool_result_keeps_pair_intact() {
        let mut mgr = ContextManager::builder(10).preserve_n_last(2).build();
        mgr.add_message(Message::user("task"));
        mgr.add_message(tool_call_msg("c1"));
        mgr.add_message(tool_result_msg("c1"));
        mgr.add_message(Message::assistant_text("result"));
        // Pin the tool_call (idx 1). The helper should also pin the
        // tool_result (idx 2) so the pair survives compaction together.
        mgr.pin_with_tool_result(1);
        assert!(mgr.is_pinned(1));
        assert!(mgr.is_pinned(2), "partner tool_result must be pinned too");

        // Add filler so we overflow the budget.
        for i in 0..6 {
            mgr.add_message(Message::user(format!("filler {i}")));
            mgr.add_message(Message::assistant_text(format!("ack {i}")));
        }

        mgr.compact_if_needed()
            .await
            .expect("compaction should succeed")
            .expect("expected compaction to run");

        // The pinned pair should still be present in the same relative order.
        let mut saw_call = false;
        let mut saw_result = false;
        for msg in mgr.messages() {
            match msg {
                Message::Assistant { blocks } => {
                    if blocks
                        .iter()
                        .any(|b| matches!(b, AssistantBlock::ToolCall { id, .. } if id == "c1"))
                    {
                        saw_call = true;
                    }
                }
                Message::User { blocks } => {
                    if blocks.iter().any(
                        |b| matches!(b, UserBlock::ToolResult { call_id, .. } if call_id == "c1"),
                    ) {
                        assert!(saw_call, "tool_result must follow its tool_call");
                        saw_result = true;
                    }
                }
                _ => {}
            }
        }
        assert!(
            saw_call && saw_result,
            "pinned pair must survive compaction"
        );
    }

    #[tokio::test]
    async fn pin_with_tool_result_on_result_pins_the_call() {
        let mut mgr = ContextManager::builder(10).preserve_n_last(1).build();
        mgr.add_message(tool_call_msg("c1"));
        mgr.add_message(tool_result_msg("c1"));
        mgr.add_message(Message::user("later"));

        mgr.pin_with_tool_result(1);
        assert!(mgr.is_pinned(0), "tool_call partner must be pinned");
        assert!(mgr.is_pinned(1));
    }

    /// `compact_if_needed` measures the budget in real tokens via the
    /// configured [`Tokenizer`], not in characters. A tokenizer that
    /// bills every message at a fixed cost lets us drive compaction by
    /// message count alone, independently of the underlying
    /// `text.len()`.
    #[tokio::test]
    async fn compact_uses_tokenizer_budget_not_character_count() {
        struct PerMessageTokenizer;
        impl Tokenizer for PerMessageTokenizer {
            fn count_text(&self, _text: &str) -> usize {
                10
            }
        }

        // Budget: 35 tokens. Five 1-text-block messages cost 50 tokens
        // (5 * 10), so compaction must run. Under the `CharTokenizer`
        // fallback the same content (under ~50 chars total) would fit
        // comfortably under 35 — proving the budget is sourced from
        // the supplied tokenizer.
        let mut mgr = ContextManager::builder(35)
            .tokenizer(Box::new(PerMessageTokenizer))
            .preserve_n_last(2)
            .build();
        for i in 0..5 {
            mgr.add_message(Message::user(format!("q{i}")));
        }
        assert_eq!(mgr.estimated_tokens(), 50);

        let report = mgr
            .compact_if_needed()
            .await
            .expect("compaction should succeed")
            .expect("over-budget history must compact");
        assert_eq!(report.before, 5);
        assert!(report.after < report.before);
        // After compaction the tail is still bound by `preserve_n_last`.
        assert_eq!(report.after, 2);
    }

    #[test]
    fn from_messages_restores_history_and_pin_mask() {
        let messages = vec![
            Message::user("first"),
            Message::assistant_text("ack"),
            Message::user("second"),
        ];
        let pinned = vec![true, false, true];
        let mgr = ContextManager::from_messages(ContextManager::builder(10_000), messages, pinned)
            .expect("equal lengths");
        assert_eq!(mgr.messages().len(), 3);
        assert!(mgr.is_pinned(0));
        assert!(!mgr.is_pinned(1));
        assert!(mgr.is_pinned(2));
    }

    #[test]
    fn from_messages_rejects_length_mismatch() {
        let result = ContextManager::from_messages(
            ContextManager::builder(10_000),
            vec![Message::user("solo")],
            vec![],
        );
        match result {
            Err(FromMessagesError::LengthMismatch {
                messages: 1,
                pinned: 0,
            }) => {}
            Ok(_) => panic!("length mismatch must error, not panic"),
            Err(other) => panic!("unexpected error: {other:?}"),
        }
    }
}