1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
//! Pluggable context transformation with compaction awareness.
//!
//! Replaces the bare `TransformContextFn` closure with a trait that supports
//! both transformation and compaction reporting.
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::context::{CompactionReport, TokenCounter, compact_sliding_window_with};
use crate::types::AgentMessage;
/// Hook for rewriting the conversation context, with compaction reporting.
///
/// Implementors must be `Send + Sync`.
pub trait ContextTransformer: Send + Sync {
    /// Rewrite `messages` in place.
    ///
    /// Invoked synchronously ahead of every LLM call. `overflow` is `true`
    /// when the previous turn exceeded the context window.
    ///
    /// Yields `Some(CompactionReport)` when messages were dropped and `None`
    /// when nothing was removed.
    fn transform(
        &self,
        messages: &mut Vec<AgentMessage>,
        overflow: bool,
    ) -> Option<CompactionReport>;

    /// Type-erased view of the transformer, used for downcasting.
    ///
    /// The turn pipeline relies on this to reach a built-in
    /// [`SlidingWindowTransformer`] and hand it the cached-prefix boundary on
    /// each turn.
    ///
    /// Unless overridden, this hands back a private sentinel value rather
    /// than `self`, so a downcast to any concrete transformer type fails.
    /// Custom transformers therefore stay opaque to the cache pipeline by
    /// default; one that wants cache-protected compaction can override this
    /// method and return `self` to expose its concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        static OPAQUE: NoDowncast = NoDowncast;
        &OPAQUE
    }
}
/// Private placeholder handed out by the default
/// [`ContextTransformer::as_any`] implementation.
///
/// Because the default `as_any` returns a reference to a value of this type
/// instead of `self`, attempts to downcast the result to a concrete
/// transformer type (for example [`SlidingWindowTransformer`]) never match.
/// That is the intended behavior for transformers that do not opt in to the
/// downcast hook by overriding `as_any`.
struct NoDowncast;
/// Backward-compatibility adapter: any `Fn(&mut Vec<AgentMessage>, bool)`
/// closure that is `Send + Sync` can be used as a [`ContextTransformer`].
///
/// Compaction is detected purely from the change in message count, so the
/// resulting report carries zeroed token counts and an empty
/// `dropped_messages` list.
impl<F> ContextTransformer for F
where
    F: Fn(&mut Vec<AgentMessage>, bool) + Send + Sync,
{
    fn transform(
        &self,
        messages: &mut Vec<AgentMessage>,
        overflow: bool,
    ) -> Option<CompactionReport> {
        let len_before = messages.len();
        self(messages, overflow);

        // `None` when the closure kept the length unchanged or grew the list;
        // only a strictly shorter list counts as compaction.
        let dropped_count = len_before
            .checked_sub(messages.len())
            .filter(|&removed| removed > 0)?;

        Some(CompactionReport {
            dropped_count,
            // A bare closure has no way to report token counts.
            tokens_before: 0,
            tokens_after: 0,
            overflow,
            // A bare closure does not expose the slice it removed.
            dropped_messages: Vec::new(),
        })
    }
}
/// Sliding-window [`ContextTransformer`] that reports what it compacted.
///
/// Uses the same logic as [`sliding_window`](crate::sliding_window) while
/// capturing compaction metrics for reporting.
///
/// Token estimation is pluggable through an optional [`TokenCounter`]; when
/// none is supplied the default `chars / 4` heuristic applies.
///
/// The cached prefix length can be updated at runtime through interior
/// mutability. This lets the turn pipeline push the cache boundary into each
/// compaction pass even though the transformer is shared behind
/// `Arc<dyn ContextTransformer>` and `&mut` access is unavailable.
pub struct SlidingWindowTransformer {
    /// Token budget used when no overflow is signaled.
    normal_budget: usize,
    /// Token budget used when overflow is signaled.
    overflow_budget: usize,
    /// Number of leading messages that are always preserved.
    anchor: usize,
    /// Optional custom token estimator; `None` selects the default heuristic.
    token_counter: Option<Arc<dyn TokenCounter>>,
    /// Cached prefix length configured through the builder or setter.
    ///
    /// With caching active, this many leading messages are shielded from
    /// compaction, unless `published_prefix` overrides the value.
    cached_prefix_len: usize,
    /// Cached prefix length published at runtime by the turn pipeline.
    ///
    /// Written through interior mutability. Zero means nothing has been
    /// published yet; any other value takes precedence over
    /// `cached_prefix_len`.
    published_prefix: AtomicUsize,
}
impl SlidingWindowTransformer {
    /// Build a sliding window transformer with no custom token counter and
    /// no cached prefix.
    ///
    /// # Arguments
    ///
    /// * `normal_budget` - Token budget under normal operation.
    /// * `overflow_budget` - Smaller token budget applied when overflow is signaled.
    /// * `anchor` - Count of leading messages that are always preserved.
    #[must_use]
    pub const fn new(normal_budget: usize, overflow_budget: usize, anchor: usize) -> Self {
        Self {
            normal_budget,
            overflow_budget,
            anchor,
            token_counter: None,
            cached_prefix_len: 0,
            published_prefix: AtomicUsize::new(0),
        }
    }

    /// Use `counter` for token estimation instead of the default heuristic.
    #[must_use]
    pub fn with_token_counter(self, counter: Arc<dyn TokenCounter>) -> Self {
        Self {
            token_counter: Some(counter),
            ..self
        }
    }

    /// Builder-style setter for the cached prefix length that compaction
    /// must leave intact.
    ///
    /// With caching active, the effective anchor becomes
    /// `max(anchor, cached_prefix_len)`.
    #[must_use]
    pub const fn with_cached_prefix_len(mut self, len: usize) -> Self {
        self.cached_prefix_len = len;
        self
    }

    /// Overwrite the cached prefix length on a transformer the caller owns
    /// exclusively.
    ///
    /// When the transformer is shared behind an `Arc` at runtime, use
    /// [`Self::publish_cached_prefix`] instead.
    pub const fn set_cached_prefix_len(&mut self, len: usize) {
        self.cached_prefix_len = len;
    }

    /// Publish a cached prefix length without needing `&mut self`.
    ///
    /// The turn pipeline calls this before each compaction pass to move the
    /// boundary on the shared `Arc<dyn ...>`. Any non-zero value overrides
    /// the builder-set field; zero leaves the builder-set value in effect.
    pub fn publish_cached_prefix(&self, len: usize) {
        self.published_prefix.store(len, Ordering::Relaxed);
    }

    /// Effective cached prefix length: the runtime-published value when one
    /// is set (non-zero), the builder-set value otherwise.
    #[must_use]
    pub fn cached_prefix_len(&self) -> usize {
        match self.published_prefix.load(Ordering::Relaxed) {
            // Zero is the "nothing published yet" marker.
            0 => self.cached_prefix_len,
            published => published,
        }
    }
}
impl ContextTransformer for SlidingWindowTransformer {
    /// Compact `messages` against the budget selected by `overflow`.
    ///
    /// The protected prefix is the larger of the configured anchor and the
    /// effective cached prefix length. Returns whatever report
    /// `compact_sliding_window_with` produces, with its `overflow` field set
    /// to the flag passed in, or `None` when that call returns `None`.
    fn transform(
        &self,
        messages: &mut Vec<AgentMessage>,
        overflow: bool,
    ) -> Option<CompactionReport> {
        let budget = if overflow {
            self.overflow_budget
        } else {
            self.normal_budget
        };
        let effective_anchor = self.anchor.max(self.cached_prefix_len());

        compact_sliding_window_with(
            messages,
            budget,
            effective_anchor,
            self.token_counter.as_deref(),
        )
        .map(|mut report| {
            report.overflow = overflow;
            report
        })
    }

    /// Expose the concrete type so callers can downcast to
    /// `SlidingWindowTransformer`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{ContentBlock, LlmMessage, UserMessage};

    /// Wrap `text` in a user message holding a single text block.
    fn text_message(text: &str) -> AgentMessage {
        let content = vec![ContentBlock::Text {
            text: text.to_owned(),
        }];
        AgentMessage::Llm(LlmMessage::User(UserMessage {
            content,
            timestamp: 0,
            cache_hint: None,
        }))
    }

    /// Four identical user messages of 400 characters each.
    ///
    /// Default counter: 400 / 4 = 100 tokens per message.
    fn four_large_messages() -> Vec<AgentMessage> {
        let body = "x".repeat(400);
        (0..4).map(|_| text_message(&body)).collect()
    }

    #[test]
    fn sliding_window_transformer_reports_dropped_messages() {
        let mut messages = four_large_messages();
        let transformer = SlidingWindowTransformer::new(250, 100, 1);

        let report = transformer
            .transform(&mut messages, false)
            .expect("should report compaction");

        assert_eq!(report.dropped_count, 2);
        assert_eq!(report.tokens_before, 400);
        assert!(report.tokens_after < report.tokens_before);
        assert!(!report.overflow);
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn sliding_window_transformer_no_report_under_budget() {
        let mut messages = vec![text_message("hello"), text_message("world")];
        let transformer = SlidingWindowTransformer::new(10_000, 5_000, 1);

        let report = transformer.transform(&mut messages, false);

        assert!(report.is_none(), "should not report when under budget");
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn closure_blanket_impl_works() {
        // `truncate` is a no-op on lists that already have two or fewer
        // entries, so no explicit length guard is needed.
        let keep_first_two = |msgs: &mut Vec<AgentMessage>, _overflow: bool| msgs.truncate(2);
        let mut messages = Vec::from(["a", "b", "c", "d"].map(text_message));

        let report = keep_first_two.transform(&mut messages, false).unwrap();

        assert_eq!(report.dropped_count, 2);
        // Bare closures can't report token counts
        assert_eq!(report.tokens_before, 0);
        assert_eq!(report.tokens_after, 0);
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn overflow_uses_smaller_budget() {
        let mut messages = four_large_messages();
        let transformer = SlidingWindowTransformer::new(1000, 150, 1);

        // Under normal budget (1000), total is 400 tokens -- no trim.
        assert!(transformer.transform(&mut messages, false).is_none());
        assert_eq!(messages.len(), 4);

        // Under overflow budget (150), should trim.
        let report = transformer.transform(&mut messages, true).unwrap();
        assert!(report.overflow);
        assert!(messages.len() < 4);
    }

    #[test]
    fn sliding_window_transformer_with_custom_counter() {
        use crate::context::TokenCounter;

        /// Counts every character as one token (4x the default heuristic).
        struct CharCounter;

        impl TokenCounter for CharCounter {
            fn count_tokens(&self, message: &AgentMessage) -> usize {
                match message {
                    AgentMessage::Custom(_) => 50,
                    AgentMessage::Llm(LlmMessage::User(user)) => user
                        .content
                        .iter()
                        .map(|block| match block {
                            ContentBlock::Text { text } => text.len(),
                            _ => 0,
                        })
                        .sum(),
                    AgentMessage::Llm(_) => 0,
                }
            }
        }

        // Each message: 400 chars.
        // Default counter: 400/4 = 100 tokens each.
        // CharCounter: 400 tokens each.

        // With default counter, 4 * 100 = 400 tokens. Budget 500 => no trim.
        let mut messages = four_large_messages();
        let default_transformer = SlidingWindowTransformer::new(500, 250, 1);
        let report = default_transformer.transform(&mut messages, false);
        assert!(
            report.is_none(),
            "default counter should not trim at budget 500"
        );
        assert_eq!(messages.len(), 4);

        // With CharCounter, 4 * 400 = 1600 tokens. Budget 500 => trims.
        let mut messages = four_large_messages();
        let custom_transformer =
            SlidingWindowTransformer::new(500, 250, 1).with_token_counter(Arc::new(CharCounter));
        let report = custom_transformer.transform(&mut messages, false);
        assert!(report.is_some(), "char counter should trim at budget 500");
        assert!(messages.len() < 4);
    }
}