brainos_cortex/compaction.rs
1//! Conversation-history compaction planning.
2//!
3//! When a chat thread grows past its slice of the token budget, the assembler
4//! would otherwise *drop* the oldest turns outright. Compaction instead keeps
5//! the recent turns verbatim and folds the overflow prefix into a short
6//! summary, so older context survives in compressed form rather than
7//! vanishing.
8//!
9//! This module is the **pure planning half**: it decides which turns fit and
10//! which overflow, with no LLM and no I/O, so the policy is fully unit-tested.
11//! The signal pipeline owns the LLM-backed summarization + caching and feeds
12//! the kept/overflow split from here.
13
14use crate::context::estimate_tokens;
15use crate::llm::Message;
16
17/// How `history` should be split to fit a token budget.
18#[derive(Debug, Clone)]
19pub struct HistoryPlan<'a> {
20 /// Oldest turns that don't fit — to be compacted into a single summary
21 /// note by the caller. Empty when the whole history already fits.
22 pub to_summarize: &'a [Message],
23 /// The most recent turns that fit verbatim, in chronological order.
24 pub keep_recent: &'a [Message],
25}
26
27impl HistoryPlan<'_> {
28 /// True when nothing overflows — the caller can use the history as-is and
29 /// skip the (LLM-costing) summary entirely.
30 pub fn is_noop(&self) -> bool {
31 self.to_summarize.is_empty()
32 }
33}
34
35/// Plan history compaction against a `budget` (in tokens), holding back
36/// `reserve` tokens for the summary note the caller will prepend so the final
37/// `summary + keep_recent` still fits.
38///
39/// Walks newest→oldest, keeping turns while they fit within `budget - reserve`;
40/// the oldest prefix that doesn't fit becomes `to_summarize`. When everything
41/// fits, `to_summarize` is empty (a no-op). A single turn larger than the
42/// effective budget falls into `to_summarize` rather than being kept, so the
43/// caller never emits a verbatim block that blows the budget.
44pub fn plan_history_compaction<'a>(
45 history: &'a [Message],
46 budget: usize,
47 reserve: usize,
48) -> HistoryPlan<'a> {
49 let effective = budget.saturating_sub(reserve);
50 let mut used = 0usize;
51 // Index where the kept (recent) turns begin; defaults to len() == keep none.
52 let mut keep_from = history.len();
53
54 for (i, msg) in history.iter().enumerate().rev() {
55 let tokens = estimate_tokens(&msg.content);
56 if used + tokens > effective {
57 break;
58 }
59 used += tokens;
60 keep_from = i;
61 }
62
63 HistoryPlan {
64 to_summarize: &history[..keep_from],
65 keep_recent: &history[keep_from..],
66 }
67}
68
69#[cfg(test)]
70mod tests {
71 use super::*;
72 use crate::llm::Message;
73
74 // ~30 chars ≈ 10 tokens at 3 chars/token.
75 fn turn(n: usize) -> Message {
76 Message::user(format!("turn {n}: {}", "x".repeat(27)))
77 }
78
79 #[test]
80 fn nothing_overflows_is_a_noop() {
81 let history: Vec<Message> = (0..3).map(turn).collect();
82 let plan = plan_history_compaction(&history, 1000, 100);
83 assert!(plan.is_noop());
84 assert_eq!(plan.keep_recent.len(), 3);
85 assert!(plan.to_summarize.is_empty());
86 }
87
88 #[test]
89 fn oldest_turns_overflow_into_summary_keeping_recent() {
90 let history: Vec<Message> = (0..10).map(turn).collect();
91 // Each turn ~11 tokens; budget 40, reserve 5 → effective 35 → ~3 turns fit.
92 let plan = plan_history_compaction(&history, 40, 5);
93 assert!(!plan.is_noop());
94 // Kept turns are the most recent, in order, and contiguous with overflow.
95 assert_eq!(
96 plan.to_summarize.len() + plan.keep_recent.len(),
97 history.len()
98 );
99 assert!(plan.keep_recent.len() < history.len());
100 // The newest turn is always kept.
101 assert_eq!(
102 plan.keep_recent.last().unwrap().content,
103 history.last().unwrap().content
104 );
105 // The split is chronological: overflow is the oldest prefix.
106 assert_eq!(
107 plan.to_summarize.first().unwrap().content,
108 history[0].content
109 );
110 }
111
112 #[test]
113 fn single_oversized_turn_goes_to_summary_not_kept_verbatim() {
114 let big = Message::user("y".repeat(10_000));
115 let history = vec![big];
116 let plan = plan_history_compaction(&history, 40, 5);
117 assert_eq!(plan.keep_recent.len(), 0);
118 assert_eq!(plan.to_summarize.len(), 1);
119 }
120
121 #[test]
122 fn empty_history_is_a_noop() {
123 let plan = plan_history_compaction(&[], 100, 10);
124 assert!(plan.is_noop());
125 assert!(plan.keep_recent.is_empty());
126 }
127
128 // ── Property tests ────────────────────────────────────────────────
129 //
130 // Compaction must never lose, duplicate, or reorder a turn (the summary
131 // path depends on the split being a faithful partition), must keep the
132 // verbatim block within budget, and must keep as much recent context as
133 // fits. These assert all four for arbitrary histories and budgets.
134
135 use proptest::prelude::*;
136
137 fn histories() -> impl Strategy<Value = Vec<Message>> {
138 // Content length drives token cost; vary it widely so cases span
139 // "everything fits" through "even one turn overflows".
140 proptest::collection::vec("[a-z ]{0,120}".prop_map(Message::user), 0..20)
141 }
142
143 fn kept_tokens(plan: &HistoryPlan<'_>) -> usize {
144 plan.keep_recent
145 .iter()
146 .map(|m| estimate_tokens(&m.content))
147 .sum()
148 }
149
150 proptest! {
151 #![proptest_config(ProptestConfig { cases: 512, .. ProptestConfig::default() })]
152
153 /// The split is a faithful partition: concatenating `to_summarize`
154 /// then `keep_recent` reproduces the original history exactly — same
155 /// turns, same chronological order, none lost or duplicated.
156 #[test]
157 fn split_is_a_faithful_partition(
158 history in histories(),
159 budget in 0usize..500,
160 reserve in 0usize..200,
161 ) {
162 let plan = plan_history_compaction(&history, budget, reserve);
163 prop_assert_eq!(
164 plan.to_summarize.len() + plan.keep_recent.len(),
165 history.len()
166 );
167 let rejoined = plan
168 .to_summarize
169 .iter()
170 .chain(plan.keep_recent.iter())
171 .map(|m| &m.content);
172 prop_assert!(rejoined.eq(history.iter().map(|m| &m.content)));
173 }
174
175 /// The verbatim block the caller emits never exceeds the effective
176 /// budget (`budget - reserve`) — the whole point of holding turns
177 /// back into the summary.
178 #[test]
179 fn kept_turns_fit_the_effective_budget(
180 history in histories(),
181 budget in 0usize..500,
182 reserve in 0usize..200,
183 ) {
184 let plan = plan_history_compaction(&history, budget, reserve);
185 prop_assert!(kept_tokens(&plan) <= budget.saturating_sub(reserve));
186 }
187
188 /// Greedy maximality: whenever something overflows, the newest
189 /// overflowed turn genuinely didn't fit — adding it to the kept set
190 /// would have exceeded the effective budget. So no turn that could
191 /// have been kept verbatim was needlessly summarized.
192 #[test]
193 fn keeps_as_much_recent_as_fits(
194 history in histories(),
195 budget in 0usize..500,
196 reserve in 0usize..200,
197 ) {
198 let plan = plan_history_compaction(&history, budget, reserve);
199 if let Some(newest_overflow) = plan.to_summarize.last() {
200 let effective = budget.saturating_sub(reserve);
201 let would_be = kept_tokens(&plan) + estimate_tokens(&newest_overflow.content);
202 prop_assert!(
203 would_be > effective,
204 "a turn that fit was summarized: {would_be} <= {effective}"
205 );
206 }
207 }
208
209 /// No-op exactly when the whole history fits: `is_noop()` holds iff
210 /// the summed per-turn token cost is within the effective budget.
211 #[test]
212 fn noop_iff_everything_fits(
213 history in histories(),
214 budget in 0usize..500,
215 reserve in 0usize..200,
216 ) {
217 let plan = plan_history_compaction(&history, budget, reserve);
218 let total: usize = history.iter().map(|m| estimate_tokens(&m.content)).sum();
219 prop_assert_eq!(plan.is_noop(), total <= budget.saturating_sub(reserve));
220 }
221 }
222}