brainos_cortex/
compaction.rs

1//! Conversation-history compaction planning.
2//!
3//! When a chat thread grows past its slice of the token budget, the assembler
4//! would otherwise *drop* the oldest turns outright. Compaction instead keeps
5//! the recent turns verbatim and folds the overflow prefix into a short
6//! summary, so older context survives in compressed form rather than
7//! vanishing.
8//!
9//! This module is the **pure planning half**: it decides which turns fit and
10//! which overflow, with no LLM and no I/O, so the policy is fully unit-tested.
11//! The signal pipeline owns the LLM-backed summarization + caching and feeds
12//! the kept/overflow split from here.
13
14use crate::context::estimate_tokens;
15use crate::llm::Message;
16
17/// How `history` should be split to fit a token budget.
18#[derive(Debug, Clone)]
19pub struct HistoryPlan<'a> {
20    /// Oldest turns that don't fit — to be compacted into a single summary
21    /// note by the caller. Empty when the whole history already fits.
22    pub to_summarize: &'a [Message],
23    /// The most recent turns that fit verbatim, in chronological order.
24    pub keep_recent: &'a [Message],
25}
26
27impl HistoryPlan<'_> {
28    /// True when nothing overflows — the caller can use the history as-is and
29    /// skip the (LLM-costing) summary entirely.
30    pub fn is_noop(&self) -> bool {
31        self.to_summarize.is_empty()
32    }
33}
34
35/// Plan history compaction against a `budget` (in tokens), holding back
36/// `reserve` tokens for the summary note the caller will prepend so the final
37/// `summary + keep_recent` still fits.
38///
39/// Walks newest→oldest, keeping turns while they fit within `budget - reserve`;
40/// the oldest prefix that doesn't fit becomes `to_summarize`. When everything
41/// fits, `to_summarize` is empty (a no-op). A single turn larger than the
42/// effective budget falls into `to_summarize` rather than being kept, so the
43/// caller never emits a verbatim block that blows the budget.
44pub fn plan_history_compaction<'a>(
45    history: &'a [Message],
46    budget: usize,
47    reserve: usize,
48) -> HistoryPlan<'a> {
49    let effective = budget.saturating_sub(reserve);
50    let mut used = 0usize;
51    // Index where the kept (recent) turns begin; defaults to len() == keep none.
52    let mut keep_from = history.len();
53
54    for (i, msg) in history.iter().enumerate().rev() {
55        let tokens = estimate_tokens(&msg.content);
56        if used + tokens > effective {
57            break;
58        }
59        used += tokens;
60        keep_from = i;
61    }
62
63    HistoryPlan {
64        to_summarize: &history[..keep_from],
65        keep_recent: &history[keep_from..],
66    }
67}
68
69#[cfg(test)]
70mod tests {
71    use super::*;
72    use crate::llm::Message;
73
74    // ~30 chars ≈ 10 tokens at 3 chars/token.
75    fn turn(n: usize) -> Message {
76        Message::user(format!("turn {n}: {}", "x".repeat(27)))
77    }
78
79    #[test]
80    fn nothing_overflows_is_a_noop() {
81        let history: Vec<Message> = (0..3).map(turn).collect();
82        let plan = plan_history_compaction(&history, 1000, 100);
83        assert!(plan.is_noop());
84        assert_eq!(plan.keep_recent.len(), 3);
85        assert!(plan.to_summarize.is_empty());
86    }
87
88    #[test]
89    fn oldest_turns_overflow_into_summary_keeping_recent() {
90        let history: Vec<Message> = (0..10).map(turn).collect();
91        // Each turn ~11 tokens; budget 40, reserve 5 → effective 35 → ~3 turns fit.
92        let plan = plan_history_compaction(&history, 40, 5);
93        assert!(!plan.is_noop());
94        // Kept turns are the most recent, in order, and contiguous with overflow.
95        assert_eq!(
96            plan.to_summarize.len() + plan.keep_recent.len(),
97            history.len()
98        );
99        assert!(plan.keep_recent.len() < history.len());
100        // The newest turn is always kept.
101        assert_eq!(
102            plan.keep_recent.last().unwrap().content,
103            history.last().unwrap().content
104        );
105        // The split is chronological: overflow is the oldest prefix.
106        assert_eq!(
107            plan.to_summarize.first().unwrap().content,
108            history[0].content
109        );
110    }
111
112    #[test]
113    fn single_oversized_turn_goes_to_summary_not_kept_verbatim() {
114        let big = Message::user("y".repeat(10_000));
115        let history = vec![big];
116        let plan = plan_history_compaction(&history, 40, 5);
117        assert_eq!(plan.keep_recent.len(), 0);
118        assert_eq!(plan.to_summarize.len(), 1);
119    }
120
121    #[test]
122    fn empty_history_is_a_noop() {
123        let plan = plan_history_compaction(&[], 100, 10);
124        assert!(plan.is_noop());
125        assert!(plan.keep_recent.is_empty());
126    }
127
128    // ── Property tests ────────────────────────────────────────────────
129    //
130    // Compaction must never lose, duplicate, or reorder a turn (the summary
131    // path depends on the split being a faithful partition), must keep the
132    // verbatim block within budget, and must keep as much recent context as
133    // fits. These assert all four for arbitrary histories and budgets.
134
135    use proptest::prelude::*;
136
137    fn histories() -> impl Strategy<Value = Vec<Message>> {
138        // Content length drives token cost; vary it widely so cases span
139        // "everything fits" through "even one turn overflows".
140        proptest::collection::vec("[a-z ]{0,120}".prop_map(Message::user), 0..20)
141    }
142
143    fn kept_tokens(plan: &HistoryPlan<'_>) -> usize {
144        plan.keep_recent
145            .iter()
146            .map(|m| estimate_tokens(&m.content))
147            .sum()
148    }
149
150    proptest! {
151        #![proptest_config(ProptestConfig { cases: 512, .. ProptestConfig::default() })]
152
153        /// The split is a faithful partition: concatenating `to_summarize`
154        /// then `keep_recent` reproduces the original history exactly — same
155        /// turns, same chronological order, none lost or duplicated.
156        #[test]
157        fn split_is_a_faithful_partition(
158            history in histories(),
159            budget in 0usize..500,
160            reserve in 0usize..200,
161        ) {
162            let plan = plan_history_compaction(&history, budget, reserve);
163            prop_assert_eq!(
164                plan.to_summarize.len() + plan.keep_recent.len(),
165                history.len()
166            );
167            let rejoined = plan
168                .to_summarize
169                .iter()
170                .chain(plan.keep_recent.iter())
171                .map(|m| &m.content);
172            prop_assert!(rejoined.eq(history.iter().map(|m| &m.content)));
173        }
174
175        /// The verbatim block the caller emits never exceeds the effective
176        /// budget (`budget - reserve`) — the whole point of holding turns
177        /// back into the summary.
178        #[test]
179        fn kept_turns_fit_the_effective_budget(
180            history in histories(),
181            budget in 0usize..500,
182            reserve in 0usize..200,
183        ) {
184            let plan = plan_history_compaction(&history, budget, reserve);
185            prop_assert!(kept_tokens(&plan) <= budget.saturating_sub(reserve));
186        }
187
188        /// Greedy maximality: whenever something overflows, the newest
189        /// overflowed turn genuinely didn't fit — adding it to the kept set
190        /// would have exceeded the effective budget. So no turn that could
191        /// have been kept verbatim was needlessly summarized.
192        #[test]
193        fn keeps_as_much_recent_as_fits(
194            history in histories(),
195            budget in 0usize..500,
196            reserve in 0usize..200,
197        ) {
198            let plan = plan_history_compaction(&history, budget, reserve);
199            if let Some(newest_overflow) = plan.to_summarize.last() {
200                let effective = budget.saturating_sub(reserve);
201                let would_be = kept_tokens(&plan) + estimate_tokens(&newest_overflow.content);
202                prop_assert!(
203                    would_be > effective,
204                    "a turn that fit was summarized: {would_be} <= {effective}"
205                );
206            }
207        }
208
209        /// No-op exactly when the whole history fits: `is_noop()` holds iff
210        /// the summed per-turn token cost is within the effective budget.
211        #[test]
212        fn noop_iff_everything_fits(
213            history in histories(),
214            budget in 0usize..500,
215            reserve in 0usize..200,
216        ) {
217            let plan = plan_history_compaction(&history, budget, reserve);
218            let total: usize = history.iter().map(|m| estimate_tokens(&m.content)).sum();
219            prop_assert_eq!(plan.is_noop(), total <= budget.saturating_sub(reserve));
220        }
221    }
222}
brainos_cortex/compaction.rs

brainos_cortex/
compaction.rs