Skip to main content

roboticus_agent/
compaction.rs

1//! Context compaction — minimize the token footprint of all injected context.
2//!
3//! Every piece of context (memories, recent activity, checkpoints, hippocampus
4//! summaries, topic summaries) flows through this module before entering the
5//! token budget. The compactor:
6//!
7//! 1. **Deduplicates** — merges overlapping entries (same fact from different
8//!    tiers, or the same memory appearing in both ambient and similarity results)
9//! 2. **Compresses** — strips verbose formatting, collapses multi-line entries
10//!    into single-line bullets, removes low-information-density content
11//! 3. **Prioritizes** — ranks entries by `(recency × relevance × importance)`
12//!    and drops the lowest-value entries first when the budget is exceeded
13//! 4. **Enforces ceiling** — hard token cap ensures memory never starves system
14//!    prompt or conversation history
15//!
16//! ## Architecture position
17//!
18//! ```text
19//! Retrieval → Compaction → Context Assembly → Inference
20//!     ↑            ↑              ↑
21//!  raw memories   compressed    token-budgeted
22//!  (verbose)      (minimal)     (final context)
23//! ```
24
25use crate::context::estimate_tokens;
26
27/// A single piece of context to be compacted.
28#[derive(Debug, Clone)]
29pub struct ContextEntry {
30    /// Source tier: "working", "episodic", "semantic", "procedural",
31    /// "relationship", "ambient", "checkpoint", "hippocampus", "topic_summary"
32    pub source: String,
33    /// The raw content text.
34    pub content: String,
35    /// Importance score (higher = more important to retain). 0-10 scale.
36    pub importance: f32,
37    /// Recency: seconds since creation. Lower = more recent = higher priority.
38    pub age_seconds: u64,
39    /// Relevance: cosine similarity to the current query (0.0-1.0).
40    pub relevance: f32,
41}
42
43/// Compacted output ready for context assembly.
44#[derive(Debug, Clone)]
45pub struct CompactedContext {
46    /// The compacted memory text block to inject.
47    pub text: String,
48    /// Number of entries retained after compaction.
49    pub entries_retained: usize,
50    /// Number of entries dropped during compaction.
51    pub entries_dropped: usize,
52    /// Estimated token count of the compacted output.
53    pub tokens: usize,
54}
55
56/// Configuration for the compaction pass.
57#[derive(Debug, Clone)]
58pub struct CompactionConfig {
59    /// Hard ceiling in tokens for the total compacted output.
60    pub max_tokens: usize,
61    /// Maximum characters per individual entry after compression.
62    pub max_entry_chars: usize,
63    /// Similarity threshold for deduplication (0.0-1.0). Entries with
64    /// text overlap above this threshold are merged.
65    pub dedup_threshold: f64,
66}
67
68impl Default for CompactionConfig {
69    fn default() -> Self {
70        Self {
71            max_tokens: 2000,
72            max_entry_chars: 200,
73            dedup_threshold: 0.8,
74        }
75    }
76}
77
78/// Compact a set of context entries into a minimal-footprint text block.
79///
80/// This is the single entry point for all context compaction. Every memory
81/// section should be converted to `ContextEntry` structs and passed through
82/// this function before entering the token budget.
83pub fn compact(entries: &[ContextEntry], config: &CompactionConfig) -> CompactedContext {
84    if entries.is_empty() {
85        return CompactedContext {
86            text: String::new(),
87            entries_retained: 0,
88            entries_dropped: 0,
89            tokens: 0,
90        };
91    }
92
93    // Phase 1: Compress each entry
94    let mut compressed: Vec<(f64, String, &str)> = entries
95        .iter()
96        .map(|e| {
97            let text = compress_entry(&e.content, config.max_entry_chars);
98            let priority = compute_priority(e);
99            (priority, text, e.source.as_str())
100        })
101        .collect();
102
103    // Phase 2: Deduplicate by text similarity
104    deduplicate(&mut compressed, config.dedup_threshold);
105
106    // Phase 3: Sort by priority (highest first)
107    compressed.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
108
109    // Phase 4: Emit within token budget
110    let mut output = String::new();
111    let mut retained = 0usize;
112    let mut tokens_used = 0usize;
113
114    // Group by source for readable output
115    let mut by_source: std::collections::BTreeMap<&str, Vec<&str>> =
116        std::collections::BTreeMap::new();
117    let mut budget_entries = Vec::new();
118
119    for (_, text, source) in &compressed {
120        let entry_tokens = estimate_tokens(text);
121        if tokens_used + entry_tokens > config.max_tokens {
122            break;
123        }
124        tokens_used += entry_tokens;
125        retained += 1;
126        budget_entries.push((*source, text.as_str()));
127    }
128
129    // Group retained entries by source for clean formatting
130    for (source, text) in &budget_entries {
131        by_source.entry(source).or_default().push(text);
132    }
133
134    for (source, texts) in &by_source {
135        let header = source_header(source);
136        output.push_str(&header);
137        output.push('\n');
138        for text in texts {
139            output.push_str("- ");
140            output.push_str(text);
141            output.push('\n');
142        }
143        output.push('\n');
144    }
145
146    let final_tokens = estimate_tokens(&output);
147
148    CompactedContext {
149        text: output.trim_end().to_string(),
150        entries_retained: retained,
151        entries_dropped: entries.len().saturating_sub(retained),
152        tokens: final_tokens,
153    }
154}
155
156/// Compress a single entry to its minimal form.
157fn compress_entry(content: &str, max_chars: usize) -> String {
158    let mut text = content.trim().to_string();
159
160    // Strip markdown formatting artifacts
161    text = text.replace("**", "");
162    text = text.replace("## ", "");
163    text = text.replace("### ", "");
164
165    // Collapse multi-line entries to single line
166    if text.contains('\n') {
167        text = text
168            .lines()
169            .map(|l| l.trim())
170            .filter(|l| !l.is_empty())
171            .collect::<Vec<_>>()
172            .join("; ");
173    }
174
175    // Strip leading bullet/list markers
176    if text.starts_with("- ") {
177        text = text[2..].to_string();
178    }
179    if text.starts_with("• ") {
180        text = text["• ".len()..].to_string();
181    }
182
183    // Strip metadata brackets like "[episodic_memory | sim=0.85]" anywhere
184    while let Some(start) = text.find('[') {
185        if let Some(end) = text[start..].find(']') {
186            let bracket_content = &text[start + 1..start + end];
187            // Only strip if it looks like metadata (contains | or sim= or source_table)
188            if bracket_content.contains('|')
189                || bracket_content.contains("sim=")
190                || bracket_content.contains("_memory")
191            {
192                text = format!(
193                    "{}{}",
194                    text[..start].trim_end(),
195                    text[start + end + 1..].trim_start()
196                );
197                continue;
198            }
199        }
200        break;
201    }
202
203    // Truncate to max chars
204    if text.len() > max_chars {
205        text = text.chars().take(max_chars).collect();
206        // Clean truncation — don't cut mid-word
207        if let Some(last_space) = text.rfind(' ')
208            && last_space > max_chars / 2
209        {
210            text.truncate(last_space);
211        }
212        text.push_str("...");
213    }
214
215    text
216}
217
218/// Compute a priority score combining recency, relevance, and importance.
219/// Higher = more important to retain.
220fn compute_priority(entry: &ContextEntry) -> f64 {
221    let importance_norm = entry.importance as f64 / 10.0; // 0.0-1.0
222    let relevance = entry.relevance as f64; // 0.0-1.0
223
224    // Recency: exponential decay with 1-hour half-life
225    let recency = if entry.age_seconds == 0 {
226        1.0
227    } else {
228        (-0.693 * entry.age_seconds as f64 / 3600.0).exp() // ln(2)/3600
229    };
230
231    // Weighted combination: relevance dominates when present,
232    // recency fills in when relevance is 0 (ambient entries)
233    if relevance > 0.1 {
234        0.4 * relevance + 0.3 * importance_norm + 0.3 * recency
235    } else {
236        0.2 * importance_norm + 0.8 * recency
237    }
238}
239
240/// Remove entries whose compressed text is substantially similar to a
241/// higher-priority entry already in the list.
242fn deduplicate(entries: &mut Vec<(f64, String, &str)>, threshold: f64) {
243    let mut i = 0;
244    while i < entries.len() {
245        let mut duplicate = false;
246        for j in 0..i {
247            if text_overlap(&entries[j].1, &entries[i].1) > threshold {
248                duplicate = true;
249                break;
250            }
251        }
252        if duplicate {
253            entries.remove(i);
254        } else {
255            i += 1;
256        }
257    }
258}
259
260/// Cheap text overlap measure: Jaccard similarity on word trigrams.
261/// Public text overlap score for topic detection and deduplication.
262/// Jaccard similarity on word trigrams — 1.0 = identical, 0.0 = no overlap.
263pub fn text_overlap_score(a: &str, b: &str) -> f64 {
264    text_overlap(a, b)
265}
266
267fn text_overlap(a: &str, b: &str) -> f64 {
268    let trigrams_a = word_trigrams(a);
269    let trigrams_b = word_trigrams(b);
270    if trigrams_a.is_empty() && trigrams_b.is_empty() {
271        return 1.0;
272    }
273    let intersection = trigrams_a.intersection(&trigrams_b).count();
274    let union = trigrams_a.union(&trigrams_b).count();
275    if union == 0 {
276        0.0
277    } else {
278        intersection as f64 / union as f64
279    }
280}
281
282fn word_trigrams(text: &str) -> std::collections::HashSet<String> {
283    let words: Vec<&str> = text.split_whitespace().collect();
284    if words.len() < 3 {
285        return words.iter().map(|w| w.to_ascii_lowercase()).collect();
286    }
287    words
288        .windows(3)
289        .map(|w| format!("{} {} {}", w[0], w[1], w[2]).to_ascii_lowercase())
290        .collect()
291}
292
293/// Compact a pre-formatted memory text block (from the retriever) to fit
294/// within a token budget. This is the string-level compaction entry point
295/// used at the retrieval → context assembly boundary.
296///
297/// Operates on the formatted `[Active Memory]` / `[Working Memory]` /
298/// `[Relevant Memories]` / `[Recent Activity]` text blocks produced by
299/// the retriever. Compresses each bullet, deduplicates, and drops
300/// lowest-priority lines until the budget is satisfied.
301pub fn compact_text(text: &str, max_tokens: usize) -> String {
302    if text.is_empty() || max_tokens == 0 {
303        return String::new();
304    }
305
306    let current_tokens = crate::context::estimate_tokens(text);
307    if current_tokens <= max_tokens {
308        return text.to_string(); // Already within budget
309    }
310
311    // Parse into lines, compress each, then reassemble within budget
312    let mut output = String::new();
313    let mut used = 0usize;
314
315    for line in text.lines() {
316        let trimmed = line.trim();
317        if trimmed.is_empty() {
318            continue;
319        }
320
321        // Section headers pass through unchanged
322        if trimmed.starts_with('[') && trimmed.ends_with(']') {
323            let header_tokens = crate::context::estimate_tokens(trimmed);
324            if used + header_tokens > max_tokens {
325                break;
326            }
327            if !output.is_empty() {
328                output.push('\n');
329            }
330            output.push_str(trimmed);
331            output.push('\n');
332            used += header_tokens;
333            continue;
334        }
335
336        // Compress bullet entries
337        let compressed = compress_entry(trimmed, 150);
338        let line_tokens = crate::context::estimate_tokens(&compressed);
339        if used + line_tokens > max_tokens {
340            break;
341        }
342        output.push_str(&compressed);
343        output.push('\n');
344        used += line_tokens;
345    }
346
347    output.trim_end().to_string()
348}
349
350/// Map source tier to a compact section header.
351fn source_header(source: &str) -> String {
352    match source {
353        "working" => "[Working Memory]".to_string(),
354        "ambient" => "[Recent Activity]".to_string(),
355        "episodic" | "semantic" => "[Relevant Memories]".to_string(),
356        "procedural" => "[Skills]".to_string(),
357        "relationship" => "[Relationships]".to_string(),
358        "checkpoint" => "[Session Context]".to_string(),
359        "hippocampus" => "[Storage]".to_string(),
360        "topic_summary" => "[Earlier Topics]".to_string(),
361        other => format!("[{other}]"),
362    }
363}
364
365#[cfg(test)]
366mod tests {
367    use super::*;
368
369    fn entry(
370        source: &str,
371        content: &str,
372        importance: f32,
373        age_secs: u64,
374        relevance: f32,
375    ) -> ContextEntry {
376        ContextEntry {
377            source: source.to_string(),
378            content: content.to_string(),
379            importance,
380            age_seconds: age_secs,
381            relevance,
382        }
383    }
384
385    #[test]
386    fn empty_input_produces_empty_output() {
387        let result = compact(&[], &CompactionConfig::default());
388        assert!(result.text.is_empty());
389        assert_eq!(result.entries_retained, 0);
390    }
391
392    #[test]
393    fn single_entry_passes_through() {
394        let entries = vec![entry(
395            "episodic",
396            "User asked about workspace cleanup",
397            5.0,
398            300,
399            0.8,
400        )];
401        let result = compact(&entries, &CompactionConfig::default());
402        assert!(result.text.contains("workspace cleanup"));
403        assert_eq!(result.entries_retained, 1);
404        assert_eq!(result.entries_dropped, 0);
405    }
406
407    #[test]
408    fn duplicates_are_removed() {
409        let entries = vec![
410            entry(
411                "episodic",
412                "Agent cleaned up workspace files",
413                5.0,
414                300,
415                0.8,
416            ),
417            entry("ambient", "Agent cleaned up workspace files", 5.0, 300, 0.0),
418        ];
419        let result = compact(&entries, &CompactionConfig::default());
420        assert_eq!(result.entries_retained, 1);
421        assert_eq!(result.entries_dropped, 1);
422    }
423
424    #[test]
425    fn budget_enforced() {
426        let entries: Vec<ContextEntry> = (0..100)
427            .map(|i| {
428                entry(
429                    "episodic",
430                    &format!("Memory entry number {i} with some content to take up space"),
431                    5.0,
432                    i * 60,
433                    0.5,
434                )
435            })
436            .collect();
437        let config = CompactionConfig {
438            max_tokens: 100,
439            ..Default::default()
440        };
441        let result = compact(&entries, &config);
442        assert!(result.tokens <= 110); // small margin for estimation
443        assert!(result.entries_retained < 100);
444        assert!(result.entries_dropped > 0);
445    }
446
447    #[test]
448    fn high_priority_entries_retained_first() {
449        let entries = vec![
450            entry("episodic", "Old low-relevance memory", 1.0, 7200, 0.1),
451            entry("ambient", "Very recent high-importance fact", 9.0, 30, 0.0),
452            entry("semantic", "Highly relevant stored fact", 5.0, 3600, 0.9),
453        ];
454        let config = CompactionConfig {
455            max_tokens: 15, // extremely tight — can only fit ~1 entry with header
456            ..Default::default()
457        };
458        let result = compact(&entries, &config);
459        // Should retain at least the highest-priority entry
460        assert!(result.entries_retained >= 1);
461        // With such a tight budget, the old low-relevance memory should be
462        // dropped in favor of higher-priority entries (recent or relevant).
463        if result.entries_retained < 3 {
464            assert!(!result.text.contains("Old low-relevance"));
465        }
466    }
467
468    #[test]
469    fn compress_strips_formatting() {
470        let raw = "**Important**: This is a [episodic_memory | sim=0.85] formatted entry\nWith multiple lines\nAnd verbose content";
471        let compressed = compress_entry(raw, 200);
472        assert!(!compressed.contains("**"));
473        assert!(!compressed.contains('\n'));
474        assert!(!compressed.contains("sim=0.85"));
475    }
476
477    #[test]
478    fn compress_truncates_long_entries() {
479        let long = "a ".repeat(200);
480        let compressed = compress_entry(&long, 50);
481        assert!(compressed.len() < 60); // 50 + "..."
482        assert!(compressed.ends_with("..."));
483    }
484
485    #[test]
486    fn priority_favors_recent_relevant_entries() {
487        let recent_relevant = ContextEntry {
488            source: "episodic".into(),
489            content: "test".into(),
490            importance: 5.0,
491            age_seconds: 60,
492            relevance: 0.9,
493        };
494        let old_irrelevant = ContextEntry {
495            source: "episodic".into(),
496            content: "test".into(),
497            importance: 5.0,
498            age_seconds: 86400,
499            relevance: 0.1,
500        };
501        assert!(compute_priority(&recent_relevant) > compute_priority(&old_irrelevant));
502    }
503
504    #[test]
505    fn text_overlap_identical() {
506        assert!((text_overlap("the quick brown fox", "the quick brown fox") - 1.0).abs() < 0.01);
507    }
508
509    #[test]
510    fn text_overlap_different() {
511        assert!(text_overlap("the quick brown fox", "completely different words here") < 0.1);
512    }
513
514    #[test]
515    fn grouped_output_has_section_headers() {
516        let entries = vec![
517            entry("ambient", "Recent thing happened", 5.0, 60, 0.0),
518            entry("procedural", "How to run a scan", 5.0, 3600, 0.7),
519        ];
520        let result = compact(&entries, &CompactionConfig::default());
521        assert!(result.text.contains("[Recent Activity]"));
522        assert!(result.text.contains("[Skills]"));
523    }
524}