// engram/intelligence/context_compression.rs
//! Active Context Compression (RML-1211)
//!
//! Provides adaptive, multi-level compression of memory content to fit within
//! LLM context windows. Pure computation — no database access.
//!
//! Compression levels:
//! - `None`   — full content, no changes
//! - `Light`  — remove stopwords and filler phrases
//! - `Medium` — extractive summary (first sentence + entity sentences)
//! - `Heavy`  — key facts only (entities, numbers, dates)

use serde::{Deserialize, Serialize};

// ---------------------------------------------------------------------------
// Common English stopwords (~30 words)
// ---------------------------------------------------------------------------

// High-frequency English function words dropped during `Light` compression.
// Matched whole-word and case-insensitively (see `compress_light`).
const STOPWORDS: &[&str] = &[
    "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
    "from", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "do", "does",
    "did", "will", "would",
];

// Filler words and phrases stripped out by `Light` compression.
// Matched case-insensitively and only at word boundaries, so e.g. the
// "just" entry does not bite into "adjust".
const FILLER_PHRASES: &[&str] = &[
    "basically", "essentially", "in fact", "as a matter of fact",
    "it is worth noting that", "it should be noted that", "needless to say",
    "to be honest", "honestly", "actually", "literally", "obviously",
    "clearly", "simply", "just", "very", "really", "quite", "rather",
    "somewhat", "kind of", "sort of",
];

// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------

54/// Level of compression to apply to memory content.
55#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
56pub enum CompressionLevel {
57    /// Full content, no changes.
58    None,
59    /// Remove stopwords and common filler phrases.
60    Light,
61    /// Extractive summary: keep first sentence of each paragraph and any
62    /// sentence that contains a capitalized (entity-like) word.
63    Medium,
64    /// Key facts only: "Entity: fact" patterns, numbers, and dates.
65    Heavy,
66}
67
/// A memory entry after compression.
///
/// Produced by `ContextCompressor::compress_for_context`; pairs the
/// (possibly shortened) text with bookkeeping about the token cost before
/// and after compression.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressedEntry {
    /// Original memory ID (copied from `MemoryInput::id`).
    pub original_id: i64,
    /// Estimated token count of the original, uncompressed content.
    pub original_tokens: usize,
    /// The (potentially compressed) content.
    pub compressed_content: String,
    /// Estimated token count of `compressed_content` — this is what counts
    /// against the budget.
    pub tokens_used: usize,
    /// Which compression level was applied.
    pub compression_level: CompressionLevel,
}

/// A snapshot of token budget state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenBudget {
    /// Maximum tokens allowed.
    pub total: usize,
    /// Tokens consumed so far.
    pub used: usize,
    /// Remaining capacity (`total - used`, saturating at zero).
    pub remaining: usize,
}

/// Simple memory representation used as compression input.
#[derive(Debug, Clone)]
pub struct MemoryInput {
    /// Unique identifier for the memory.
    pub id: i64,
    /// Raw text content.
    pub content: String,
    /// Importance score in `[0.0, 1.0]`; higher = more important.
    /// Memories are processed in descending-importance order, so more
    /// important memories receive lighter compression.
    pub importance: f32,
}

// ---------------------------------------------------------------------------
// ContextCompressor
// ---------------------------------------------------------------------------

/// Adaptively compresses a set of memories to fit within a token budget.
///
/// Note: the compression entry points are associated functions and do not
/// mutate this state; `used_tokens` only changes if the caller tracks usage
/// manually (see `budget`).
pub struct ContextCompressor {
    // Maximum number of tokens the compressed context may occupy.
    budget_tokens: usize,
    // Tokens consumed so far; reported via `budget()`.
    used_tokens: usize,
}

impl ContextCompressor {
116    /// Create a new compressor with the given token budget.
117    pub fn new(budget_tokens: usize) -> Self {
118        Self {
119            budget_tokens,
120            used_tokens: 0,
121        }
122    }
123
124    // -----------------------------------------------------------------------
125    // Token estimation
126    // -----------------------------------------------------------------------
127
128    /// Estimate the token count for `text` using the heuristic `chars / 4`.
129    pub fn estimate_tokens(text: &str) -> usize {
130        text.len().div_ceil(4)
131    }
132
133    // -----------------------------------------------------------------------
134    // Compression implementations
135    // -----------------------------------------------------------------------
136
137    /// Light compression: remove filler phrases (case-insensitive), then
138    /// drop standalone stopwords and collapse extra whitespace.
139    pub fn compress_light(text: &str) -> String {
140        let mut result = text.to_string();
141
142        // Remove filler phrases first (case-insensitive, whole-phrase match)
143        for phrase in FILLER_PHRASES {
144            let lower = result.to_lowercase();
145            // Find and remove all occurrences (greedy from the right to avoid
146            // index invalidation when the string shrinks).
147            let mut positions: Vec<usize> = lower.match_indices(phrase).map(|(i, _)| i).collect();
148            positions.sort_unstable_by(|a, b| b.cmp(a)); // reverse order
149            for pos in positions {
150                // Only remove if the match is surrounded by non-alphabetic chars
151                // to avoid partial-word removal.
152                let before_ok = pos == 0
153                    || !result
154                        .as_bytes()
155                        .get(pos - 1)
156                        .copied()
157                        .map(|b| b.is_ascii_alphabetic())
158                        .unwrap_or(false);
159                let after_pos = pos + phrase.len();
160                let after_ok = after_pos >= result.len()
161                    || !result
162                        .as_bytes()
163                        .get(after_pos)
164                        .copied()
165                        .map(|b| b.is_ascii_alphabetic())
166                        .unwrap_or(false);
167
168                if before_ok && after_ok {
169                    result.drain(pos..after_pos);
170                }
171            }
172        }
173
174        // Drop standalone stopwords (whole-word, case-insensitive)
175        let words: Vec<&str> = result.split_whitespace().collect();
176        let filtered: Vec<&str> = words
177            .into_iter()
178            .filter(|w| {
179                let lower = w.to_lowercase();
180                let stripped = lower.trim_matches(|c: char| !c.is_alphabetic());
181                !STOPWORDS.contains(&stripped)
182            })
183            .collect();
184
185        // Rejoin and collapse whitespace
186        filtered.join(" ")
187    }
188
189    /// Medium compression: keep the first sentence of each paragraph and any
190    /// sentence that contains a word starting with a capital letter (entity
191    /// heuristic).
192    pub fn compress_medium(text: &str) -> String {
193        let paragraphs: Vec<&str> = text.split("\n\n").collect();
194        let mut kept: Vec<String> = Vec::new();
195
196        for paragraph in &paragraphs {
197            let sentences = split_sentences(paragraph);
198            let mut para_kept: Vec<&str> = Vec::new();
199
200            for (idx, sentence) in sentences.iter().enumerate() {
201                let trimmed = sentence.trim();
202                if trimmed.is_empty() {
203                    continue;
204                }
205
206                // Always keep first sentence of paragraph
207                if idx == 0 {
208                    para_kept.push(trimmed);
209                    continue;
210                }
211
212                // Keep sentence if it contains an entity-like capitalized word
213                // (a word that is not the first word and starts with uppercase)
214                if has_entity_word(trimmed) {
215                    para_kept.push(trimmed);
216                }
217            }
218
219            if !para_kept.is_empty() {
220                kept.push(para_kept.join(" "));
221            }
222        }
223
224        kept.join("\n\n")
225    }
226
227    /// Heavy compression: extract lines that look like key facts —
228    /// "Entity: something", lines containing numbers, or dates.
229    pub fn compress_heavy(text: &str) -> String {
230        let mut facts: Vec<String> = Vec::new();
231
232        for line in text.lines() {
233            let trimmed = line.trim();
234            if trimmed.is_empty() {
235                continue;
236            }
237
238            // Pattern 1: "Word: ..." — colon-delimited key-value fact
239            if looks_like_fact_line(trimmed) {
240                facts.push(trimmed.to_string());
241                continue;
242            }
243
244            // Pattern 2: contains a number
245            if trimmed.chars().any(|c| c.is_ascii_digit()) {
246                facts.push(trimmed.to_string());
247                continue;
248            }
249
250            // Pattern 3: contains a date-like substring (YYYY or Month-name)
251            if contains_date(trimmed) {
252                facts.push(trimmed.to_string());
253            }
254        }
255
256        if facts.is_empty() {
257            // Fallback: first sentence only
258            let first = split_sentences(text).into_iter().next().unwrap_or_default();
259            first.trim().to_string()
260        } else {
261            facts.join("\n")
262        }
263    }
264
265    // -----------------------------------------------------------------------
266    // Public API
267    // -----------------------------------------------------------------------
268
269    /// Apply the specified compression level to `content`.
270    pub fn compress_single(content: &str, level: CompressionLevel) -> String {
271        match level {
272            CompressionLevel::None => content.to_string(),
273            CompressionLevel::Light => Self::compress_light(content),
274            CompressionLevel::Medium => Self::compress_medium(content),
275            CompressionLevel::Heavy => Self::compress_heavy(content),
276        }
277    }
278
279    /// Adaptively compress a slice of memories to fit within `budget` tokens.
280    ///
281    /// Algorithm:
282    /// 1. Sort memories by importance (descending) — most important get
283    ///    processed first and receive lighter compression.
284    /// 2. For each memory, try compression levels in order:
285    ///    `None → Light → Medium → Heavy`.
286    /// 3. The first level that fits the remaining budget is used.
287    /// 4. If even `Heavy` doesn't fit, the memory is skipped.
288    ///
289    /// Returns the ordered list of successfully compressed entries.
290    pub fn compress_for_context(memories: &[MemoryInput], budget: usize) -> Vec<CompressedEntry> {
291        // Sort by importance descending (clone indices to preserve original IDs)
292        let mut indexed: Vec<usize> = (0..memories.len()).collect();
293        indexed.sort_unstable_by(|&a, &b| {
294            memories[b]
295                .importance
296                .partial_cmp(&memories[a].importance)
297                .unwrap_or(std::cmp::Ordering::Equal)
298        });
299
300        let mut entries: Vec<CompressedEntry> = Vec::new();
301        let mut used: usize = 0;
302
303        for idx in indexed {
304            let mem = &memories[idx];
305            let original_tokens = Self::estimate_tokens(&mem.content);
306
307            // Try each compression level in escalating order
308            let levels = [
309                CompressionLevel::None,
310                CompressionLevel::Light,
311                CompressionLevel::Medium,
312                CompressionLevel::Heavy,
313            ];
314
315            let mut chosen: Option<(CompressionLevel, String, usize)> = None;
316
317            for &level in &levels {
318                let compressed = Self::compress_single(&mem.content, level);
319                let tokens = Self::estimate_tokens(&compressed);
320
321                if used + tokens <= budget {
322                    chosen = Some((level, compressed, tokens));
323                    break;
324                }
325            }
326
327            if let Some((level, compressed_content, tokens)) = chosen {
328                used += tokens;
329                entries.push(CompressedEntry {
330                    original_id: mem.id,
331                    original_tokens,
332                    compressed_content,
333                    tokens_used: tokens,
334                    compression_level: level,
335                });
336            }
337            // else: memory skipped — does not fit even at Heavy
338        }
339
340        entries
341    }
342
343    /// Current budget state of this compressor instance.
344    ///
345    /// Note: `compress_for_context` is a free function that does not mutate
346    /// the compressor. Call this after manually tracking usage with
347    /// `estimate_tokens`, or use it to inspect the configured budget.
348    pub fn budget(&self) -> TokenBudget {
349        let used = self.used_tokens;
350        let remaining = self.budget_tokens.saturating_sub(used);
351        TokenBudget {
352            total: self.budget_tokens,
353            used,
354            remaining,
355        }
356    }
}

// ---------------------------------------------------------------------------
// Private helpers
// ---------------------------------------------------------------------------

/// Split text into sentences on `.`, `!`, or `?` boundaries.
///
/// The terminating punctuation stays with its sentence, runs of spaces after
/// a terminator are skipped, and trailing text without terminal punctuation
/// becomes a final sentence. Returned slices are trimmed and never empty.
fn split_sentences(text: &str) -> Vec<&str> {
    let bytes = text.as_bytes();
    let total = bytes.len();

    let mut pieces: Vec<&str> = Vec::new();
    let mut start = 0;
    let mut cursor = 0;

    while cursor < total {
        match bytes[cursor] {
            // ASCII terminators, so `cursor + 1` is always a char boundary.
            b'.' | b'!' | b'?' => {
                let piece = text[start..cursor + 1].trim();
                if !piece.is_empty() {
                    pieces.push(piece);
                }
                // Skip the run of spaces following the terminator.
                cursor += 1;
                while cursor < total && bytes[cursor] == b' ' {
                    cursor += 1;
                }
                start = cursor;
            }
            _ => cursor += 1,
        }
    }

    // Trailing text without terminal punctuation.
    let tail = text[start..].trim();
    if !tail.is_empty() {
        pieces.push(tail);
    }

    pieces
}

/// Return `true` if the sentence contains a word (not the first word) that
/// starts with an uppercase ASCII letter — a rough entity heuristic.
fn has_entity_word(sentence: &str) -> bool {
    let mut words = sentence.split_whitespace();
    // Ignore the first word: it may only carry a sentence-initial capital.
    words.next();
    words.any(|word| matches!(word.chars().next(), Some(first) if first.is_ascii_uppercase()))
}

/// Return `true` if the line matches "Word(s): rest" — a key-value fact.
///
/// The key (text before the first colon) must be non-empty after trimming,
/// at most 40 bytes long, and consist only of alphabetic characters, spaces,
/// underscores, or hyphens.
fn looks_like_fact_line(line: &str) -> bool {
    match line.find(':') {
        // No colon, or the line starts with one — not a key-value fact.
        None | Some(0) => false,
        Some(colon_pos) => {
            let key = line[..colon_pos].trim();
            !key.is_empty()
                && key.len() <= 40
                && key
                    .chars()
                    .all(|c| c.is_alphabetic() || matches!(c, ' ' | '_' | '-'))
        }
    }
}

/// Return `true` if the text contains something that looks like a date:
/// a standalone 4-digit year in 1900–2099, or an English month name.
///
/// NOTE: month matching is substring-based, so words that merely contain a
/// month name (e.g. "mayhem", "marching") also match — acceptable for this
/// lossy heuristic.
fn contains_date(text: &str) -> bool {
    const MONTHS: &[&str] = &[
        "january", "february", "march", "april", "may", "june", "july",
        "august", "september", "october", "november", "december",
    ];

    // Check for a standalone 4-digit year in 1900-2099. Requiring non-digit
    // neighbours stops longer numbers ("190012345") from matching on an
    // embedded 4-digit window.
    let bytes = text.as_bytes();
    for i in 0..bytes.len().saturating_sub(3) {
        if !bytes[i..i + 4].iter().all(u8::is_ascii_digit) {
            continue;
        }
        let standalone = (i == 0 || !bytes[i - 1].is_ascii_digit())
            && (i + 4 >= bytes.len() || !bytes[i + 4].is_ascii_digit());
        if !standalone {
            continue;
        }
        // ASCII digits guarantee char boundaries, so slicing is safe.
        if let Ok(year) = text[i..i + 4].parse::<u32>() {
            if (1900..=2099).contains(&year) {
                return true;
            }
        }
    }

    // Check for month names (case-insensitive); lowercase lazily, only when
    // the year scan found nothing.
    let lower = text.to_lowercase();
    MONTHS.iter().any(|m| lower.contains(m))
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    // 1. Budget enforcement — total tokens_used must never exceed budget
    #[test]
    fn test_budget_enforcement() {
        let memories = vec![
            MemoryInput {
                id: 1,
                content: "A".repeat(400), // ~100 tokens at 4 bytes/token
                importance: 0.9,
            },
            MemoryInput {
                id: 2,
                content: "B".repeat(400),
                importance: 0.8,
            },
            MemoryInput {
                id: 3,
                content: "C".repeat(400),
                importance: 0.7,
            },
        ];

        let budget = 120; // tight budget — should not fit all three at None
        let entries = ContextCompressor::compress_for_context(&memories, budget);

        let total_used: usize = entries.iter().map(|e| e.tokens_used).sum();
        assert!(
            total_used <= budget,
            "total_used={} exceeded budget={}",
            total_used,
            budget
        );
    }

    // 2. Adaptive escalation — less important memory gets heavier compression
    #[test]
    fn test_adaptive_escalation() {
        // Create a high-importance large memory and a low-importance one.
        // Budget is tight enough that the low-importance one must be compressed.
        let long_content = "The project launched in January 2024. Alice and Bob led the team. \
            The revenue grew by 40% year over year. Customer satisfaction reached 95%. \
            The new platform handles 10 million requests per day."
            .repeat(5);

        let memories = vec![
            MemoryInput {
                id: 1,
                content: long_content.clone(),
                importance: 1.0,
            },
            MemoryInput {
                id: 2,
                content: long_content.clone(),
                importance: 0.1,
            },
        ];

        // Budget: can fit one at None, the second must be compressed
        let one_token_count = ContextCompressor::estimate_tokens(&long_content);
        let budget = one_token_count + one_token_count / 4; // room for one full + partial

        let entries = ContextCompressor::compress_for_context(&memories, budget);

        // First entry (higher importance) should be at None or Light
        if let Some(first) = entries.first() {
            assert_eq!(first.original_id, 1, "highest importance should be first");
        }

        // If both entries exist, the second should be more compressed.
        // (Variants are declared lightest-first, so the `as u8` cast
        // preserves the escalation order None < Light < Medium < Heavy.)
        if entries.len() == 2 {
            let first_level = entries[0].compression_level as u8;
            let second_level = entries[1].compression_level as u8;
            assert!(
                second_level >= first_level,
                "less important memory should have equal or heavier compression"
            );
        }
    }

    // 3. Empty input returns empty output
    #[test]
    fn test_empty_input() {
        let entries = ContextCompressor::compress_for_context(&[], 1000);
        assert!(entries.is_empty());
    }

    // 4. Single memory that fits returns at None level
    #[test]
    fn test_single_memory_fits_at_none() {
        let content = "This is a short note.";
        let memories = vec![MemoryInput {
            id: 42,
            content: content.to_string(),
            importance: 0.5,
        }];

        let budget = 1000; // very generous
        let entries = ContextCompressor::compress_for_context(&memories, budget);

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].original_id, 42);
        assert_eq!(entries[0].compression_level, CompressionLevel::None);
        assert_eq!(entries[0].compressed_content, content);
    }

    // 5. All memories exceed budget — returns only what fits
    #[test]
    fn test_all_memories_exceed_budget_returns_partial() {
        let memories = vec![
            MemoryInput {
                id: 1,
                content: "A".repeat(1000), // 250 tokens
                importance: 0.9,
            },
            MemoryInput {
                id: 2,
                content: "B".repeat(1000),
                importance: 0.8,
            },
            MemoryInput {
                id: 3,
                content: "C".repeat(1000),
                importance: 0.7,
            },
        ];

        let budget = 1; // impossibly small — nothing should fit
        let entries = ContextCompressor::compress_for_context(&memories, budget);

        assert!(
            entries.is_empty(),
            "nothing should fit in a budget of 1 token"
        );
    }

    // 6. Token estimation accuracy
    #[test]
    fn test_token_estimation() {
        // Empty string
        assert_eq!(ContextCompressor::estimate_tokens(""), 0);

        // Exactly 4 chars → 1 token
        assert_eq!(ContextCompressor::estimate_tokens("abcd"), 1);

        // 8 chars → 2 tokens
        assert_eq!(ContextCompressor::estimate_tokens("abcdefgh"), 2);

        // 100 chars → 25 tokens
        let s = "a".repeat(100);
        assert_eq!(ContextCompressor::estimate_tokens(&s), 25);

        // 5 chars → ceil(5/4) = 2 (rounds up)
        assert_eq!(ContextCompressor::estimate_tokens("abcde"), 2);
    }

    // 7. Light compression removes filler
    #[test]
    fn test_light_compression_removes_filler() {
        let text = "This is basically a very simple test. It is, honestly, quite straightforward.";
        let compressed = ContextCompressor::compress_light(text);

        // Filler words should be absent or reduced
        assert!(
            compressed.len() < text.len(),
            "light compression should shorten text"
        );
        // "basically" should be removed
        assert!(
            !compressed.to_lowercase().contains("basically"),
            "filler 'basically' should be removed"
        );
        // "honestly" should be removed
        assert!(
            !compressed.to_lowercase().contains("honestly"),
            "filler 'honestly' should be removed"
        );
    }

    // 8. Heavy compression extracts facts only
    #[test]
    fn test_heavy_compression_extracts_facts() {
        let text = "The meeting was uneventful.\n\
            Revenue: 1.5 million dollars\n\
            Founded: January 2020\n\
            The weather was nice today.\n\
            Headcount: 42 engineers";

        let compressed = ContextCompressor::compress_heavy(text);

        // Should include the fact lines and number/date lines
        assert!(
            compressed.contains("Revenue:") || compressed.contains("1.5"),
            "should include revenue fact"
        );
        assert!(
            compressed.contains("Headcount:") || compressed.contains("42"),
            "should include headcount fact"
        );
        // The non-fact lines should not dominate
        let lines: Vec<&str> = compressed.lines().collect();
        assert!(
            lines.len() <= 4,
            "heavy compression should produce few lines, got {}",
            lines.len()
        );
    }

    // Bonus: compress_single dispatches correctly
    #[test]
    fn test_compress_single_none_returns_unchanged() {
        let text = "Hello, world!";
        let result = ContextCompressor::compress_single(text, CompressionLevel::None);
        assert_eq!(result, text);
    }

    // Bonus: budget() reflects configured total
    #[test]
    fn test_budget_reflects_configured_total() {
        let compressor = ContextCompressor::new(8192);
        let b = compressor.budget();
        assert_eq!(b.total, 8192);
        assert_eq!(b.used, 0);
        assert_eq!(b.remaining, 8192);
    }

    // Bonus: medium compression keeps first sentence
    #[test]
    fn test_medium_compression_keeps_first_sentence() {
        let text = "First sentence here. Second sentence with Entity Name. Third unimportant one.";
        let compressed = ContextCompressor::compress_medium(text);

        assert!(
            compressed.contains("First sentence here"),
            "medium compression should keep first sentence"
        );
    }
}