engram/intelligence/
context_builder.rs

1//! Memory-aware prompt construction engine (T8)
2//!
3//! Assembles retrieved memories into structured LLM prompts with configurable
4//! token budgets, section priorities, and fill strategies.
5//!
6//! # Example
7//!
8//! ```rust,ignore
9//! use engram::intelligence::context_builder::{
10//!     ContextBuilder, PromptTemplate, Section, SimpleTokenCounter, Strategy,
11//! };
12//!
13//! let counter = SimpleTokenCounter;
14//! let builder = ContextBuilder::new(Box::new(counter));
15//!
16//! let template = PromptTemplate {
17//!     sections: vec![
18//!         Section { name: "System".into(), content: "You are helpful.".into(), max_tokens: 100, priority: 0 },
19//!         Section { name: "Memories".into(), content: String::new(), max_tokens: 500, priority: 1 },
20//!     ],
21//!     total_budget: 600,
22//!     separator: "\n\n---\n\n".into(),
23//! };
24//!
25//! let result = builder.build(&template, &memories, Strategy::Greedy);
26//! ```
27
28use chrono::{DateTime, Utc};
29
30// NOTE: For production use, tiktoken-rs can be integrated as the token counter
31// implementation by wrapping its BPE encoder in a struct that implements TokenCounter.
32// We intentionally keep this module dependency-free beyond std + chrono.
33
34// ---------------------------------------------------------------------------
35// Public types
36// ---------------------------------------------------------------------------
37
38/// A section in the prompt with its own token budget and priority.
39///
40/// - `priority = 0` is the **highest** priority (filled first in Greedy).
41/// - `content` may be pre-filled (static system prompt) or empty (to be
42///   filled by memories at build time).
43#[derive(Debug, Clone)]
44pub struct Section {
45    /// Display name used as the section header in the rendered prompt.
46    pub name: String,
47    /// Fixed content for this section. Memories are appended after this
48    /// when the builder fills the section.
49    pub content: String,
50    /// Maximum tokens allowed for this section (including fixed content).
51    pub max_tokens: usize,
52    /// Fill priority — lower number = higher priority.
53    pub priority: u8,
54}
55
56/// Template defining prompt structure.
57///
58/// The builder uses `sections` to determine what goes into the prompt and
59/// respects `total_budget` as a hard upper bound across all sections.
60#[derive(Debug, Clone)]
61pub struct PromptTemplate {
62    /// Ordered list of sections to render.
63    pub sections: Vec<Section>,
64    /// Hard token ceiling across the entire rendered prompt.
65    pub total_budget: usize,
66    /// String inserted between non-empty sections. Defaults to `"\n\n---\n\n"`.
67    pub separator: String,
68}
69
70impl Default for PromptTemplate {
71    fn default() -> Self {
72        Self {
73            sections: Vec::new(),
74            total_budget: 4096,
75            separator: "\n\n---\n\n".to_string(),
76        }
77    }
78}
79
80/// Strategy for filling sections with memories.
81#[derive(Debug, Clone, Copy, PartialEq, Eq)]
82pub enum Strategy {
83    /// Sort sections by priority (ascending), fill each with memories until
84    /// the section budget or total budget is exhausted.
85    Greedy,
86    /// Allocate `total_budget` proportionally across sections based on their
87    /// `max_tokens` ratios, then fill each within its allocation.
88    Balanced,
89    /// Sort memories by `created_at` descending (newest first), then fill
90    /// sections by priority (same as Greedy but with recency-sorted memories).
91    Recency,
92}
93
94/// Abstraction for counting tokens in a string.
95///
96/// Implement this trait to plug in an accurate tokenizer such as tiktoken-rs.
97pub trait TokenCounter: Send + Sync {
98    fn count_tokens(&self, text: &str) -> usize;
99}
100
101/// Simple token estimator that assumes ~4 characters per token.
102///
103/// This is a rough heuristic suitable for budgeting purposes. For accurate
104/// counting, implement [`TokenCounter`] using tiktoken-rs or a similar library.
105pub struct SimpleTokenCounter;
106
107impl TokenCounter for SimpleTokenCounter {
108    fn count_tokens(&self, text: &str) -> usize {
109        // ~4 chars per token is a well-known rule-of-thumb for English text.
110        text.len() / 4
111    }
112}
113
114/// Minimal memory representation used by the builder.
115///
116/// The builder only needs content and a timestamp; callers can project from
117/// the full `crate::types::Memory` into this struct before calling `build`.
118#[derive(Debug, Clone)]
119pub struct MemoryEntry {
120    pub content: String,
121    pub created_at: DateTime<Utc>,
122}
123
124impl MemoryEntry {
125    pub fn new(content: impl Into<String>, created_at: DateTime<Utc>) -> Self {
126        Self {
127            content: content.into(),
128            created_at,
129        }
130    }
131}
132
133/// Context builder that assembles memories into structured prompts.
134pub struct ContextBuilder {
135    counter: Box<dyn TokenCounter>,
136}
137
138impl ContextBuilder {
139    /// Create a new builder with the given token counter.
140    pub fn new(counter: Box<dyn TokenCounter>) -> Self {
141        Self { counter }
142    }
143
144    /// Estimate token count for `text` using the internal counter.
145    pub fn estimate_tokens(&self, text: &str) -> usize {
146        self.counter.count_tokens(text)
147    }
148
149    /// Build a prompt string from `template`, filling memory slots with
150    /// `memories` according to `strategy`.
151    ///
152    /// Returns the assembled prompt as a single `String`. Sections that end
153    /// up empty (no fixed content and no memories fit) are omitted entirely.
154    /// Sections whose content exceeds their `max_tokens` budget are truncated
155    /// with `"...[truncated]"`.
156    pub fn build(
157        &self,
158        template: &PromptTemplate,
159        memories: &[MemoryEntry],
160        strategy: Strategy,
161    ) -> String {
162        // Sort sections by priority (ascending = highest priority first).
163        let mut sections: Vec<&Section> = template.sections.iter().collect();
164        sections.sort_by_key(|s| s.priority);
165
166        // Pre-sort memories according to strategy.
167        let sorted_memories: Vec<&MemoryEntry> = match strategy {
168            Strategy::Recency => {
169                let mut m: Vec<&MemoryEntry> = memories.iter().collect();
170                m.sort_by(|a, b| b.created_at.cmp(&a.created_at));
171                m
172            }
173            _ => memories.iter().collect(),
174        };
175
176        // Compute per-section token allocations.
177        let allocations = self.compute_allocations(template, &sections, strategy);
178
179        let mut rendered: Vec<String> = Vec::new();
180        let mut total_used = 0usize;
181
182        for (idx, section) in sections.iter().enumerate() {
183            let section_budget =
184                allocations[idx].min(template.total_budget.saturating_sub(total_used));
185
186            // Build section text starting from fixed content.
187            let mut section_text = section.content.clone();
188
189            // Append memories that still fit.
190            for memory in &sorted_memories {
191                let candidate = if section_text.is_empty() {
192                    memory.content.clone()
193                } else {
194                    format!("{}\n{}", section_text, memory.content)
195                };
196
197                let candidate_tokens = self.counter.count_tokens(&candidate);
198                if candidate_tokens <= section_budget {
199                    section_text = candidate;
200                }
201                // If over section budget, skip this memory and try the next.
202            }
203
204            // Skip empty sections entirely.
205            if section_text.is_empty() {
206                continue;
207            }
208
209            // Truncate if the section text (including fixed content) exceeds budget.
210            let section_tokens = self.counter.count_tokens(&section_text);
211            let final_text = if section_tokens > section_budget {
212                self.truncate_to_budget(&section_text, section_budget)
213            } else {
214                section_text.clone()
215            };
216
217            // Check that adding this section stays within total budget.
218            let separator_tokens = if rendered.is_empty() {
219                0
220            } else {
221                self.counter.count_tokens(&template.separator)
222            };
223            let final_tokens = self.counter.count_tokens(&final_text);
224
225            if total_used + separator_tokens + final_tokens > template.total_budget {
226                // Try to fit a truncated version.
227                let remaining = template
228                    .total_budget
229                    .saturating_sub(total_used + separator_tokens);
230                if remaining == 0 {
231                    break;
232                }
233                let truncated = self.truncate_to_budget(&final_text, remaining);
234                if !truncated.is_empty() {
235                    rendered.push(self.render_section(section, &truncated));
236                    // total_used update omitted: we break immediately after.
237                }
238                break; // No room for further sections.
239            }
240
241            total_used += separator_tokens + final_tokens;
242            rendered.push(self.render_section(section, &final_text));
243        }
244
245        rendered.join(&template.separator)
246    }
247
248    // -----------------------------------------------------------------------
249    // Private helpers
250    // -----------------------------------------------------------------------
251
252    /// Compute the effective token budget for each section, respecting `strategy`.
253    fn compute_allocations(
254        &self,
255        template: &PromptTemplate,
256        sections: &[&Section],
257        strategy: Strategy,
258    ) -> Vec<usize> {
259        match strategy {
260            Strategy::Balanced => {
261                // Proportional: each section gets (its max_tokens / sum_of_max_tokens) * total_budget.
262                let total_weight: usize = sections.iter().map(|s| s.max_tokens).sum();
263                if total_weight == 0 {
264                    return sections.iter().map(|_| 0).collect();
265                }
266                sections
267                    .iter()
268                    .map(|s| {
269                        let ratio = s.max_tokens as f64 / total_weight as f64;
270                        (ratio * template.total_budget as f64).floor() as usize
271                    })
272                    .collect()
273            }
274            // Greedy and Recency both use section.max_tokens directly.
275            Strategy::Greedy | Strategy::Recency => sections.iter().map(|s| s.max_tokens).collect(),
276        }
277    }
278
279    /// Truncate `text` so that it fits within `budget` tokens, appending
280    /// `"...[truncated]"` to signal the cut.
281    fn truncate_to_budget(&self, text: &str, budget: usize) -> String {
282        const SUFFIX: &str = "...[truncated]";
283
284        let suffix_tokens = self.counter.count_tokens(SUFFIX);
285
286        // If the budget can't even fit the suffix, return just the suffix (or empty).
287        if budget <= suffix_tokens {
288            return if budget == 0 {
289                String::new()
290            } else {
291                SUFFIX.to_string()
292            };
293        }
294
295        let char_budget = (budget - suffix_tokens) * 4; // approximate chars for the main text
296
297        if text.len() <= char_budget {
298            // Already fits; only add suffix if strictly over token count.
299            let tokens = self.counter.count_tokens(text);
300            if tokens <= budget {
301                return text.to_string();
302            }
303        }
304
305        // Walk character boundaries to find a safe cut point.
306        let mut end = char_budget.min(text.len());
307        // Align to a char boundary.
308        while end > 0 && !text.is_char_boundary(end) {
309            end -= 1;
310        }
311        format!("{}{}", &text[..end], SUFFIX)
312    }
313
314    /// Render a single section as `"## {name}\n{content}"`.
315    fn render_section(&self, section: &Section, content: &str) -> String {
316        format!("## {}\n{}", section.name, content)
317    }
318}
319
320// ---------------------------------------------------------------------------
321// Tests
322// ---------------------------------------------------------------------------
323
324#[cfg(test)]
325mod tests {
326    use super::*;
327    use chrono::TimeZone;
328
329    fn make_counter() -> Box<dyn TokenCounter> {
330        Box::new(SimpleTokenCounter)
331    }
332
333    fn make_memory(content: &str, days_ago: i64) -> MemoryEntry {
334        let created_at =
335            Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap() + chrono::Duration::days(-days_ago);
336        MemoryEntry::new(content, created_at)
337    }
338
339    fn simple_template(total_budget: usize) -> PromptTemplate {
340        PromptTemplate {
341            sections: vec![
342                Section {
343                    name: "High Priority".into(),
344                    content: String::new(),
345                    max_tokens: total_budget / 2,
346                    priority: 0,
347                },
348                Section {
349                    name: "Low Priority".into(),
350                    content: String::new(),
351                    max_tokens: total_budget / 2,
352                    priority: 1,
353                },
354            ],
355            total_budget,
356            separator: "\n\n---\n\n".into(),
357        }
358    }
359
360    // Test 1: Greedy strategy fills high-priority section first.
361    #[test]
362    fn test_greedy_fills_high_priority_first() {
363        let builder = ContextBuilder::new(make_counter());
364
365        // Total budget = 40 tokens → each section gets 20.
366        // Memory A alone costs 160 chars / 4 = 40 tokens — won't fit in any section.
367        // Two small memories each 20 chars / 4 = 5 tokens — both fit in high-priority.
368        let memories = vec![
369            make_memory("AAAA AAAA AAAA AAAA", 2), // 20 chars = 5 tokens
370            make_memory("BBBB BBBB BBBB BBBB", 3), // 20 chars = 5 tokens
371        ];
372
373        let template = simple_template(40);
374        let result = builder.build(&template, &memories, Strategy::Greedy);
375
376        // High-priority section should appear before low-priority.
377        let high_pos = result.find("High Priority").unwrap_or(usize::MAX);
378        let low_pos = result.find("Low Priority").unwrap_or(usize::MAX);
379        assert!(
380            high_pos < low_pos,
381            "High-priority section must come before low-priority"
382        );
383
384        // Both memories fit — result should contain both.
385        assert!(
386            result.contains("AAAA"),
387            "High-priority content must be present"
388        );
389    }
390
391    // Test 2: Balanced strategy distributes proportionally.
392    #[test]
393    fn test_balanced_proportional_allocation() {
394        let builder = ContextBuilder::new(make_counter());
395
396        // Section A max_tokens = 100, Section B max_tokens = 300.
397        // Total weight = 400, total_budget = 200.
398        // Section A gets (100/400)*200 = 50 tokens, Section B gets (300/400)*200 = 150 tokens.
399        let template = PromptTemplate {
400            sections: vec![
401                Section {
402                    name: "Small".into(),
403                    content: String::new(),
404                    max_tokens: 100,
405                    priority: 0,
406                },
407                Section {
408                    name: "Large".into(),
409                    content: String::new(),
410                    max_tokens: 300,
411                    priority: 1,
412                },
413            ],
414            total_budget: 200,
415            separator: "\n\n---\n\n".into(),
416        };
417
418        // A memory of exactly 140 chars = 35 tokens — fits in Large (150) but not Small (50).
419        let memories = vec![make_memory(&"X".repeat(140), 1)];
420        let result = builder.build(&template, &memories, Strategy::Balanced);
421
422        // The memory should appear in Large section (high allocation).
423        assert!(result.contains("Large"), "Large section must be rendered");
424        assert!(
425            result.contains(&"X".repeat(140)),
426            "Memory must fit in the Large section"
427        );
428    }
429
430    // Test 3: Recency strategy prefers newer memories.
431    #[test]
432    fn test_recency_prefers_newer_memories() {
433        let builder = ContextBuilder::new(make_counter());
434
435        // Budget tight enough for only one memory per section.
436        // old_memory is 60 chars = 15 tokens; new_memory same size.
437        // Section max_tokens = 16 — only one fits.
438        let template = PromptTemplate {
439            sections: vec![Section {
440                name: "Context".into(),
441                content: String::new(),
442                max_tokens: 20, // fits ~1 memory (15 tokens) + header
443                priority: 0,
444            }],
445            total_budget: 40,
446            separator: "\n\n---\n\n".into(),
447        };
448
449        // older memory: days_ago = 10; newer memory: days_ago = 0.
450        let memories = vec![
451            make_memory("OLD_MEMORY_CONTENT_HERE_PADDED", 10), // older
452            make_memory("NEW_MEMORY_CONTENT_HERE_PADDED", 0),  // newer
453        ];
454
455        let result = builder.build(&template, &memories, Strategy::Recency);
456
457        // The newer memory must appear (it is tried first due to recency sort).
458        assert!(
459            result.contains("NEW_MEMORY"),
460            "Recency strategy must prefer newest memory"
461        );
462    }
463
464    // Test 4: Overflow truncation appends "...[truncated]".
465    #[test]
466    fn test_overflow_truncation() {
467        let builder = ContextBuilder::new(make_counter());
468
469        // Section with very small budget: 5 tokens.
470        // Fixed content is 80 chars = 20 tokens — will be truncated.
471        let template = PromptTemplate {
472            sections: vec![Section {
473                name: "Tiny".into(),
474                content: "A".repeat(80), // 20 tokens, exceeds budget of 5
475                max_tokens: 5,
476                priority: 0,
477            }],
478            total_budget: 50,
479            separator: "\n\n---\n\n".into(),
480        };
481
482        let result = builder.build(&template, &[], Strategy::Greedy);
483        assert!(
484            result.contains("...[truncated]"),
485            "Overflowed section must end with ...[truncated]; got: {result}"
486        );
487    }
488
489    // Test 5: Empty sections (no fixed content and no memories fit) are skipped.
490    #[test]
491    fn test_empty_sections_skipped() {
492        let builder = ContextBuilder::new(make_counter());
493
494        let template = PromptTemplate {
495            sections: vec![
496                Section {
497                    name: "Present".into(),
498                    content: "I have content.".into(),
499                    max_tokens: 100,
500                    priority: 0,
501                },
502                Section {
503                    name: "Empty".into(),
504                    content: String::new(), // no fixed content
505                    max_tokens: 100,
506                    priority: 1,
507                },
508            ],
509            total_budget: 500,
510            separator: "\n\n---\n\n".into(),
511        };
512
513        // Pass no memories — the "Empty" section will have nothing to render.
514        let result = builder.build(&template, &[], Strategy::Greedy);
515
516        assert!(
517            result.contains("Present"),
518            "Non-empty section must be rendered"
519        );
520        assert!(
521            !result.contains("Empty"),
522            "Truly empty section must be skipped"
523        );
524    }
525
526    // Test 6: SimpleTokenCounter accuracy.
527    #[test]
528    fn test_simple_token_counter_accuracy() {
529        let counter = SimpleTokenCounter;
530
531        // 0 chars → 0 tokens.
532        assert_eq!(counter.count_tokens(""), 0);
533
534        // 4 chars → 1 token.
535        assert_eq!(counter.count_tokens("abcd"), 1);
536
537        // 8 chars → 2 tokens.
538        assert_eq!(counter.count_tokens("abcdefgh"), 2);
539
540        // 100 chars → 25 tokens.
541        assert_eq!(counter.count_tokens(&"a".repeat(100)), 25);
542
543        // Non-divisible: 10 chars → 2 (integer division).
544        assert_eq!(counter.count_tokens("abcdefghij"), 2);
545    }
546
547    // Test 7: Total budget is respected across all sections.
548    #[test]
549    fn test_total_budget_respected() {
550        let builder = ContextBuilder::new(make_counter());
551
552        // Each section allows 1000 tokens, but total_budget = 100.
553        // The rendered output (headers + separator + content) must fit within 100 tokens.
554        let budget = 100;
555        let template = PromptTemplate {
556            sections: vec![
557                Section {
558                    name: "A".into(),
559                    content: String::new(),
560                    max_tokens: 1000,
561                    priority: 0,
562                },
563                Section {
564                    name: "B".into(),
565                    content: String::new(),
566                    max_tokens: 1000,
567                    priority: 1,
568                },
569            ],
570            total_budget: budget,
571            separator: "\n\n---\n\n".into(),
572        };
573
574        // Each memory is 40 chars = 10 tokens — many of them to stress the budget.
575        let memories: Vec<MemoryEntry> = (0..20)
576            .map(|i| make_memory(&format!("{:0>40}", i), i as i64))
577            .collect();
578
579        let result = builder.build(&template, &memories, Strategy::Greedy);
580
581        // The output token count must not exceed total_budget.
582        let token_count = SimpleTokenCounter.count_tokens(&result);
583        assert!(
584            token_count <= budget,
585            "Output tokens ({token_count}) must not exceed total_budget ({budget})"
586        );
587    }
588
589    // Bonus: estimate_tokens delegates to the counter correctly.
590    #[test]
591    fn test_estimate_tokens_delegation() {
592        let builder = ContextBuilder::new(make_counter());
593        assert_eq!(
594            builder.estimate_tokens("hello"),
595            SimpleTokenCounter.count_tokens("hello")
596        );
597        assert_eq!(builder.estimate_tokens(""), 0);
598    }
599}
engram/intelligence/context_builder.rs

engram/intelligence/
context_builder.rs