engram/intelligence/context_builder.rs
1//! Memory-aware prompt construction engine (T8)
2//!
3//! Assembles retrieved memories into structured LLM prompts with configurable
4//! token budgets, section priorities, and fill strategies.
5//!
6//! # Example
7//!
8//! ```rust,ignore
9//! use engram::intelligence::context_builder::{
10//! ContextBuilder, PromptTemplate, Section, SimpleTokenCounter, Strategy,
11//! };
12//!
13//! let counter = SimpleTokenCounter;
14//! let builder = ContextBuilder::new(Box::new(counter));
15//!
16//! let template = PromptTemplate {
17//! sections: vec![
18//! Section { name: "System".into(), content: "You are helpful.".into(), max_tokens: 100, priority: 0 },
19//! Section { name: "Memories".into(), content: String::new(), max_tokens: 500, priority: 1 },
20//! ],
21//! total_budget: 600,
22//! separator: "\n\n---\n\n".into(),
23//! };
24//!
25//! let result = builder.build(&template, &memories, Strategy::Greedy);
26//! ```
27
28use chrono::{DateTime, Utc};
29
30// NOTE: For production use, tiktoken-rs can be integrated as the token counter
31// implementation by wrapping its BPE encoder in a struct that implements TokenCounter.
32// We intentionally keep this module dependency-free beyond std + chrono.
33
34// ---------------------------------------------------------------------------
35// Public types
36// ---------------------------------------------------------------------------
37
38/// A section in the prompt with its own token budget and priority.
39///
40/// - `priority = 0` is the **highest** priority (filled first in Greedy).
41/// - `content` may be pre-filled (static system prompt) or empty (to be
42/// filled by memories at build time).
43#[derive(Debug, Clone)]
44pub struct Section {
45 /// Display name used as the section header in the rendered prompt.
46 pub name: String,
47 /// Fixed content for this section. Memories are appended after this
48 /// when the builder fills the section.
49 pub content: String,
50 /// Maximum tokens allowed for this section (including fixed content).
51 pub max_tokens: usize,
52 /// Fill priority — lower number = higher priority.
53 pub priority: u8,
54}
55
56/// Template defining prompt structure.
57///
58/// The builder uses `sections` to determine what goes into the prompt and
59/// respects `total_budget` as a hard upper bound across all sections.
60#[derive(Debug, Clone)]
61pub struct PromptTemplate {
62 /// Ordered list of sections to render.
63 pub sections: Vec<Section>,
64 /// Hard token ceiling across the entire rendered prompt.
65 pub total_budget: usize,
66 /// String inserted between non-empty sections. Defaults to `"\n\n---\n\n"`.
67 pub separator: String,
68}
69
70impl Default for PromptTemplate {
71 fn default() -> Self {
72 Self {
73 sections: Vec::new(),
74 total_budget: 4096,
75 separator: "\n\n---\n\n".to_string(),
76 }
77 }
78}
79
80/// Strategy for filling sections with memories.
81#[derive(Debug, Clone, Copy, PartialEq, Eq)]
82pub enum Strategy {
83 /// Sort sections by priority (ascending), fill each with memories until
84 /// the section budget or total budget is exhausted.
85 Greedy,
86 /// Allocate `total_budget` proportionally across sections based on their
87 /// `max_tokens` ratios, then fill each within its allocation.
88 Balanced,
89 /// Sort memories by `created_at` descending (newest first), then fill
90 /// sections by priority (same as Greedy but with recency-sorted memories).
91 Recency,
92}
93
94/// Abstraction for counting tokens in a string.
95///
96/// Implement this trait to plug in an accurate tokenizer such as tiktoken-rs.
97pub trait TokenCounter: Send + Sync {
98 fn count_tokens(&self, text: &str) -> usize;
99}
100
101/// Simple token estimator that assumes ~4 characters per token.
102///
103/// This is a rough heuristic suitable for budgeting purposes. For accurate
104/// counting, implement [`TokenCounter`] using tiktoken-rs or a similar library.
105pub struct SimpleTokenCounter;
106
107impl TokenCounter for SimpleTokenCounter {
108 fn count_tokens(&self, text: &str) -> usize {
109 // ~4 chars per token is a well-known rule-of-thumb for English text.
110 text.len() / 4
111 }
112}
113
114/// Minimal memory representation used by the builder.
115///
116/// The builder only needs content and a timestamp; callers can project from
117/// the full `crate::types::Memory` into this struct before calling `build`.
118#[derive(Debug, Clone)]
119pub struct MemoryEntry {
120 pub content: String,
121 pub created_at: DateTime<Utc>,
122}
123
124impl MemoryEntry {
125 pub fn new(content: impl Into<String>, created_at: DateTime<Utc>) -> Self {
126 Self {
127 content: content.into(),
128 created_at,
129 }
130 }
131}
132
133/// Context builder that assembles memories into structured prompts.
134pub struct ContextBuilder {
135 counter: Box<dyn TokenCounter>,
136}
137
138impl ContextBuilder {
139 /// Create a new builder with the given token counter.
140 pub fn new(counter: Box<dyn TokenCounter>) -> Self {
141 Self { counter }
142 }
143
144 /// Estimate token count for `text` using the internal counter.
145 pub fn estimate_tokens(&self, text: &str) -> usize {
146 self.counter.count_tokens(text)
147 }
148
149 /// Build a prompt string from `template`, filling memory slots with
150 /// `memories` according to `strategy`.
151 ///
152 /// Returns the assembled prompt as a single `String`. Sections that end
153 /// up empty (no fixed content and no memories fit) are omitted entirely.
154 /// Sections whose content exceeds their `max_tokens` budget are truncated
155 /// with `"...[truncated]"`.
156 pub fn build(
157 &self,
158 template: &PromptTemplate,
159 memories: &[MemoryEntry],
160 strategy: Strategy,
161 ) -> String {
162 // Sort sections by priority (ascending = highest priority first).
163 let mut sections: Vec<&Section> = template.sections.iter().collect();
164 sections.sort_by_key(|s| s.priority);
165
166 // Pre-sort memories according to strategy.
167 let sorted_memories: Vec<&MemoryEntry> = match strategy {
168 Strategy::Recency => {
169 let mut m: Vec<&MemoryEntry> = memories.iter().collect();
170 m.sort_by(|a, b| b.created_at.cmp(&a.created_at));
171 m
172 }
173 _ => memories.iter().collect(),
174 };
175
176 // Compute per-section token allocations.
177 let allocations = self.compute_allocations(template, §ions, strategy);
178
179 let mut rendered: Vec<String> = Vec::new();
180 let mut total_used = 0usize;
181
182 for (idx, section) in sections.iter().enumerate() {
183 let section_budget =
184 allocations[idx].min(template.total_budget.saturating_sub(total_used));
185
186 // Build section text starting from fixed content.
187 let mut section_text = section.content.clone();
188
189 // Append memories that still fit.
190 for memory in &sorted_memories {
191 let candidate = if section_text.is_empty() {
192 memory.content.clone()
193 } else {
194 format!("{}\n{}", section_text, memory.content)
195 };
196
197 let candidate_tokens = self.counter.count_tokens(&candidate);
198 if candidate_tokens <= section_budget {
199 section_text = candidate;
200 }
201 // If over section budget, skip this memory and try the next.
202 }
203
204 // Skip empty sections entirely.
205 if section_text.is_empty() {
206 continue;
207 }
208
209 // Truncate if the section text (including fixed content) exceeds budget.
210 let section_tokens = self.counter.count_tokens(§ion_text);
211 let final_text = if section_tokens > section_budget {
212 self.truncate_to_budget(§ion_text, section_budget)
213 } else {
214 section_text.clone()
215 };
216
217 // Check that adding this section stays within total budget.
218 let separator_tokens = if rendered.is_empty() {
219 0
220 } else {
221 self.counter.count_tokens(&template.separator)
222 };
223 let final_tokens = self.counter.count_tokens(&final_text);
224
225 if total_used + separator_tokens + final_tokens > template.total_budget {
226 // Try to fit a truncated version.
227 let remaining = template
228 .total_budget
229 .saturating_sub(total_used + separator_tokens);
230 if remaining == 0 {
231 break;
232 }
233 let truncated = self.truncate_to_budget(&final_text, remaining);
234 if !truncated.is_empty() {
235 rendered.push(self.render_section(section, &truncated));
236 // total_used update omitted: we break immediately after.
237 }
238 break; // No room for further sections.
239 }
240
241 total_used += separator_tokens + final_tokens;
242 rendered.push(self.render_section(section, &final_text));
243 }
244
245 rendered.join(&template.separator)
246 }
247
248 // -----------------------------------------------------------------------
249 // Private helpers
250 // -----------------------------------------------------------------------
251
252 /// Compute the effective token budget for each section, respecting `strategy`.
253 fn compute_allocations(
254 &self,
255 template: &PromptTemplate,
256 sections: &[&Section],
257 strategy: Strategy,
258 ) -> Vec<usize> {
259 match strategy {
260 Strategy::Balanced => {
261 // Proportional: each section gets (its max_tokens / sum_of_max_tokens) * total_budget.
262 let total_weight: usize = sections.iter().map(|s| s.max_tokens).sum();
263 if total_weight == 0 {
264 return sections.iter().map(|_| 0).collect();
265 }
266 sections
267 .iter()
268 .map(|s| {
269 let ratio = s.max_tokens as f64 / total_weight as f64;
270 (ratio * template.total_budget as f64).floor() as usize
271 })
272 .collect()
273 }
274 // Greedy and Recency both use section.max_tokens directly.
275 Strategy::Greedy | Strategy::Recency => sections.iter().map(|s| s.max_tokens).collect(),
276 }
277 }
278
279 /// Truncate `text` so that it fits within `budget` tokens, appending
280 /// `"...[truncated]"` to signal the cut.
281 fn truncate_to_budget(&self, text: &str, budget: usize) -> String {
282 const SUFFIX: &str = "...[truncated]";
283
284 let suffix_tokens = self.counter.count_tokens(SUFFIX);
285
286 // If the budget can't even fit the suffix, return just the suffix (or empty).
287 if budget <= suffix_tokens {
288 return if budget == 0 {
289 String::new()
290 } else {
291 SUFFIX.to_string()
292 };
293 }
294
295 let char_budget = (budget - suffix_tokens) * 4; // approximate chars for the main text
296
297 if text.len() <= char_budget {
298 // Already fits; only add suffix if strictly over token count.
299 let tokens = self.counter.count_tokens(text);
300 if tokens <= budget {
301 return text.to_string();
302 }
303 }
304
305 // Walk character boundaries to find a safe cut point.
306 let mut end = char_budget.min(text.len());
307 // Align to a char boundary.
308 while end > 0 && !text.is_char_boundary(end) {
309 end -= 1;
310 }
311 format!("{}{}", &text[..end], SUFFIX)
312 }
313
314 /// Render a single section as `"## {name}\n{content}"`.
315 fn render_section(&self, section: &Section, content: &str) -> String {
316 format!("## {}\n{}", section.name, content)
317 }
318}
319
320// ---------------------------------------------------------------------------
321// Tests
322// ---------------------------------------------------------------------------
323
324#[cfg(test)]
325mod tests {
326 use super::*;
327 use chrono::TimeZone;
328
329 fn make_counter() -> Box<dyn TokenCounter> {
330 Box::new(SimpleTokenCounter)
331 }
332
333 fn make_memory(content: &str, days_ago: i64) -> MemoryEntry {
334 let created_at =
335 Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap() + chrono::Duration::days(-days_ago);
336 MemoryEntry::new(content, created_at)
337 }
338
339 fn simple_template(total_budget: usize) -> PromptTemplate {
340 PromptTemplate {
341 sections: vec![
342 Section {
343 name: "High Priority".into(),
344 content: String::new(),
345 max_tokens: total_budget / 2,
346 priority: 0,
347 },
348 Section {
349 name: "Low Priority".into(),
350 content: String::new(),
351 max_tokens: total_budget / 2,
352 priority: 1,
353 },
354 ],
355 total_budget,
356 separator: "\n\n---\n\n".into(),
357 }
358 }
359
360 // Test 1: Greedy strategy fills high-priority section first.
361 #[test]
362 fn test_greedy_fills_high_priority_first() {
363 let builder = ContextBuilder::new(make_counter());
364
365 // Total budget = 40 tokens → each section gets 20.
366 // Memory A alone costs 160 chars / 4 = 40 tokens — won't fit in any section.
367 // Two small memories each 20 chars / 4 = 5 tokens — both fit in high-priority.
368 let memories = vec![
369 make_memory("AAAA AAAA AAAA AAAA", 2), // 20 chars = 5 tokens
370 make_memory("BBBB BBBB BBBB BBBB", 3), // 20 chars = 5 tokens
371 ];
372
373 let template = simple_template(40);
374 let result = builder.build(&template, &memories, Strategy::Greedy);
375
376 // High-priority section should appear before low-priority.
377 let high_pos = result.find("High Priority").unwrap_or(usize::MAX);
378 let low_pos = result.find("Low Priority").unwrap_or(usize::MAX);
379 assert!(
380 high_pos < low_pos,
381 "High-priority section must come before low-priority"
382 );
383
384 // Both memories fit — result should contain both.
385 assert!(
386 result.contains("AAAA"),
387 "High-priority content must be present"
388 );
389 }
390
391 // Test 2: Balanced strategy distributes proportionally.
392 #[test]
393 fn test_balanced_proportional_allocation() {
394 let builder = ContextBuilder::new(make_counter());
395
396 // Section A max_tokens = 100, Section B max_tokens = 300.
397 // Total weight = 400, total_budget = 200.
398 // Section A gets (100/400)*200 = 50 tokens, Section B gets (300/400)*200 = 150 tokens.
399 let template = PromptTemplate {
400 sections: vec![
401 Section {
402 name: "Small".into(),
403 content: String::new(),
404 max_tokens: 100,
405 priority: 0,
406 },
407 Section {
408 name: "Large".into(),
409 content: String::new(),
410 max_tokens: 300,
411 priority: 1,
412 },
413 ],
414 total_budget: 200,
415 separator: "\n\n---\n\n".into(),
416 };
417
418 // A memory of exactly 140 chars = 35 tokens — fits in Large (150) but not Small (50).
419 let memories = vec![make_memory(&"X".repeat(140), 1)];
420 let result = builder.build(&template, &memories, Strategy::Balanced);
421
422 // The memory should appear in Large section (high allocation).
423 assert!(result.contains("Large"), "Large section must be rendered");
424 assert!(
425 result.contains(&"X".repeat(140)),
426 "Memory must fit in the Large section"
427 );
428 }
429
430 // Test 3: Recency strategy prefers newer memories.
431 #[test]
432 fn test_recency_prefers_newer_memories() {
433 let builder = ContextBuilder::new(make_counter());
434
435 // Budget tight enough for only one memory per section.
436 // old_memory is 60 chars = 15 tokens; new_memory same size.
437 // Section max_tokens = 16 — only one fits.
438 let template = PromptTemplate {
439 sections: vec![Section {
440 name: "Context".into(),
441 content: String::new(),
442 max_tokens: 20, // fits ~1 memory (15 tokens) + header
443 priority: 0,
444 }],
445 total_budget: 40,
446 separator: "\n\n---\n\n".into(),
447 };
448
449 // older memory: days_ago = 10; newer memory: days_ago = 0.
450 let memories = vec![
451 make_memory("OLD_MEMORY_CONTENT_HERE_PADDED", 10), // older
452 make_memory("NEW_MEMORY_CONTENT_HERE_PADDED", 0), // newer
453 ];
454
455 let result = builder.build(&template, &memories, Strategy::Recency);
456
457 // The newer memory must appear (it is tried first due to recency sort).
458 assert!(
459 result.contains("NEW_MEMORY"),
460 "Recency strategy must prefer newest memory"
461 );
462 }
463
464 // Test 4: Overflow truncation appends "...[truncated]".
465 #[test]
466 fn test_overflow_truncation() {
467 let builder = ContextBuilder::new(make_counter());
468
469 // Section with very small budget: 5 tokens.
470 // Fixed content is 80 chars = 20 tokens — will be truncated.
471 let template = PromptTemplate {
472 sections: vec![Section {
473 name: "Tiny".into(),
474 content: "A".repeat(80), // 20 tokens, exceeds budget of 5
475 max_tokens: 5,
476 priority: 0,
477 }],
478 total_budget: 50,
479 separator: "\n\n---\n\n".into(),
480 };
481
482 let result = builder.build(&template, &[], Strategy::Greedy);
483 assert!(
484 result.contains("...[truncated]"),
485 "Overflowed section must end with ...[truncated]; got: {result}"
486 );
487 }
488
489 // Test 5: Empty sections (no fixed content and no memories fit) are skipped.
490 #[test]
491 fn test_empty_sections_skipped() {
492 let builder = ContextBuilder::new(make_counter());
493
494 let template = PromptTemplate {
495 sections: vec![
496 Section {
497 name: "Present".into(),
498 content: "I have content.".into(),
499 max_tokens: 100,
500 priority: 0,
501 },
502 Section {
503 name: "Empty".into(),
504 content: String::new(), // no fixed content
505 max_tokens: 100,
506 priority: 1,
507 },
508 ],
509 total_budget: 500,
510 separator: "\n\n---\n\n".into(),
511 };
512
513 // Pass no memories — the "Empty" section will have nothing to render.
514 let result = builder.build(&template, &[], Strategy::Greedy);
515
516 assert!(
517 result.contains("Present"),
518 "Non-empty section must be rendered"
519 );
520 assert!(
521 !result.contains("Empty"),
522 "Truly empty section must be skipped"
523 );
524 }
525
526 // Test 6: SimpleTokenCounter accuracy.
527 #[test]
528 fn test_simple_token_counter_accuracy() {
529 let counter = SimpleTokenCounter;
530
531 // 0 chars → 0 tokens.
532 assert_eq!(counter.count_tokens(""), 0);
533
534 // 4 chars → 1 token.
535 assert_eq!(counter.count_tokens("abcd"), 1);
536
537 // 8 chars → 2 tokens.
538 assert_eq!(counter.count_tokens("abcdefgh"), 2);
539
540 // 100 chars → 25 tokens.
541 assert_eq!(counter.count_tokens(&"a".repeat(100)), 25);
542
543 // Non-divisible: 10 chars → 2 (integer division).
544 assert_eq!(counter.count_tokens("abcdefghij"), 2);
545 }
546
547 // Test 7: Total budget is respected across all sections.
548 #[test]
549 fn test_total_budget_respected() {
550 let builder = ContextBuilder::new(make_counter());
551
552 // Each section allows 1000 tokens, but total_budget = 100.
553 // The rendered output (headers + separator + content) must fit within 100 tokens.
554 let budget = 100;
555 let template = PromptTemplate {
556 sections: vec![
557 Section {
558 name: "A".into(),
559 content: String::new(),
560 max_tokens: 1000,
561 priority: 0,
562 },
563 Section {
564 name: "B".into(),
565 content: String::new(),
566 max_tokens: 1000,
567 priority: 1,
568 },
569 ],
570 total_budget: budget,
571 separator: "\n\n---\n\n".into(),
572 };
573
574 // Each memory is 40 chars = 10 tokens — many of them to stress the budget.
575 let memories: Vec<MemoryEntry> = (0..20)
576 .map(|i| make_memory(&format!("{:0>40}", i), i as i64))
577 .collect();
578
579 let result = builder.build(&template, &memories, Strategy::Greedy);
580
581 // The output token count must not exceed total_budget.
582 let token_count = SimpleTokenCounter.count_tokens(&result);
583 assert!(
584 token_count <= budget,
585 "Output tokens ({token_count}) must not exceed total_budget ({budget})"
586 );
587 }
588
589 // Bonus: estimate_tokens delegates to the counter correctly.
590 #[test]
591 fn test_estimate_tokens_delegation() {
592 let builder = ContextBuilder::new(make_counter());
593 assert_eq!(
594 builder.estimate_tokens("hello"),
595 SimpleTokenCounter.count_tokens("hello")
596 );
597 assert_eq!(builder.estimate_tokens(""), 0);
598 }
599}