1use crate::context::estimate_tokens;
26
27#[derive(Debug, Clone)]
29pub struct ContextEntry {
30 pub source: String,
33 pub content: String,
35 pub importance: f32,
37 pub age_seconds: u64,
39 pub relevance: f32,
41}
42
43#[derive(Debug, Clone)]
45pub struct CompactedContext {
46 pub text: String,
48 pub entries_retained: usize,
50 pub entries_dropped: usize,
52 pub tokens: usize,
54}
55
56#[derive(Debug, Clone)]
58pub struct CompactionConfig {
59 pub max_tokens: usize,
61 pub max_entry_chars: usize,
63 pub dedup_threshold: f64,
66}
67
68impl Default for CompactionConfig {
69 fn default() -> Self {
70 Self {
71 max_tokens: 2000,
72 max_entry_chars: 200,
73 dedup_threshold: 0.8,
74 }
75 }
76}
77
78pub fn compact(entries: &[ContextEntry], config: &CompactionConfig) -> CompactedContext {
84 if entries.is_empty() {
85 return CompactedContext {
86 text: String::new(),
87 entries_retained: 0,
88 entries_dropped: 0,
89 tokens: 0,
90 };
91 }
92
93 let mut compressed: Vec<(f64, String, &str)> = entries
95 .iter()
96 .map(|e| {
97 let text = compress_entry(&e.content, config.max_entry_chars);
98 let priority = compute_priority(e);
99 (priority, text, e.source.as_str())
100 })
101 .collect();
102
103 deduplicate(&mut compressed, config.dedup_threshold);
105
106 compressed.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
108
109 let mut output = String::new();
111 let mut retained = 0usize;
112 let mut tokens_used = 0usize;
113
114 let mut by_source: std::collections::BTreeMap<&str, Vec<&str>> =
116 std::collections::BTreeMap::new();
117 let mut budget_entries = Vec::new();
118
119 for (_, text, source) in &compressed {
120 let entry_tokens = estimate_tokens(text);
121 if tokens_used + entry_tokens > config.max_tokens {
122 break;
123 }
124 tokens_used += entry_tokens;
125 retained += 1;
126 budget_entries.push((*source, text.as_str()));
127 }
128
129 for (source, text) in &budget_entries {
131 by_source.entry(source).or_default().push(text);
132 }
133
134 for (source, texts) in &by_source {
135 let header = source_header(source);
136 output.push_str(&header);
137 output.push('\n');
138 for text in texts {
139 output.push_str("- ");
140 output.push_str(text);
141 output.push('\n');
142 }
143 output.push('\n');
144 }
145
146 let final_tokens = estimate_tokens(&output);
147
148 CompactedContext {
149 text: output.trim_end().to_string(),
150 entries_retained: retained,
151 entries_dropped: entries.len().saturating_sub(retained),
152 tokens: final_tokens,
153 }
154}
155
156fn compress_entry(content: &str, max_chars: usize) -> String {
158 let mut text = content.trim().to_string();
159
160 text = text.replace("**", "");
162 text = text.replace("## ", "");
163 text = text.replace("### ", "");
164
165 if text.contains('\n') {
167 text = text
168 .lines()
169 .map(|l| l.trim())
170 .filter(|l| !l.is_empty())
171 .collect::<Vec<_>>()
172 .join("; ");
173 }
174
175 if text.starts_with("- ") {
177 text = text[2..].to_string();
178 }
179 if text.starts_with("• ") {
180 text = text["• ".len()..].to_string();
181 }
182
183 while let Some(start) = text.find('[') {
185 if let Some(end) = text[start..].find(']') {
186 let bracket_content = &text[start + 1..start + end];
187 if bracket_content.contains('|')
189 || bracket_content.contains("sim=")
190 || bracket_content.contains("_memory")
191 {
192 text = format!(
193 "{}{}",
194 text[..start].trim_end(),
195 text[start + end + 1..].trim_start()
196 );
197 continue;
198 }
199 }
200 break;
201 }
202
203 if text.len() > max_chars {
205 text = text.chars().take(max_chars).collect();
206 if let Some(last_space) = text.rfind(' ')
208 && last_space > max_chars / 2
209 {
210 text.truncate(last_space);
211 }
212 text.push_str("...");
213 }
214
215 text
216}
217
218fn compute_priority(entry: &ContextEntry) -> f64 {
221 let importance_norm = entry.importance as f64 / 10.0; let relevance = entry.relevance as f64; let recency = if entry.age_seconds == 0 {
226 1.0
227 } else {
228 (-0.693 * entry.age_seconds as f64 / 3600.0).exp() };
230
231 if relevance > 0.1 {
234 0.4 * relevance + 0.3 * importance_norm + 0.3 * recency
235 } else {
236 0.2 * importance_norm + 0.8 * recency
237 }
238}
239
240fn deduplicate(entries: &mut Vec<(f64, String, &str)>, threshold: f64) {
243 let mut i = 0;
244 while i < entries.len() {
245 let mut duplicate = false;
246 for j in 0..i {
247 if text_overlap(&entries[j].1, &entries[i].1) > threshold {
248 duplicate = true;
249 break;
250 }
251 }
252 if duplicate {
253 entries.remove(i);
254 } else {
255 i += 1;
256 }
257 }
258}
259
260pub fn text_overlap_score(a: &str, b: &str) -> f64 {
264 text_overlap(a, b)
265}
266
267fn text_overlap(a: &str, b: &str) -> f64 {
268 let trigrams_a = word_trigrams(a);
269 let trigrams_b = word_trigrams(b);
270 if trigrams_a.is_empty() && trigrams_b.is_empty() {
271 return 1.0;
272 }
273 let intersection = trigrams_a.intersection(&trigrams_b).count();
274 let union = trigrams_a.union(&trigrams_b).count();
275 if union == 0 {
276 0.0
277 } else {
278 intersection as f64 / union as f64
279 }
280}
281
282fn word_trigrams(text: &str) -> std::collections::HashSet<String> {
283 let words: Vec<&str> = text.split_whitespace().collect();
284 if words.len() < 3 {
285 return words.iter().map(|w| w.to_ascii_lowercase()).collect();
286 }
287 words
288 .windows(3)
289 .map(|w| format!("{} {} {}", w[0], w[1], w[2]).to_ascii_lowercase())
290 .collect()
291}
292
293pub fn compact_text(text: &str, max_tokens: usize) -> String {
302 if text.is_empty() || max_tokens == 0 {
303 return String::new();
304 }
305
306 let current_tokens = crate::context::estimate_tokens(text);
307 if current_tokens <= max_tokens {
308 return text.to_string(); }
310
311 let mut output = String::new();
313 let mut used = 0usize;
314
315 for line in text.lines() {
316 let trimmed = line.trim();
317 if trimmed.is_empty() {
318 continue;
319 }
320
321 if trimmed.starts_with('[') && trimmed.ends_with(']') {
323 let header_tokens = crate::context::estimate_tokens(trimmed);
324 if used + header_tokens > max_tokens {
325 break;
326 }
327 if !output.is_empty() {
328 output.push('\n');
329 }
330 output.push_str(trimmed);
331 output.push('\n');
332 used += header_tokens;
333 continue;
334 }
335
336 let compressed = compress_entry(trimmed, 150);
338 let line_tokens = crate::context::estimate_tokens(&compressed);
339 if used + line_tokens > max_tokens {
340 break;
341 }
342 output.push_str(&compressed);
343 output.push('\n');
344 used += line_tokens;
345 }
346
347 output.trim_end().to_string()
348}
349
350fn source_header(source: &str) -> String {
352 match source {
353 "working" => "[Working Memory]".to_string(),
354 "ambient" => "[Recent Activity]".to_string(),
355 "episodic" | "semantic" => "[Relevant Memories]".to_string(),
356 "procedural" => "[Skills]".to_string(),
357 "relationship" => "[Relationships]".to_string(),
358 "checkpoint" => "[Session Context]".to_string(),
359 "hippocampus" => "[Storage]".to_string(),
360 "topic_summary" => "[Earlier Topics]".to_string(),
361 other => format!("[{other}]"),
362 }
363}
364
365#[cfg(test)]
366mod tests {
367 use super::*;
368
369 fn entry(
370 source: &str,
371 content: &str,
372 importance: f32,
373 age_secs: u64,
374 relevance: f32,
375 ) -> ContextEntry {
376 ContextEntry {
377 source: source.to_string(),
378 content: content.to_string(),
379 importance,
380 age_seconds: age_secs,
381 relevance,
382 }
383 }
384
385 #[test]
386 fn empty_input_produces_empty_output() {
387 let result = compact(&[], &CompactionConfig::default());
388 assert!(result.text.is_empty());
389 assert_eq!(result.entries_retained, 0);
390 }
391
392 #[test]
393 fn single_entry_passes_through() {
394 let entries = vec![entry(
395 "episodic",
396 "User asked about workspace cleanup",
397 5.0,
398 300,
399 0.8,
400 )];
401 let result = compact(&entries, &CompactionConfig::default());
402 assert!(result.text.contains("workspace cleanup"));
403 assert_eq!(result.entries_retained, 1);
404 assert_eq!(result.entries_dropped, 0);
405 }
406
407 #[test]
408 fn duplicates_are_removed() {
409 let entries = vec![
410 entry(
411 "episodic",
412 "Agent cleaned up workspace files",
413 5.0,
414 300,
415 0.8,
416 ),
417 entry("ambient", "Agent cleaned up workspace files", 5.0, 300, 0.0),
418 ];
419 let result = compact(&entries, &CompactionConfig::default());
420 assert_eq!(result.entries_retained, 1);
421 assert_eq!(result.entries_dropped, 1);
422 }
423
424 #[test]
425 fn budget_enforced() {
426 let entries: Vec<ContextEntry> = (0..100)
427 .map(|i| {
428 entry(
429 "episodic",
430 &format!("Memory entry number {i} with some content to take up space"),
431 5.0,
432 i * 60,
433 0.5,
434 )
435 })
436 .collect();
437 let config = CompactionConfig {
438 max_tokens: 100,
439 ..Default::default()
440 };
441 let result = compact(&entries, &config);
442 assert!(result.tokens <= 110); assert!(result.entries_retained < 100);
444 assert!(result.entries_dropped > 0);
445 }
446
447 #[test]
448 fn high_priority_entries_retained_first() {
449 let entries = vec![
450 entry("episodic", "Old low-relevance memory", 1.0, 7200, 0.1),
451 entry("ambient", "Very recent high-importance fact", 9.0, 30, 0.0),
452 entry("semantic", "Highly relevant stored fact", 5.0, 3600, 0.9),
453 ];
454 let config = CompactionConfig {
455 max_tokens: 15, ..Default::default()
457 };
458 let result = compact(&entries, &config);
459 assert!(result.entries_retained >= 1);
461 if result.entries_retained < 3 {
464 assert!(!result.text.contains("Old low-relevance"));
465 }
466 }
467
468 #[test]
469 fn compress_strips_formatting() {
470 let raw = "**Important**: This is a [episodic_memory | sim=0.85] formatted entry\nWith multiple lines\nAnd verbose content";
471 let compressed = compress_entry(raw, 200);
472 assert!(!compressed.contains("**"));
473 assert!(!compressed.contains('\n'));
474 assert!(!compressed.contains("sim=0.85"));
475 }
476
477 #[test]
478 fn compress_truncates_long_entries() {
479 let long = "a ".repeat(200);
480 let compressed = compress_entry(&long, 50);
481 assert!(compressed.len() < 60); assert!(compressed.ends_with("..."));
483 }
484
485 #[test]
486 fn priority_favors_recent_relevant_entries() {
487 let recent_relevant = ContextEntry {
488 source: "episodic".into(),
489 content: "test".into(),
490 importance: 5.0,
491 age_seconds: 60,
492 relevance: 0.9,
493 };
494 let old_irrelevant = ContextEntry {
495 source: "episodic".into(),
496 content: "test".into(),
497 importance: 5.0,
498 age_seconds: 86400,
499 relevance: 0.1,
500 };
501 assert!(compute_priority(&recent_relevant) > compute_priority(&old_irrelevant));
502 }
503
504 #[test]
505 fn text_overlap_identical() {
506 assert!((text_overlap("the quick brown fox", "the quick brown fox") - 1.0).abs() < 0.01);
507 }
508
509 #[test]
510 fn text_overlap_different() {
511 assert!(text_overlap("the quick brown fox", "completely different words here") < 0.1);
512 }
513
514 #[test]
515 fn grouped_output_has_section_headers() {
516 let entries = vec![
517 entry("ambient", "Recent thing happened", 5.0, 60, 0.0),
518 entry("procedural", "How to run a scan", 5.0, 3600, 0.7),
519 ];
520 let result = compact(&entries, &CompactionConfig::default());
521 assert!(result.text.contains("[Recent Activity]"));
522 assert!(result.text.contains("[Skills]"));
523 }
524}