1use roboticus_core::config::MemoryConfig;
2use roboticus_db::Database;
3use serde::Serialize;
4use std::collections::HashSet;
5
6use crate::context::{ComplexityLevel, token_budget};
7use crate::memory::MemoryBudgetManager;
8
/// Summary statistics describing one memory-retrieval pass.
///
/// Built by `MemoryRetriever::retrieve_with_metrics` alongside the retrieved text.
#[derive(Debug, Clone, Default, Serialize)]
pub struct RetrievalMetrics {
    /// Total number of entries counted across all tiers (the sum of the fields of `tiers`).
    pub retrieval_count: usize,
    /// `true` when `retrieval_count` is greater than zero.
    pub retrieval_hit: bool,
    /// Mean similarity of the search results left after filtering and re-ranking;
    /// `0.0` when no result is left.
    pub avg_similarity: f64,
    /// Estimated tokens of the returned text divided by the total token budget;
    /// `0.0` when the budget is zero.
    pub budget_utilization: f64,
    /// Per-tier entry counts that make up `retrieval_count`.
    pub tiers: MemoryTierBreakdown,
}
23
/// Number of entries retrieved from each memory tier during one retrieval pass.
#[derive(Debug, Clone, Default, Serialize)]
pub struct MemoryTierBreakdown {
    /// Bulleted lines rendered in the `[Working Memory]` section.
    pub working: usize,
    /// Bulleted lines of the `[Recent Activity]` section plus search results
    /// whose source table is `episodic_memory`.
    pub episodic: usize,
    /// Search results whose source table is `semantic_memory`.
    pub semantic: usize,
    /// Bulleted lines rendered in the `[Tool Experience]` section.
    pub procedural: usize,
    /// Bulleted lines rendered in the `[Known Entities]` section.
    pub relationship: usize,
}
33
34pub struct RetrievalOutput {
36 pub text: String,
38 pub metrics: RetrievalMetrics,
40}
41
/// Retrieves memories from the database and renders them into a prompt block.
pub struct MemoryRetriever {
    /// Splits the total token budget into per-tier budgets.
    budget_manager: MemoryBudgetManager,
    /// Weight forwarded to `roboticus_db::embeddings::hybrid_search`.
    hybrid_weight: f64,
    /// Search results with a similarity below this value are dropped;
    /// a value `<= 0.0` disables the filter.
    similarity_threshold: f64,
    /// Half-life, in days, used to decay episodic similarity scores;
    /// a value `<= 0.0` disables the re-ranking.
    decay_half_life_days: f64,
}
52
53impl MemoryRetriever {
54 pub fn new(config: MemoryConfig) -> Self {
55 let hybrid_weight = config.hybrid_weight;
56 let similarity_threshold = config.similarity_threshold;
57 let decay_half_life_days = config.decay_half_life_days;
58 Self {
59 budget_manager: MemoryBudgetManager::new(config),
60 hybrid_weight,
61 similarity_threshold,
62 decay_half_life_days,
63 }
64 }
65
66 pub fn with_decay_half_life(mut self, days: f64) -> Self {
68 self.decay_half_life_days = days;
69 self
70 }
71
72 pub fn retrieve(
75 &self,
76 db: &Database,
77 session_id: &str,
78 query: &str,
79 query_embedding: Option<&[f32]>,
80 complexity: ComplexityLevel,
81 ) -> String {
82 self.retrieve_with_ann(db, session_id, query, query_embedding, complexity, None)
83 }
84
85 pub fn retrieve_with_ann(
88 &self,
89 db: &Database,
90 session_id: &str,
91 query: &str,
92 query_embedding: Option<&[f32]>,
93 complexity: ComplexityLevel,
94 ann_index: Option<&roboticus_db::ann::AnnIndex>,
95 ) -> String {
96 self.retrieve_with_metrics(
97 db,
98 session_id,
99 query,
100 query_embedding,
101 complexity,
102 ann_index,
103 )
104 .text
105 }
106
107 pub fn retrieve_with_metrics(
113 &self,
114 db: &Database,
115 session_id: &str,
116 query: &str,
117 query_embedding: Option<&[f32]>,
118 complexity: ComplexityLevel,
119 ann_index: Option<&roboticus_db::ann::AnnIndex>,
120 ) -> RetrievalOutput {
121 let total_budget = token_budget(complexity);
122 let budgets = self.budget_manager.allocate_budgets(total_budget);
123
124 let mut sections = Vec::new();
125 let mut tiers = MemoryTierBreakdown::default();
126
127 let working_count = if let Some(s) = self.retrieve_working(db, session_id, budgets.working)
128 {
129 let count = s.lines().filter(|l| l.starts_with("- ")).count();
131 sections.push(s);
132 count
133 } else {
134 0
135 };
136 tiers.working = working_count;
137
138 let ambient_count = if let Some(s) = self.retrieve_recent_ambient(db, budgets.episodic / 3)
144 {
145 let count = s.lines().filter(|l| l.starts_with("- ")).count();
146 sections.push(s);
147 count
148 } else {
149 0
150 };
151 tiers.episodic += ambient_count;
152
153 let relevant = if let (Some(ann), Some(emb)) = (ann_index, query_embedding) {
155 ann.search(emb, 10).map(|results| {
156 results
157 .into_iter()
158 .map(|r| roboticus_db::embeddings::SearchResult {
159 source_table: r.source_table,
160 source_id: r.source_id,
161 content_preview: r.content_preview,
162 similarity: r.similarity,
163 })
164 .collect::<Vec<_>>()
165 })
166 } else {
167 None
168 };
169 let mut relevant = relevant.unwrap_or_else(|| {
170 roboticus_db::embeddings::hybrid_search(
171 db,
172 query,
173 query_embedding,
174 10,
175 self.hybrid_weight,
176 )
177 .unwrap_or_default()
178 });
179
180 if self.similarity_threshold > 0.0 {
181 relevant.retain(|r| r.similarity >= self.similarity_threshold);
182 }
183
184 if !query_requests_inactive_memories(query) {
185 self.filter_inactive_memories(db, &mut relevant);
186 }
187
188 if self.decay_half_life_days > 0.0 {
191 self.rerank_episodic_by_decay(db, &mut relevant);
192 }
193
194 let avg_similarity = if relevant.is_empty() {
197 0.0
198 } else {
199 let sum: f64 = relevant.iter().map(|r| r.similarity).sum();
200 sum / relevant.len() as f64
201 };
202
203 for r in &relevant {
205 match r.source_table.as_str() {
206 "episodic_memory" => tiers.episodic += 1,
207 "semantic_memory" => tiers.semantic += 1,
208 _ => {} }
210 }
211
212 if let Some(s) = self.format_relevant(&relevant, budgets.episodic + budgets.semantic) {
213 sections.push(s);
214 }
215
216 let procedural_count = if let Some(s) = self.retrieve_procedural(db, budgets.procedural) {
217 let count = s.lines().filter(|l| l.starts_with("- ")).count();
218 sections.push(s);
219 count
220 } else {
221 0
222 };
223 tiers.procedural = procedural_count;
224
225 let relationship_count =
226 if let Some(s) = self.retrieve_relationships(db, query, budgets.relationship) {
227 let count = s.lines().filter(|l| l.starts_with("- ")).count();
228 sections.push(s);
229 count
230 } else {
231 0
232 };
233 tiers.relationship = relationship_count;
234
235 let index_entries = roboticus_db::memory_index::top_entries(db, 20).unwrap_or_default();
240 let index_text = roboticus_db::memory_index::format_index_for_injection(&index_entries);
241
242 let direct_sections: Vec<&String> = sections
243 .iter()
244 .filter(|s| s.starts_with("[Working Memory]") || s.starts_with("[Recent Activity]"))
245 .collect();
246
247 let text = if direct_sections.is_empty() && index_text.is_empty() {
248 String::new()
249 } else {
250 let mut block = String::new();
251 for section in &direct_sections {
252 block.push_str(section);
253 block.push_str("\n\n");
254 }
255 if !index_text.is_empty() {
256 block.push_str(&index_text);
257 }
258 block.trim_end().to_string()
259 };
260
261 let memory_tokens = estimate_tokens(&text);
262 let retrieval_count =
263 tiers.working + tiers.episodic + tiers.semantic + tiers.procedural + tiers.relationship;
264
265 let metrics = RetrievalMetrics {
266 retrieval_count,
267 retrieval_hit: retrieval_count > 0,
268 avg_similarity,
269 budget_utilization: if total_budget > 0 {
270 memory_tokens as f64 / total_budget as f64
271 } else {
272 0.0
273 },
274 tiers,
275 };
276
277 RetrievalOutput { text, metrics }
278 }
279
280 fn retrieve_working(
281 &self,
282 db: &Database,
283 session_id: &str,
284 budget_tokens: usize,
285 ) -> Option<String> {
286 if budget_tokens == 0 {
287 return None;
288 }
289
290 let entries = roboticus_db::memory::retrieve_working(db, session_id)
291 .inspect_err(
292 |e| tracing::warn!(error = %e, session_id, "working memory retrieval failed"),
293 )
294 .ok()?;
295 if entries.is_empty() {
296 return None;
297 }
298
299 let mut text = String::from("[Working Memory]\n");
300 let mut used = estimate_tokens(&text);
301
302 for entry in &entries {
303 if entry.entry_type.eq_ignore_ascii_case("turn_summary") {
306 continue;
307 }
308 let line = format!("- [{}] {}\n", entry.entry_type, entry.content);
309 let line_tokens = estimate_tokens(&line);
310 if used + line_tokens > budget_tokens {
311 break;
312 }
313 text.push_str(&line);
314 used += line_tokens;
315 }
316
317 if text.len() > "[Working Memory]\n".len() {
318 Some(text)
319 } else {
320 None
321 }
322 }
323
324 fn retrieve_recent_ambient(&self, db: &Database, budget_tokens: usize) -> Option<String> {
330 if budget_tokens == 0 {
331 return None;
332 }
333
334 let entries = roboticus_db::memory::retrieve_recent_episodic(db, 2, 10)
335 .inspect_err(|e| tracing::warn!(error = %e, "recent ambient memory retrieval failed"))
336 .ok()?;
337 if entries.is_empty() {
338 return None;
339 }
340
341 let mut text = String::from("[Recent Activity]\n");
342 let mut used = estimate_tokens(&text);
343
344 for entry in &entries {
345 let time_label = entry.created_at.get(11..16).unwrap_or("??:??");
346 let classification = if entry.classification.is_empty() {
347 "note"
348 } else {
349 &entry.classification
350 };
351 let line = format!(
352 "- [{}] ({}) {}\n",
353 time_label, classification, entry.content,
354 );
355 let line_tokens = estimate_tokens(&line);
356 if used + line_tokens > budget_tokens {
357 break;
358 }
359 text.push_str(&line);
360 used += line_tokens;
361 }
362
363 if text.len() > "[Recent Activity]\n".len() {
364 Some(text)
365 } else {
366 None
367 }
368 }
369
370 fn format_relevant(
371 &self,
372 results: &[roboticus_db::embeddings::SearchResult],
373 budget_tokens: usize,
374 ) -> Option<String> {
375 if budget_tokens == 0 || results.is_empty() {
376 return None;
377 }
378
379 let mut text = String::from("[Relevant Memories]\n");
380 let mut used = estimate_tokens(&text);
381
382 for result in results {
383 let line = format!(
384 "- [{} | sim={:.2}] {}\n",
385 result.source_table, result.similarity, result.content_preview,
386 );
387 let line_tokens = estimate_tokens(&line);
388 if used + line_tokens > budget_tokens {
389 break;
390 }
391 text.push_str(&line);
392 used += line_tokens;
393 }
394
395 if text.len() > "[Relevant Memories]\n".len() {
396 Some(text)
397 } else {
398 None
399 }
400 }
401
402 fn rerank_episodic_by_decay(
409 &self,
410 db: &Database,
411 results: &mut [roboticus_db::embeddings::SearchResult],
412 ) {
413 let now = chrono::Utc::now();
414
415 let episodic_ids: Vec<&str> = results
419 .iter()
420 .filter(|r| r.source_table == "episodic_memory")
421 .map(|r| r.source_id.as_str())
422 .collect();
423
424 if episodic_ids.is_empty() {
425 return;
426 }
427
428 let age_map: std::collections::HashMap<String, f64> = {
430 let conn = db.conn();
431 let placeholders: Vec<String> =
432 (1..=episodic_ids.len()).map(|i| format!("?{i}")).collect();
433 let sql = format!(
434 "SELECT id, created_at FROM episodic_memory WHERE id IN ({})",
435 placeholders.join(", ")
436 );
437 let mut stmt = match conn.prepare(&sql) {
438 Ok(s) => s,
439 Err(_) => return,
440 };
441 let rows = match stmt
442 .query_map(roboticus_db::params_from_iter(episodic_ids.iter()), |row| {
443 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
444 }) {
445 Ok(r) => r,
446 Err(_) => return,
447 };
448 rows.filter_map(|r| {
449 r.inspect_err(|e| tracing::warn!("skipping corrupted episodic row: {e}"))
450 .ok()
451 })
452 .filter_map(|(id, ts)| {
453 chrono::DateTime::parse_from_rfc3339(&ts)
454 .ok()
455 .map(|created| {
456 let age = (now - created.with_timezone(&chrono::Utc))
462 .to_std()
463 .map(|d| d.as_secs_f64() / 86_400.0)
464 .unwrap_or(0.0);
465 (id, age)
466 })
467 })
468 .collect()
469 }; for result in results.iter_mut() {
472 if result.source_table != "episodic_memory" {
473 continue;
474 }
475 if result.source_id.is_empty() {
476 result.similarity *= 0.5;
480 continue;
481 }
482 if let Some(&age) = age_map.get(&result.source_id) {
483 let decay_factor = (0.5_f64).powf(age / self.decay_half_life_days);
484 let clamped = decay_factor.max(0.05);
487 result.similarity *= clamped;
488 }
489 }
490
491 results.sort_by(|a, b| {
493 b.similarity
494 .partial_cmp(&a.similarity)
495 .unwrap_or(std::cmp::Ordering::Equal)
496 });
497 }
498
499 fn filter_inactive_memories(
500 &self,
501 db: &Database,
502 results: &mut Vec<roboticus_db::embeddings::SearchResult>,
503 ) {
504 let episodic_ids: Vec<&str> = results
505 .iter()
506 .filter(|r| r.source_table == "episodic_memory" && !r.source_id.is_empty())
507 .map(|r| r.source_id.as_str())
508 .collect();
509 let semantic_ids: Vec<&str> = results
510 .iter()
511 .filter(|r| r.source_table == "semantic_memory" && !r.source_id.is_empty())
512 .map(|r| r.source_id.as_str())
513 .collect();
514
515 let episodic_inactive = self.load_inactive_ids(db, "episodic_memory", &episodic_ids);
516 let semantic_inactive = self.load_inactive_ids(db, "semantic_memory", &semantic_ids);
517
518 results.retain(|r| match r.source_table.as_str() {
519 "episodic_memory" => !episodic_inactive.contains(r.source_id.as_str()),
520 "semantic_memory" => !semantic_inactive.contains(r.source_id.as_str()),
521 _ => true,
522 });
523 }
524
525 fn load_inactive_ids(&self, db: &Database, table: &str, ids: &[&str]) -> HashSet<String> {
526 if ids.is_empty() {
527 return HashSet::new();
528 }
529
530 let conn = db.conn();
531 let placeholders: Vec<String> = (1..=ids.len()).map(|i| format!("?{i}")).collect();
532 let sql = format!(
533 "SELECT id, memory_state FROM {table} WHERE id IN ({})",
534 placeholders.join(", ")
535 );
536 let mut stmt = match conn.prepare(&sql) {
537 Ok(stmt) => stmt,
538 Err(e) => {
539 tracing::warn!(error = %e, table, "failed to prepare inactive-memory query");
540 return HashSet::new();
541 }
542 };
543 let rows = match stmt.query_map(roboticus_db::params_from_iter(ids.iter()), |row| {
544 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
545 }) {
546 Ok(rows) => rows,
547 Err(e) => {
548 tracing::warn!(error = %e, table, "failed to query inactive memories");
549 return HashSet::new();
550 }
551 };
552
553 let mut inactive = HashSet::new();
554 for row in rows {
555 match row {
556 Ok((id, state)) if !state.eq_ignore_ascii_case("active") => {
557 inactive.insert(id);
558 }
559 Ok(_) => {}
560 Err(e) => tracing::warn!(error = %e, table, "skipping invalid memory-state row"),
561 }
562 }
563 inactive
564 }
565
566 fn retrieve_procedural(&self, db: &Database, budget_tokens: usize) -> Option<String> {
567 if budget_tokens == 0 {
568 return None;
569 }
570
571 let conn = db.conn();
573 let mut stmt = conn
574 .prepare(
575 "SELECT name, steps, success_count, failure_count FROM procedural_memory \
576 WHERE success_count > 0 OR failure_count > 0 \
577 ORDER BY success_count + failure_count DESC LIMIT 5",
578 )
579 .ok()?;
580
581 let rows: Vec<(String, String, i64, i64)> = stmt
582 .query_map([], |row| {
583 Ok((
584 row.get::<_, String>(0)?,
585 row.get::<_, String>(1)?,
586 row.get::<_, i64>(2)?,
587 row.get::<_, i64>(3)?,
588 ))
589 })
590 .inspect_err(|e| tracing::warn!("failed to query tool experience: {e}"))
591 .ok()?
592 .filter_map(|r| {
593 r.inspect_err(|e| tracing::warn!("skipping corrupted tool experience row: {e}"))
594 .ok()
595 })
596 .collect();
597
598 if rows.is_empty() {
599 return None;
600 }
601
602 let mut text = String::from("[Tool Experience]\n");
603 let mut used = estimate_tokens(&text);
604
605 for (name, _steps, successes, failures) in &rows {
606 let total = *successes + *failures;
607 let rate = if total > 0 {
608 (*successes as f64 / total as f64 * 100.0) as u32
609 } else {
610 0
611 };
612 let line = format!("- {name}: {successes}/{total} success ({rate}%)\n");
613 let line_tokens = estimate_tokens(&line);
614 if used + line_tokens > budget_tokens {
615 break;
616 }
617 text.push_str(&line);
618 used += line_tokens;
619 }
620
621 if text.len() > "[Tool Experience]\n".len() {
622 Some(text)
623 } else {
624 None
625 }
626 }
627
628 fn retrieve_relationships(
629 &self,
630 db: &Database,
631 query: &str,
632 budget_tokens: usize,
633 ) -> Option<String> {
634 if budget_tokens == 0 {
635 return None;
636 }
637
638 let conn = db.conn();
639 let mut stmt = conn
640 .prepare(
641 "SELECT entity_id, entity_name, trust_score, interaction_count \
642 FROM relationship_memory ORDER BY interaction_count DESC LIMIT 5",
643 )
644 .ok()?;
645
646 let rows: Vec<(String, Option<String>, f64, i64)> = stmt
647 .query_map([], |row| {
648 Ok((
649 row.get::<_, String>(0)?,
650 row.get::<_, Option<String>>(1)?,
651 row.get::<_, f64>(2)?,
652 row.get::<_, i64>(3)?,
653 ))
654 })
655 .inspect_err(|e| tracing::warn!("failed to query relationship memory: {e}"))
656 .ok()?
657 .filter_map(|r| {
658 r.inspect_err(|e| tracing::warn!("skipping corrupted relationship row: {e}"))
659 .ok()
660 })
661 .collect();
662
663 if rows.is_empty() {
664 return None;
665 }
666
667 let query_lower = query.to_lowercase();
669 let relevant: Vec<_> = rows
670 .into_iter()
671 .filter(|(id, name, _, count)| {
672 *count > 2
673 || query_lower.contains(&id.to_lowercase())
674 || name
675 .as_ref()
676 .is_some_and(|n| query_lower.contains(&n.to_lowercase()))
677 })
678 .collect();
679
680 if relevant.is_empty() {
681 return None;
682 }
683
684 let mut text = String::from("[Known Entities]\n");
685 let mut used = estimate_tokens(&text);
686
687 for (entity_id, name, trust, count) in &relevant {
688 let display = name.as_deref().unwrap_or(entity_id);
689 let line = format!("- {display}: trust={trust:.1}, interactions={count}\n");
690 let line_tokens = estimate_tokens(&line);
691 if used + line_tokens > budget_tokens {
692 break;
693 }
694 text.push_str(&line);
695 used += line_tokens;
696 }
697
698 if text.len() > "[Known Entities]\n".len() {
699 Some(text)
700 } else {
701 None
702 }
703 }
704}
705
/// Reports whether `query` explicitly asks about past or archived information,
/// in which case inactive memories should not be filtered out.
///
/// The query is split into ASCII-alphanumeric words and each word is compared
/// case-insensitively against a fixed list of history terms. Matching whole
/// words (rather than substrings) keeps unrelated words such as "folder",
/// "threshold" or "paste" from switching history mode on.
fn query_requests_inactive_memories(query: &str) -> bool {
    const HISTORY_TERMS: &[&str] = &[
        "history",
        "historical",
        "previous",
        "previously",
        "earlier",
        "before",
        "past",
        "old",
        "older",
        "oldest",
        "resolved",
        "stale",
        "archive",
        "archived",
        "archives",
    ];

    query
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .any(|word| {
            HISTORY_TERMS
                .iter()
                .any(|term| word.eq_ignore_ascii_case(term))
        })
}
725
/// Estimates the token count of `text` as its UTF-8 byte length divided by
/// four, rounded up.
fn estimate_tokens(text: &str) -> usize {
    let bytes = text.len();
    let full_groups = bytes / 4;
    let has_remainder = bytes % 4 != 0;
    full_groups + usize::from(has_remainder)
}
729
/// Size limits applied by [`chunk_text`], expressed in estimated tokens.
///
/// One token is treated as four bytes of UTF-8 text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkConfig {
    /// Upper bound on the size of a chunk; `0` makes [`chunk_text`] return no chunks.
    pub max_tokens: usize,
    /// Amount of text consecutive chunks are allowed to share.
    pub overlap_tokens: usize,
}

impl Default for ChunkConfig {
    /// 512-token chunks with a 64-token overlap.
    fn default() -> Self {
        Self {
            max_tokens: 512,
            overlap_tokens: 64,
        }
    }
}

/// One piece of a text split by [`chunk_text`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Chunk {
    /// The chunk's content, i.e. `source[start_char..end_char]`.
    pub text: String,
    /// Zero-based position of the chunk in the returned list.
    pub index: usize,
    /// Start of the chunk as a *byte* offset into the source text.
    pub start_char: usize,
    /// Exclusive end of the chunk as a *byte* offset into the source text.
    pub end_char: usize,
}

/// Returns the largest char boundary of `text` that is `<= pos`, or
/// `text.len()` when `pos` is at or past the end.
fn floor_char_boundary(text: &str, pos: usize) -> usize {
    if pos >= text.len() {
        return text.len();
    }
    let mut p = pos;
    // Offset 0 is always a boundary, so the loop terminates.
    while p > 0 && !text.is_char_boundary(p) {
        p -= 1;
    }
    p
}

/// Splits `text` into overlapping chunks of at most `config.max_tokens`
/// estimated tokens (four bytes per token).
///
/// Chunks end at a paragraph, sentence or word break when one exists in the
/// second half of the window (see `find_break_point`). Chunk boundaries always
/// fall on UTF-8 char boundaries. Returns an empty list for empty text or a
/// zero `max_tokens`, and a single chunk when the whole text fits.
///
/// Every iteration moves the start forward by at least one character, so the
/// function terminates even when `overlap_tokens >= max_tokens`.
pub fn chunk_text(text: &str, config: &ChunkConfig) -> Vec<Chunk> {
    if text.is_empty() || config.max_tokens == 0 {
        return Vec::new();
    }

    // Saturate so absurdly large limits degrade to "one chunk" instead of
    // overflowing.
    let max_bytes = config.max_tokens.saturating_mul(4);
    let overlap_bytes = config.overlap_tokens.saturating_mul(4);

    if text.len() <= max_bytes {
        return vec![Chunk {
            text: text.to_string(),
            index: 0,
            start_char: 0,
            end_char: text.len(),
        }];
    }

    let step = max_bytes.saturating_sub(overlap_bytes).max(1);
    let mut chunks = Vec::new();
    let mut start = 0;

    while start < text.len() {
        let raw_end =
            floor_char_boundary(text, start.saturating_add(max_bytes).min(text.len()));

        let end = find_break_point(text, start, raw_end);

        chunks.push(Chunk {
            text: text[start..end].to_string(),
            index: chunks.len(),
            start_char: start,
            end_char: end,
        });

        if end >= text.len() {
            break;
        }

        let advance = step.min(end - start).max(1);
        let mut next = floor_char_boundary(text, start + advance);
        if next <= start {
            // The step landed inside the multibyte character at `start`, and
            // flooring would stall the loop forever. Move forward to the next
            // boundary instead; it is at most `end`, which is a boundary.
            next = start + advance;
            while !text.is_char_boundary(next) {
                next += 1;
            }
        }
        start = next;
    }

    chunks
}

/// Picks where the chunk starting at `start` should end, at or before `raw_end`.
///
/// Only the second half of `start..raw_end` is searched. Preference order:
/// the last blank line (`"\n\n"`), then the last occurrence of the first
/// sentence delimiter found among `". "`, `".\n"`, `"? "`, `"! "` (tried in
/// that order), then the last space. The returned offset lies just after the
/// delimiter. Falls back to `raw_end` when no break exists, and returns
/// `text.len()` when `raw_end` already reaches the end of the text.
fn find_break_point(text: &str, start: usize, raw_end: usize) -> usize {
    if raw_end >= text.len() {
        return text.len();
    }

    let search_start = floor_char_boundary(text, start + (raw_end - start) / 2);
    let window = &text[search_start..raw_end];

    if let Some(pos) = window.rfind("\n\n") {
        return search_start + pos + 2;
    }
    for delim in [". ", ".\n", "? ", "! "] {
        if let Some(pos) = window.rfind(delim) {
            return search_start + pos + delim.len();
        }
    }
    if let Some(pos) = window.rfind(' ') {
        return search_start + pos + 1;
    }

    raw_end
}
832
833#[cfg(test)]
834mod tests {
835 use super::*;
836
837 fn test_db() -> Database {
838 Database::new(":memory:").unwrap()
839 }
840
841 fn default_config() -> MemoryConfig {
842 MemoryConfig::default()
843 }
844
845 #[test]
846 fn retriever_empty_db_returns_empty() {
847 let db = test_db();
848 let retriever = MemoryRetriever::new(default_config());
849 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
850 let result = retriever.retrieve(&db, &session_id, "hello", None, ComplexityLevel::L1);
851 assert!(result.is_empty());
852 }
853
854 #[test]
855 fn retriever_returns_working_memory() {
856 let db = test_db();
857 let retriever = MemoryRetriever::new(default_config());
858 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
859
860 roboticus_db::memory::store_working(&db, &session_id, "goal", "find documentation", 8)
861 .unwrap();
862
863 let result = retriever.retrieve(&db, &session_id, "hello", None, ComplexityLevel::L2);
864 assert!(result.contains("Working Memory"));
865 assert!(result.contains("find documentation"));
866 }
867
868 #[test]
869 fn retriever_skips_turn_summary_working_entries() {
870 let db = test_db();
871 let retriever = MemoryRetriever::new(default_config());
872 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
873
874 roboticus_db::memory::store_working(
875 &db,
876 &session_id,
877 "turn_summary",
878 "Good to be back on familiar ground.",
879 9,
880 )
881 .unwrap();
882 roboticus_db::memory::store_working(&db, &session_id, "goal", "fix Telegram loop", 8)
883 .unwrap();
884
885 let result = retriever.retrieve(&db, &session_id, "telegram", None, ComplexityLevel::L2);
886 assert!(result.contains("Working Memory"));
887 assert!(result.contains("fix Telegram loop"));
888 assert!(!result.contains("Good to be back on familiar ground."));
889 }
890
891 #[test]
892 fn retriever_returns_relevant_memories() {
893 let db = test_db();
894 let retriever = MemoryRetriever::new(default_config());
895 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
896
897 let id = roboticus_db::memory::store_semantic(&db, "facts", "sky", "the sky is blue", 0.9)
898 .unwrap();
899 roboticus_db::memory_index::upsert_index_entry(
901 &db,
902 "semantic_memory",
903 &id,
904 "the sky is blue",
905 Some("facts"),
906 )
907 .unwrap();
908
909 let result = retriever.retrieve(&db, &session_id, "sky", None, ComplexityLevel::L2);
910 assert!(
911 result.contains("[Memory Index"),
912 "index-only injection should contain the memory index header"
913 );
914 assert!(
915 result.contains("the sky is blue"),
916 "index entry summary should appear in output"
917 );
918 }
919
920 #[test]
921 fn retriever_returns_procedural_experience() {
922 let db = test_db();
923 let retriever = MemoryRetriever::new(default_config());
924 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
925
926 roboticus_db::memory::store_procedural(&db, "web_search", "search the web").unwrap();
927 roboticus_db::memory::record_procedural_success(&db, "web_search").unwrap();
928 roboticus_db::memory::record_procedural_success(&db, "web_search").unwrap();
929
930 let output = retriever.retrieve_with_metrics(
932 &db,
933 &session_id,
934 "search",
935 None,
936 ComplexityLevel::L2,
937 None,
938 );
939 assert!(
940 output.metrics.tiers.procedural >= 1,
941 "procedural tier should count the stored tool experience"
942 );
943 }
944
945 #[test]
946 fn retriever_returns_relationships() {
947 let db = test_db();
948 let retriever = MemoryRetriever::new(default_config());
949 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
950
951 roboticus_db::memory::store_relationship(&db, "user-1", "Jon", 0.9).unwrap();
952 let output = retriever.retrieve_with_metrics(
954 &db,
955 &session_id,
956 "Jon",
957 None,
958 ComplexityLevel::L2,
959 None,
960 );
961 assert!(
962 output.metrics.tiers.relationship >= 1,
963 "relationship tier should count the stored entity"
964 );
965 }
966
967 #[test]
968 fn retriever_respects_zero_budget() {
969 let config = MemoryConfig {
970 working_budget_pct: 0.0,
971 episodic_budget_pct: 0.0,
972 semantic_budget_pct: 0.0,
973 procedural_budget_pct: 0.0,
974 relationship_budget_pct: 100.0,
975 ..default_config()
976 };
977 let db = test_db();
978 let retriever = MemoryRetriever::new(config);
979 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
980
981 roboticus_db::memory::store_working(&db, &session_id, "goal", "test", 5).unwrap();
982
983 let result = retriever.retrieve(&db, &session_id, "test", None, ComplexityLevel::L0);
984 assert!(!result.contains("Working Memory"));
985 }
986
987 #[test]
988 fn retriever_similarity_threshold_filters_low_similarity_results() {
989 let config = MemoryConfig {
990 similarity_threshold: 0.4,
991 ..default_config()
992 };
993 let db = test_db();
994 let retriever = MemoryRetriever::new(config);
995 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
996
997 let active_id = roboticus_db::memory::store_semantic(
998 &db,
999 "facts",
1000 "high-match",
1001 "deployment rollback stabilizes the incident",
1002 0.9,
1003 )
1004 .unwrap();
1005 let low_id = roboticus_db::memory::store_semantic(
1006 &db,
1007 "facts",
1008 "low-match",
1009 "botanical orchids in alpine valleys",
1010 0.9,
1011 )
1012 .unwrap();
1013
1014 roboticus_db::embeddings::store_embedding(
1015 &db,
1016 "emb-high",
1017 "semantic_memory",
1018 &active_id,
1019 "deployment rollback stabilizes the incident",
1020 &[1.0, 0.0],
1021 )
1022 .unwrap();
1023 roboticus_db::embeddings::store_embedding(
1024 &db,
1025 "emb-low",
1026 "semantic_memory",
1027 &low_id,
1028 "botanical orchids in alpine valleys",
1029 &[-1.0, 0.0],
1030 )
1031 .unwrap();
1032
1033 roboticus_db::memory_index::upsert_index_entry(
1035 &db,
1036 "semantic_memory",
1037 &active_id,
1038 "deployment rollback stabilizes the incident",
1039 Some("facts"),
1040 )
1041 .unwrap();
1042 roboticus_db::memory_index::upsert_index_entry(
1043 &db,
1044 "semantic_memory",
1045 &low_id,
1046 "botanical orchids in alpine valleys",
1047 Some("facts"),
1048 )
1049 .unwrap();
1050
1051 let output = retriever.retrieve_with_metrics(
1052 &db,
1053 &session_id,
1054 "deployment rollback stabilizes the incident",
1055 Some(&[1.0, 0.0]),
1056 ComplexityLevel::L2,
1057 None,
1058 );
1059 assert!(
1061 output.metrics.avg_similarity >= 0.4,
1062 "avg similarity should be above the configured threshold"
1063 );
1064 assert!(
1066 output.metrics.tiers.semantic >= 1,
1067 "at least the high-similarity match should be counted"
1068 );
1069 }
1070
1071 #[test]
1074 fn chunk_empty_text() {
1075 let chunks = chunk_text("", &ChunkConfig::default());
1076 assert!(chunks.is_empty());
1077 }
1078
1079 #[test]
1080 fn chunk_short_text() {
1081 let text = "This is a short sentence.";
1082 let chunks = chunk_text(text, &ChunkConfig::default());
1083 assert_eq!(chunks.len(), 1);
1084 assert_eq!(chunks[0].text, text);
1085 assert_eq!(chunks[0].index, 0);
1086 }
1087
1088 #[test]
1089 fn chunk_long_text_produces_overlapping_chunks() {
1090 let text = "word ".repeat(1000);
1091 let config = ChunkConfig {
1092 max_tokens: 50,
1093 overlap_tokens: 10,
1094 };
1095 let chunks = chunk_text(&text, &config);
1096 assert!(chunks.len() > 1);
1097
1098 for (i, chunk) in chunks.iter().enumerate() {
1099 assert_eq!(chunk.index, i);
1100 assert!(!chunk.text.is_empty());
1101 }
1102
1103 for i in 1..chunks.len() {
1105 assert!(chunks[i].start_char < chunks[i - 1].end_char);
1106 }
1107 }
1108
1109 #[test]
1110 fn chunk_respects_sentence_boundaries() {
1111 let text = "First sentence. Second sentence. Third sentence. Fourth sentence. Fifth sentence. \
1112 Sixth sentence. Seventh sentence. Eighth sentence. Ninth sentence. Tenth sentence.";
1113 let config = ChunkConfig {
1114 max_tokens: 20,
1115 overlap_tokens: 5,
1116 };
1117 let chunks = chunk_text(text, &config);
1118 for chunk in &chunks {
1120 if chunk.end_char < text.len() {
1121 let ends_at_boundary = chunk.text.ends_with(". ")
1122 || chunk.text.ends_with('.')
1123 || chunk.text.ends_with(' ');
1124 assert!(
1125 ends_at_boundary,
1126 "chunk should end at a boundary: {:?}",
1127 &chunk.text[chunk.text.len().saturating_sub(10)..]
1128 );
1129 }
1130 }
1131 }
1132
1133 #[test]
1134 fn chunk_covers_full_text() {
1135 let text = "a ".repeat(500);
1136 let config = ChunkConfig {
1137 max_tokens: 25,
1138 overlap_tokens: 5,
1139 };
1140 let chunks = chunk_text(&text, &config);
1141
1142 assert_eq!(chunks.first().unwrap().start_char, 0);
1143 assert_eq!(chunks.last().unwrap().end_char, text.len());
1144 }
1145
1146 #[test]
1147 fn chunk_zero_max_tokens() {
1148 let chunks = chunk_text(
1149 "some text",
1150 &ChunkConfig {
1151 max_tokens: 0,
1152 overlap_tokens: 0,
1153 },
1154 );
1155 assert!(chunks.is_empty());
1156 }
1157
1158 #[test]
1159 fn estimate_tokens_basic() {
1160 assert_eq!(estimate_tokens(""), 0);
1161 assert_eq!(estimate_tokens("abcd"), 1);
1162 assert_eq!(estimate_tokens("hello world!"), 3);
1163 }
1164
1165 #[test]
1166 fn chunk_multibyte_does_not_panic() {
1167 let text = "Hello \u{1F600} world. ".repeat(200);
1168 let config = ChunkConfig {
1169 max_tokens: 20,
1170 overlap_tokens: 5,
1171 };
1172 let chunks = chunk_text(&text, &config);
1173 assert!(chunks.len() > 1);
1174 for chunk in &chunks {
1175 assert!(!chunk.text.is_empty());
1176 let _ = chunk.text.as_bytes();
1178 }
1179 }
1180
1181 #[test]
1182 fn chunk_cjk_text() {
1183 let text = "\u{4F60}\u{597D}\u{4E16}\u{754C} ".repeat(300);
1184 let config = ChunkConfig {
1185 max_tokens: 15,
1186 overlap_tokens: 3,
1187 };
1188 let chunks = chunk_text(&text, &config);
1189 assert!(chunks.len() > 1);
1190 assert_eq!(chunks.first().unwrap().start_char, 0);
1191 assert_eq!(chunks.last().unwrap().end_char, text.len());
1192 }
1193
1194 #[test]
1195 fn floor_char_boundary_ascii() {
1196 let text = "hello world";
1197 assert_eq!(floor_char_boundary(text, 5), 5);
1198 assert_eq!(floor_char_boundary(text, 0), 0);
1199 assert_eq!(floor_char_boundary(text, 100), text.len());
1200 }
1201
1202 #[test]
1203 fn floor_char_boundary_multibyte() {
1204 let text = "caf\u{00E9}";
1206 assert_eq!(text.len(), 5);
1207 assert_eq!(floor_char_boundary(text, 4), 3);
1209 assert_eq!(floor_char_boundary(text, 3), 3);
1211 assert_eq!(floor_char_boundary(text, 5), 5);
1213 }
1214
1215 #[test]
1216 fn floor_char_boundary_emoji() {
1217 let text = "a\u{1F600}b"; assert_eq!(text.len(), 6);
1219 assert_eq!(floor_char_boundary(text, 2), 1);
1221 assert_eq!(floor_char_boundary(text, 5), 5);
1223 }
1224
1225 #[test]
1226 fn estimate_tokens_rounding() {
1227 assert_eq!(estimate_tokens("a"), 1);
1229 assert_eq!(estimate_tokens("abcde"), 2);
1231 assert_eq!(estimate_tokens("abcdefgh"), 2);
1233 }
1234
1235 #[test]
1236 fn retriever_with_procedural_no_history() {
1237 let db = test_db();
1239 let retriever = MemoryRetriever::new(default_config());
1240 let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();
1241
1242 roboticus_db::memory::store_procedural(&db, "unused_tool", "a tool").unwrap();
1243
1244 let result = retriever.retrieve(&db, &session_id, "test", None, ComplexityLevel::L2);
1245 assert!(
1246 !result.contains("Tool Experience"),
1247 "tools with no success/failure should not appear"
1248 );
1249 }
1250
1251 #[test]
1252 fn chunk_with_paragraph_breaks() {
1253 let text = "Paragraph one content.\n\nParagraph two content.\n\nParagraph three content.\n\n\
1254 Paragraph four content.\n\nParagraph five content.";
1255 let config = ChunkConfig {
1256 max_tokens: 15,
1257 overlap_tokens: 3,
1258 };
1259 let chunks = chunk_text(text, &config);
1260 for chunk in &chunks {
1262 if chunk.end_char < text.len() {
1263 let last_few = &chunk.text[chunk.text.len().saturating_sub(5)..];
1265 let has_good_break =
1266 last_few.contains('\n') || last_few.contains(". ") || last_few.ends_with(' ');
1267 assert!(has_good_break, "chunk should end at a reasonable boundary");
1268 }
1269 }
1270 }
1271
1272 #[test]
1273 fn chunk_config_default() {
1274 let config = ChunkConfig::default();
1275 assert_eq!(config.max_tokens, 512);
1276 assert_eq!(config.overlap_tokens, 64);
1277 }
1278
1279 #[test]
1280 fn find_break_point_at_end_of_text() {
1281 let text = "Hello world.";
1282 assert_eq!(find_break_point(text, 0, text.len()), text.len());
1283 }
1284
/// Stores the same relationship four times, then checks that an unrelated
/// query still reports at least one entry in the relationship tier.
#[test]
fn retriever_relationships_high_interaction_count() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();

    // Presumably each repeated store bumps the entity's interaction count —
    // confirm against `roboticus_db::memory::store_relationship`.
    (0..4).for_each(|_| {
        roboticus_db::memory::store_relationship(&db, "alice", "Alice Smith", 0.8).unwrap();
    });

    let query = "some random query";
    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        query,
        None,
        ComplexityLevel::L2,
        None,
    );
    let relationship_hits = output.metrics.tiers.relationship;
    assert!(
        relationship_hits >= 1,
        "high interaction count entity should be retrieved into relationship tier"
    );
}
1311
/// Stores an older digest, marks it stale, stores a newer active digest, and
/// checks that a plain (non-history) query still returns episodic memory.
///
/// NOTE(review): the assertion only requires at least one episodic entry; it
/// does not by itself prove that the stale digest was excluded.
#[test]
fn retriever_suppresses_stale_digests_by_default() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "agent-1", None).unwrap();

    // Older digest: stored as "active" first, then demoted below.
    let older_digest_id = roboticus_db::memory::store_episodic_with_meta(
        &db,
        "digest",
        "[Session Digest] alpha rollout incident resolved",
        9,
        Some("agent-1"),
        "active",
        None,
    )
    .unwrap();
    roboticus_db::memory::mark_episodic_digests_stale_for_owner(
        &db,
        "agent-1",
        "newer-digest",
        "superseded",
    )
    .unwrap();

    // Force the row into the stale state directly, regardless of what the helper
    // above did. The connection handle is scoped so it is released before the
    // next database call (presumably it is a lock guard — confirm).
    {
        let conn = db.conn();
        conn.execute(
            "UPDATE episodic_memory SET memory_state = 'stale' WHERE id = ?1",
            [older_digest_id],
        )
        .unwrap();
    }

    // Newer digest that remains active.
    roboticus_db::memory::store_episodic_with_meta(
        &db,
        "digest",
        "[Session Digest] beta stabilization plan active",
        9,
        Some("agent-1"),
        "active",
        None,
    )
    .unwrap();

    let query = "alpha beta digest";
    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        query,
        None,
        ComplexityLevel::L2,
        None,
    );
    let episodic_hits = output.metrics.tiers.episodic;
    assert!(episodic_hits >= 1, "active digest should be retrieved");
}
1370
/// Stores one digest already in the "stale" state and one "active" digest, then
/// checks that a query asking for "previous history" reports both in the
/// episodic tier.
#[test]
fn retriever_includes_stale_digests_when_history_requested() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "agent-1", None).unwrap();

    // Superseded digest, written directly as stale.
    roboticus_db::memory::store_episodic_with_meta(
        &db,
        "digest",
        "[Session Digest] alpha rollout incident resolved",
        9,
        Some("agent-1"),
        "stale",
        Some("superseded"),
    )
    .unwrap();

    // Current digest.
    roboticus_db::memory::store_episodic_with_meta(
        &db,
        "digest",
        "[Session Digest] beta stabilization plan active",
        9,
        Some("agent-1"),
        "active",
        None,
    )
    .unwrap();

    // Presumably the "previous history" wording is what switches the retriever
    // into history mode — confirm against the retriever's query analysis.
    let query = "show previous history for the alpha beta digest";
    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        query,
        None,
        ComplexityLevel::L2,
        None,
    );
    let tiers = &output.metrics.tiers;
    assert!(
        tiers.episodic >= 2,
        "history query should include both stale and active digests: got {}",
        tiers.episodic
    );
}
1413
/// Stores two "learned" semantic summaries under the `session:agent-1:` key
/// prefix, marks the prefix stale while passing the newer entry's id, and
/// checks that a plain query reports at most one semantic entry.
#[test]
fn retriever_suppresses_stale_semantic_summaries_by_default() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "agent-1", None).unwrap();

    // Older summary; its returned id is not needed.
    roboticus_db::memory::store_semantic(
        &db,
        "learned",
        "session:agent-1:alpha",
        "alpha policy was retired after the incident",
        0.8,
    )
    .unwrap();

    // Newer summary; its id is handed to the stale-marking call below
    // (presumably so that it is the one entry left active — confirm).
    let current_id = roboticus_db::memory::store_semantic(
        &db,
        "learned",
        "session:agent-1:beta",
        "beta policy is active with the latest safeguards",
        0.9,
    )
    .unwrap();
    roboticus_db::memory::mark_semantic_stale_by_category_and_key_prefix(
        &db,
        "learned",
        "session:agent-1:",
        &current_id,
        "superseded_by_newer_session_summary",
    )
    .unwrap();

    let query = "alpha beta policy safeguards";
    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        query,
        None,
        ComplexityLevel::L2,
        None,
    );
    let tiers = &output.metrics.tiers;
    assert!(
        tiers.semantic <= 1,
        "stale semantic summaries should be suppressed: got {} semantic",
        tiers.semantic
    );
}
1462
/// Same fixture as the default-suppression case (two "learned" summaries, the
/// key prefix marked stale with the newer id passed in), but queried with a
/// "show history" request: at least two semantic entries must be reported.
#[test]
fn retriever_includes_stale_semantic_summaries_when_history_requested() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "agent-1", None).unwrap();

    // Older summary; its returned id is not needed.
    roboticus_db::memory::store_semantic(
        &db,
        "learned",
        "session:agent-1:alpha",
        "alpha policy was retired after the incident",
        0.8,
    )
    .unwrap();

    // Newer summary; its id is handed to the stale-marking call below.
    let current_id = roboticus_db::memory::store_semantic(
        &db,
        "learned",
        "session:agent-1:beta",
        "beta policy is active with the latest safeguards",
        0.9,
    )
    .unwrap();
    roboticus_db::memory::mark_semantic_stale_by_category_and_key_prefix(
        &db,
        "learned",
        "session:agent-1:",
        &current_id,
        "superseded_by_newer_session_summary",
    )
    .unwrap();

    let query = "show history of the alpha beta policy change";
    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        query,
        None,
        ComplexityLevel::L2,
        None,
    );
    let tiers = &output.metrics.tiers;
    assert!(
        tiers.semantic >= 2,
        "history query should include stale semantic summaries: got {}",
        tiers.semantic
    );
}
1510
/// With nothing stored, retrieval must yield empty text and all-zero metrics.
#[test]
fn retrieve_with_metrics_empty_db() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();

    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        "hello",
        None,
        ComplexityLevel::L1,
        None,
    );

    assert!(output.text.is_empty());

    let metrics = &output.metrics;
    assert!(!metrics.retrieval_hit);
    assert_eq!(metrics.retrieval_count, 0);
    assert_eq!(metrics.avg_similarity, 0.0);
    assert_eq!(metrics.budget_utilization, 0.0);
}
1530
/// Two working-memory entries stored for the session must register as a hit,
/// be counted in the working tier and the overall retrieval count, and consume
/// some of the budget. The tier breakdown must also survive a JSON round trip.
#[test]
fn retrieve_with_metrics_working_memory_counted() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();

    roboticus_db::memory::store_working(&db, &session_id, "goal", "fix the pipeline", 8)
        .unwrap();
    roboticus_db::memory::store_working(&db, &session_id, "note", "version 0.11", 7).unwrap();

    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        "hello",
        None,
        ComplexityLevel::L2,
        None,
    );

    let metrics = &output.metrics;
    assert!(metrics.retrieval_hit);
    assert!(
        metrics.tiers.working >= 2,
        "working tier count should reflect stored entries"
    );
    assert!(metrics.retrieval_count >= 2);
    assert!(metrics.budget_utilization > 0.0);

    // Serialize the tier breakdown and read it back as untyped JSON to confirm
    // the `working` field is emitted with the same count.
    let encoded = serde_json::to_string(&metrics.tiers).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();
    let working_in_json = decoded["working"].as_u64().unwrap();
    assert!(working_in_json >= 2);
}
1562
/// A procedural entry with one recorded success must be counted in the
/// procedural tier of the retrieval metrics.
#[test]
fn retrieve_with_metrics_procedural_counted() {
    let db = test_db();
    let retriever = MemoryRetriever::new(default_config());
    let session_id = roboticus_db::sessions::find_or_create(&db, "test-agent", None).unwrap();

    // Register the tool, then give it a success so it has usage history.
    let tool = "web_search";
    roboticus_db::memory::store_procedural(&db, tool, "search the web").unwrap();
    roboticus_db::memory::record_procedural_success(&db, tool).unwrap();

    let output = retriever.retrieve_with_metrics(
        &db,
        &session_id,
        "search",
        None,
        ComplexityLevel::L2,
        None,
    );
    let procedural_hits = output.metrics.tiers.procedural;
    assert!(procedural_hits >= 1);
}
1582}