1use std::future::Future;
17use std::pin::Pin;
18
19use futures::StreamExt as _;
20use futures::stream::FuturesUnordered;
21
22use zeph_common::memory::{AsyncMemoryRouter, CompressionLevel, GraphRecallParams, TokenCounting};
23use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
24
25use crate::error::AssemblerError;
26use crate::input::ContextAssemblyInput;
27use crate::slot::ContextSlot;
28
29pub(crate) fn levels_to_flags(levels: &[CompressionLevel]) -> (bool, bool, bool) {
37 if levels.is_empty() {
38 return (true, true, true);
39 }
40 let episodic = levels.contains(&CompressionLevel::Episodic);
41 let procedural = levels.contains(&CompressionLevel::Procedural);
42 let declarative = levels.contains(&CompressionLevel::Declarative);
43 (episodic, procedural, declarative)
44}
45
/// Header line that opens the text built by `fetch_summaries`.
pub const SUMMARY_PREFIX: &str = "[conversation summaries]\n";
/// Header line that opens the text built by `fetch_cross_session`.
pub const CROSS_SESSION_PREFIX: &str = "[cross-session context]\n";
/// Header line that opens the text built by `fetch_semantic_recall`.
pub const RECALL_PREFIX: &str = "[semantic recall]\n";
/// Header line that opens the text built by `fetch_corrections`.
pub const CORRECTIONS_PREFIX: &str = "[past corrections]\n";
/// Header line that opens the text built by `fetch_document_rag`.
pub const DOCUMENT_RAG_PREFIX: &str = "## Relevant documents\n";
/// Header line that opens the text built by `fetch_graph_facts`.
pub const GRAPH_FACTS_PREFIX: &str = "[known facts]\n";
58
/// Output of one `ContextAssembler::gather` pass.
///
/// Every optional field stays `None` unless the corresponding fetcher was
/// scheduled and produced content; `Default` yields a fully empty context.
#[derive(Default)]
pub struct PreparedContext {
    /// Message produced by `fetch_graph_facts`, if any.
    pub graph_facts: Option<Message>,
    /// Message produced by `fetch_document_rag`, if any.
    pub doc_rag: Option<Message>,
    /// Message produced by `fetch_corrections`, if any.
    pub corrections: Option<Message>,
    /// Message produced by `fetch_semantic_recall`, if any.
    pub recall: Option<Message>,
    /// Score of the first item returned by semantic recall, when a recall message was built.
    pub recall_confidence: Option<f32>,
    /// Message produced by `fetch_cross_session`, if any.
    pub cross_session: Option<Message>,
    /// Message produced by `fetch_summaries`, if any.
    pub summaries: Option<Message>,
    /// Text returned by the index's `fetch_code_rag`, if an index was supplied.
    pub code_context: Option<String>,
    /// Message produced by `fetch_persona_facts`, if any.
    pub persona_facts: Option<Message>,
    /// Message produced by `fetch_trajectory_hints`, if any.
    pub trajectory_hints: Option<Message>,
    /// Message produced by `fetch_tree_memory`, if any.
    pub tree_memory: Option<Message>,
    /// Message produced by `fetch_reasoning_strategies`, if any.
    pub reasoning_hints: Option<Message>,
    /// `true` when the effective context strategy resolved to `MemoryFirst`.
    pub memory_first: bool,
    /// Recent-history share of the budget allocation (`alloc.recent_history`).
    pub recent_history_budget: usize,
    /// Handles of tasks spawned during assembly (currently the reasoning "mark used" task).
    pub background_tasks: Vec<tokio::task::JoinHandle<()>>,
}
99
/// Stateless entry point for context assembly; see `ContextAssembler::gather`.
pub struct ContextAssembler;
104
/// Boxed, `Send` future resolving to one filled [`ContextSlot`] or an [`AssemblerError`].
///
/// Boxing gives every fetcher the same type so they can share one `FuturesUnordered`.
type CtxFuture<'a> = Pin<Box<dyn Future<Output = Result<ContextSlot, AssemblerError>> + Send + 'a>>;
106
107fn empty_prepared_context() -> PreparedContext {
108 PreparedContext::default()
109}
110
111fn resolve_effective_strategy(
112 memory: &crate::input::ContextMemoryView,
113 sidequest_turn_counter: u64,
114) -> zeph_config::ContextStrategy {
115 match memory.context_strategy {
116 zeph_config::ContextStrategy::MemoryFirst => zeph_config::ContextStrategy::MemoryFirst,
117 zeph_config::ContextStrategy::Adaptive => {
118 if sidequest_turn_counter >= u64::from(memory.crossover_turn_threshold) {
119 zeph_config::ContextStrategy::MemoryFirst
120 } else {
121 zeph_config::ContextStrategy::FullHistory
122 }
123 }
124 _ => zeph_config::ContextStrategy::FullHistory,
125 }
126}
127
128fn correction_params(cfg: Option<&crate::input::CorrectionConfig>) -> (usize, f32) {
129 cfg.filter(|c| c.correction_detection)
130 .map_or((3, 0.75), |c| {
131 (
132 c.correction_recall_limit as usize,
133 c.correction_min_similarity,
134 )
135 })
136}
137
138#[allow(clippy::too_many_arguments)]
145fn schedule_context_fetchers<'r>(
146 memory: &'r crate::input::ContextMemoryView,
147 tc: &'r dyn TokenCounting,
148 query: &'r str,
149 scrub: fn(&str) -> std::borrow::Cow<'_, str>,
150 index: Option<&'r dyn crate::input::IndexAccess>,
151 router_ref: &'r dyn AsyncMemoryRouter,
152 summaries_budget: usize,
153 cross_session_budget: usize,
154 semantic_recall_budget: usize,
155 code_context_budget: usize,
156 graph_facts_budget: usize,
157 recall_limit: usize,
158 min_sim: f32,
159 active_levels: &[CompressionLevel],
160) -> FuturesUnordered<CtxFuture<'r>> {
161 let (episodic_active, procedural_active, declarative_active) = levels_to_flags(active_levels);
165
166 let fetchers: FuturesUnordered<CtxFuture<'r>> = FuturesUnordered::new();
167
168 if episodic_active && summaries_budget > 0 {
169 fetchers.push(Box::pin(async move {
170 fetch_summaries(memory, summaries_budget, tc)
171 .await
172 .map(ContextSlot::Summaries)
173 }));
174 }
175 if episodic_active && cross_session_budget > 0 {
176 fetchers.push(Box::pin(async move {
177 fetch_cross_session(memory, query, cross_session_budget, tc)
178 .await
179 .map(ContextSlot::CrossSession)
180 }));
181 }
182 if episodic_active && semantic_recall_budget > 0 {
183 fetchers.push(Box::pin(async move {
184 fetch_semantic_recall(memory, query, semantic_recall_budget, tc, Some(router_ref))
185 .await
186 .map(|(msg, score)| ContextSlot::SemanticRecall(msg, score))
187 }));
188 fetchers.push(Box::pin(async move {
189 fetch_document_rag(memory, query, semantic_recall_budget, tc)
190 .await
191 .map(ContextSlot::DocumentRag)
192 }));
193 }
194 fetchers.push(Box::pin(async move {
196 fetch_corrections(memory, query, recall_limit, min_sim, scrub)
197 .await
198 .map(ContextSlot::Corrections)
199 }));
200 if code_context_budget > 0
202 && let Some(idx) = index
203 {
204 fetchers.push(Box::pin(async move {
205 let result: Result<Option<String>, AssemblerError> =
206 idx.fetch_code_rag(query, code_context_budget).await;
207 result.map(ContextSlot::CodeContext)
208 }));
209 }
210 if declarative_active && graph_facts_budget > 0 {
211 fetchers.push(Box::pin(async move {
212 fetch_graph_facts(memory, query, graph_facts_budget, tc)
213 .await
214 .map(ContextSlot::GraphFacts)
215 }));
216 }
217 if declarative_active && memory.persona_config.context_budget_tokens > 0 {
218 fetchers.push(Box::pin(async move {
219 let persona_budget = memory.persona_config.context_budget_tokens;
220 fetch_persona_facts(memory, persona_budget, tc)
221 .await
222 .map(ContextSlot::PersonaFacts)
223 }));
224 }
225 if procedural_active && memory.trajectory_config.context_budget_tokens > 0 {
226 fetchers.push(Box::pin(async move {
227 let tbudget = memory.trajectory_config.context_budget_tokens;
228 fetch_trajectory_hints(memory, tbudget, tc)
229 .await
230 .map(ContextSlot::TrajectoryHints)
231 }));
232 }
233 if declarative_active && memory.tree_config.context_budget_tokens > 0 {
234 fetchers.push(Box::pin(async move {
235 let tbudget = memory.tree_config.context_budget_tokens;
236 fetch_tree_memory(memory, tbudget, tc)
237 .await
238 .map(ContextSlot::TreeMemory)
239 }));
240 }
241 if procedural_active
242 && memory.reasoning_config.enabled
243 && memory.reasoning_config.context_budget_tokens > 0
244 {
245 fetchers.push(Box::pin(async move {
246 let rbudget = memory.reasoning_config.context_budget_tokens;
247 let top_k = memory.reasoning_config.top_k;
248 fetch_reasoning_strategies(memory, query, rbudget, top_k, tc)
249 .await
250 .map(|(msg, handle)| ContextSlot::ReasoningStrategies(msg, handle))
251 }));
252 }
253
254 fetchers
255}
256
257async fn drive_fetchers(
258 mut fetchers: FuturesUnordered<CtxFuture<'_>>,
259 prepared: &mut PreparedContext,
260) -> Result<(), AssemblerError> {
261 while let Some(result) = fetchers.next().await {
262 match result {
263 Ok(slot) => match slot {
264 ContextSlot::Summaries(msg) => prepared.summaries = msg,
265 ContextSlot::CrossSession(msg) => prepared.cross_session = msg,
266 ContextSlot::SemanticRecall(msg, score) => {
267 prepared.recall = msg;
268 prepared.recall_confidence = score;
269 }
270 ContextSlot::DocumentRag(msg) => prepared.doc_rag = msg,
271 ContextSlot::Corrections(msg) => prepared.corrections = msg,
272 ContextSlot::CodeContext(text) => prepared.code_context = text,
273 ContextSlot::GraphFacts(msg) => prepared.graph_facts = msg,
274 ContextSlot::PersonaFacts(msg) => prepared.persona_facts = msg,
275 ContextSlot::TrajectoryHints(msg) => prepared.trajectory_hints = msg,
276 ContextSlot::TreeMemory(msg) => prepared.tree_memory = msg,
277 ContextSlot::ReasoningStrategies(msg, handle) => {
278 prepared.reasoning_hints = msg;
279 if let Some(h) = handle {
280 prepared.background_tasks.push(h);
281 }
282 }
283 },
284 Err(e) => return Err(e),
285 }
286 }
287 Ok(())
288}
289
290impl ContextAssembler {
291 pub async fn gather(
299 input: &ContextAssemblyInput<'_>,
300 ) -> Result<PreparedContext, AssemblerError> {
301 let Some(ref budget) = input.context_manager.budget else {
302 return Ok(empty_prepared_context());
303 };
304
305 let memory = input.memory;
306 let tc = input.token_counter;
307
308 let effective_strategy = resolve_effective_strategy(memory, input.sidequest_turn_counter);
309 let memory_first = effective_strategy == zeph_config::ContextStrategy::MemoryFirst;
310
311 let system_prompt = input
312 .messages
313 .first()
314 .filter(|m| m.role == Role::System)
315 .map_or("", |m| m.content.as_str());
316
317 let digest_tokens = memory
318 .cached_session_digest
319 .as_ref()
320 .map_or(0, |(_, tokens)| *tokens);
321
322 let alloc = budget.allocate_with_opts(
323 system_prompt,
324 input.skills_prompt,
325 tc,
326 memory.graph_config.enabled,
327 digest_tokens,
328 memory_first,
329 );
330
331 let (recall_limit, min_sim) = correction_params(input.correction_config.as_ref());
332
333 let router_ref: &dyn AsyncMemoryRouter = input.router.as_ref();
334
335 tracing::debug!(
336 active_sources = alloc.active_sources(),
337 active_levels = ?input.active_levels,
338 "context budget allocated"
339 );
340
341 let fetchers = schedule_context_fetchers(
342 memory,
343 tc,
344 input.query,
345 input.scrub,
346 input.index,
347 router_ref,
348 alloc.summaries,
349 alloc.cross_session,
350 alloc.semantic_recall,
351 alloc.code_context,
352 alloc.graph_facts,
353 recall_limit,
354 min_sim,
355 input.active_levels,
356 );
357
358 let mut prepared = empty_prepared_context();
359 prepared.memory_first = memory_first;
360 prepared.recent_history_budget = alloc.recent_history;
361
362 drive_fetchers(fetchers, &mut prepared).await?;
363 Ok(prepared)
364 }
365}
366
367pub fn effective_recall_timeout_ms(configured: u64) -> u64 {
372 if configured == 0 {
373 tracing::warn!(
374 "recall_timeout_ms is 0, which would disable spreading activation recall; \
375 clamping to 100ms"
376 );
377 100
378 } else {
379 configured
380 }
381}
382
383use crate::input::ContextMemoryView;
384
385#[tracing::instrument(name = "context.graph_facts", skip_all)]
386#[allow(clippy::too_many_lines)] pub(crate) async fn fetch_graph_facts(
388 memory: &ContextMemoryView,
389 query: &str,
390 budget_tokens: usize,
391 tc: &dyn TokenCounting,
392) -> Result<Option<Message>, AssemblerError> {
393 use zeph_common::memory::{RecallView, SpreadingActivationParams, classify_graph_subgraph};
394
395 if budget_tokens == 0 || !memory.graph_config.enabled {
396 return Ok(None);
397 }
398 let Some(ref mem) = memory.memory else {
399 return Ok(None);
400 };
401 let recall_limit = memory.graph_config.recall_limit;
402 let temporal_decay_rate = memory.graph_config.temporal_decay_rate;
403 let sa_config = &memory.graph_config.spreading_activation;
404
405 let fused_query;
407 let effective_query = if let Some(ref state) = memory.memcot_state {
408 let max_state_chars = 2 * query.len();
409 let state_slice = if state.len() > max_state_chars {
410 let boundary = state.floor_char_boundary(max_state_chars);
411 &state[..boundary]
412 } else {
413 state.as_str()
414 };
415 fused_query = format!("[state] {state_slice}\n{query}");
416 &fused_query as &str
417 } else {
418 query
419 };
420
421 let edge_types = classify_graph_subgraph(effective_query);
422
423 let view = match memory.memcot_config.recall_view {
424 zeph_config::RecallViewConfig::ZoomIn => RecallView::ZoomIn,
425 zeph_config::RecallViewConfig::ZoomOut => RecallView::ZoomOut,
426 _ => RecallView::Head,
427 };
428
429 let sa_params = if sa_config.enabled {
430 Some(SpreadingActivationParams {
431 decay_lambda: sa_config.decay_lambda,
432 max_hops: sa_config.max_hops,
433 activation_threshold: sa_config.activation_threshold,
434 inhibition_threshold: sa_config.inhibition_threshold,
435 max_activated_nodes: sa_config.max_activated_nodes,
436 temporal_decay_rate,
437 seed_structural_weight: sa_config.seed_structural_weight,
438 seed_community_cap: sa_config.seed_community_cap,
439 })
440 } else {
441 None
442 };
443
444 let timeout_ms = effective_recall_timeout_ms(sa_config.recall_timeout_ms);
445 let recall_fut = mem.recall_graph_facts(
446 effective_query,
447 GraphRecallParams {
448 limit: recall_limit,
449 view,
450 zoom_out_neighbor_cap: memory.memcot_config.zoom_out_neighbor_cap,
451 max_hops: memory.graph_config.max_hops,
452 temporal_decay_rate,
453 edge_types: &edge_types,
454 spreading_activation: sa_params,
455 },
456 );
457 let recalled = match tokio::time::timeout(
458 std::time::Duration::from_millis(timeout_ms),
459 recall_fut,
460 )
461 .await
462 {
463 Ok(Ok(facts)) => facts,
464 Ok(Err(e)) => {
465 tracing::warn!("graph recall failed: {e:#}");
466 Vec::new()
467 }
468 Err(_) => {
469 tracing::warn!("graph recall timed out ({timeout_ms}ms)");
470 Vec::new()
471 }
472 };
473
474 if recalled.is_empty() {
475 return Ok(None);
476 }
477
478 let mut body = String::from(GRAPH_FACTS_PREFIX);
479 let mut tokens_so_far = tc.count_tokens(&body);
480
481 for rf in &recalled {
482 let fact_text = rf.fact.replace(['\n', '\r', '<', '>'], " ");
483 let line = if let Some(score) = rf.activation_score {
484 format!(
485 "- {} (confidence: {:.2}, activation: {:.2})\n",
486 fact_text, rf.confidence, score
487 )
488 } else {
489 format!("- {} (confidence: {:.2})\n", fact_text, rf.confidence)
490 };
491 let line_tokens = tc.count_tokens(&line);
492 if tokens_so_far + line_tokens > budget_tokens {
493 break;
494 }
495 body.push_str(&line);
496 tokens_so_far += line_tokens;
497
498 for nb in &rf.neighbors {
500 let nb_text = nb.fact.replace(['\n', '\r', '<', '>'], " ");
501 let nb_line = format!(" ~ {} (confidence: {:.2})\n", nb_text, nb.confidence);
502 let nb_tokens = tc.count_tokens(&nb_line);
503 if tokens_so_far + nb_tokens > budget_tokens {
504 break;
505 }
506 body.push_str(&nb_line);
507 tokens_so_far += nb_tokens;
508 }
509
510 if let Some(ref snippet) = rf.provenance_snippet {
512 let snip_line = format!(
513 " [source: {}]\n",
514 snippet.replace(['\n', '\r', '<', '>'], " ")
515 );
516 let snip_tokens = tc.count_tokens(&snip_line);
517 if tokens_so_far + snip_tokens <= budget_tokens {
518 body.push_str(&snip_line);
519 tokens_so_far += snip_tokens;
520 }
521 }
522 }
523
524 if body == GRAPH_FACTS_PREFIX {
525 return Ok(None);
526 }
527
528 Ok(Some(Message::from_legacy(Role::System, body)))
529}
530
531#[tracing::instrument(name = "context.persona_facts", skip_all)]
532pub(crate) async fn fetch_persona_facts(
533 memory: &ContextMemoryView,
534 budget_tokens: usize,
535 tc: &dyn TokenCounting,
536) -> Result<Option<Message>, AssemblerError> {
537 if budget_tokens == 0 || !memory.persona_config.enabled {
538 return Ok(None);
539 }
540 let Some(ref mem) = memory.memory else {
541 return Ok(None);
542 };
543
544 let min_confidence = memory.persona_config.min_confidence;
545 let facts = mem
546 .load_persona_facts(min_confidence)
547 .await
548 .map_err(AssemblerError::Memory)?;
549
550 if facts.is_empty() {
551 return Ok(None);
552 }
553
554 let mut body = String::from(crate::slot::PERSONA_PREFIX);
555 let mut tokens_so_far = tc.count_tokens(&body);
556
557 for fact in &facts {
558 let line = format!("[{}] {}\n", fact.category, fact.content);
559 let line_tokens = tc.count_tokens(&line);
560 if tokens_so_far + line_tokens > budget_tokens {
561 break;
562 }
563 body.push_str(&line);
564 tokens_so_far += line_tokens;
565 }
566
567 if body == crate::slot::PERSONA_PREFIX {
568 return Ok(None);
569 }
570
571 Ok(Some(Message::from_legacy(Role::System, body)))
572}
573
574#[tracing::instrument(name = "context.trajectory_hints", skip_all)]
575pub(crate) async fn fetch_trajectory_hints(
576 memory: &ContextMemoryView,
577 budget_tokens: usize,
578 tc: &dyn TokenCounting,
579) -> Result<Option<Message>, AssemblerError> {
580 if budget_tokens == 0 || !memory.trajectory_config.enabled {
581 return Ok(None);
582 }
583 let Some(ref mem) = memory.memory else {
584 return Ok(None);
585 };
586
587 let top_k = memory.trajectory_config.recall_top_k;
588 let min_conf = memory.trajectory_config.min_confidence;
589 let entries = mem
593 .load_trajectory_entries(Some("procedural"), top_k)
594 .await
595 .map_err(AssemblerError::Memory)?;
596
597 if entries.is_empty() {
598 return Ok(None);
599 }
600
601 let mut body = String::from(crate::slot::TRAJECTORY_PREFIX);
602 let mut tokens_so_far = tc.count_tokens(&body);
603
604 for entry in entries
605 .iter()
606 .filter(|e| e.confidence >= min_conf)
607 .take(top_k)
608 {
609 let line = format!("- {}: {}\n", entry.intent, entry.outcome);
610 let line_tokens = tc.count_tokens(&line);
611 if tokens_so_far + line_tokens > budget_tokens {
612 break;
613 }
614 body.push_str(&line);
615 tokens_so_far += line_tokens;
616 }
617
618 if body == crate::slot::TRAJECTORY_PREFIX {
619 return Ok(None);
620 }
621
622 Ok(Some(Message::from_legacy(Role::System, body)))
623}
624
625#[tracing::instrument(name = "context.tree_memory", skip_all)]
626pub(crate) async fn fetch_tree_memory(
627 memory: &ContextMemoryView,
628 budget_tokens: usize,
629 tc: &dyn TokenCounting,
630) -> Result<Option<Message>, AssemblerError> {
631 if budget_tokens == 0 || !memory.tree_config.enabled {
632 return Ok(None);
633 }
634 let Some(ref mem) = memory.memory else {
635 return Ok(None);
636 };
637
638 let top_k = memory.tree_config.recall_top_k;
639 let nodes = mem
640 .load_tree_nodes(1, top_k)
641 .await
642 .map_err(AssemblerError::Memory)?;
643
644 if nodes.is_empty() {
645 return Ok(None);
646 }
647
648 let mut body = String::from(crate::slot::TREE_MEMORY_PREFIX);
649 let mut tokens_so_far = tc.count_tokens(&body);
650
651 for node in nodes.iter().take(top_k) {
652 let line = format!("- {}\n", node.content);
653 let line_tokens = tc.count_tokens(&line);
654 if tokens_so_far + line_tokens > budget_tokens {
655 break;
656 }
657 body.push_str(&line);
658 tokens_so_far += line_tokens;
659 }
660
661 if body == crate::slot::TREE_MEMORY_PREFIX {
662 return Ok(None);
663 }
664
665 Ok(Some(Message::from_legacy(Role::System, body)))
666}
667
668#[tracing::instrument(name = "context.reasoning_strategies", skip_all)]
669pub(crate) async fn fetch_reasoning_strategies(
670 memory: &ContextMemoryView,
671 query: &str,
672 budget_tokens: usize,
673 top_k: usize,
674 tc: &dyn TokenCounting,
675) -> Result<(Option<Message>, Option<tokio::task::JoinHandle<()>>), AssemblerError> {
676 let budget_tokens = budget_tokens.min(500);
678 if budget_tokens == 0 {
679 return Ok((None, None));
680 }
681 let Some(ref mem) = memory.memory else {
682 return Ok((None, None));
683 };
684
685 let strategies = mem
686 .retrieve_reasoning_strategies(query, top_k)
687 .await
688 .map_err(AssemblerError::Memory)?;
689
690 if strategies.is_empty() {
691 return Ok((None, None));
692 }
693
694 let mut body = String::from(crate::slot::REASONING_PREFIX);
695 let mut tokens_so_far = tc.count_tokens(&body);
696 let mut injected_ids: Vec<String> = Vec::new();
697
698 for s in strategies.iter().take(top_k) {
699 let safe_summary = s.summary.replace(['\n', '\r', '<', '>'], " ");
702 let line = format!("- [{}] {}\n", s.outcome, safe_summary);
703 let line_tokens = tc.count_tokens(&line);
704 if tokens_so_far + line_tokens > budget_tokens {
705 break;
706 }
707 body.push_str(&line);
708 tokens_so_far += line_tokens;
709 injected_ids.push(s.id.clone());
710 }
711
712 if body == crate::slot::REASONING_PREFIX {
713 return Ok((None, None));
714 }
715
716 let handle = if injected_ids.is_empty() {
720 None
721 } else {
722 let mem_clone = mem.clone();
723 Some(tokio::spawn(async move {
724 if let Err(e) = mem_clone.mark_reasoning_used(&injected_ids).await {
725 tracing::warn!(error = %e, "reasoning: mark_used failed");
726 }
727 }))
728 };
729
730 Ok((Some(Message::from_legacy(Role::System, body)), handle))
731}
732
733#[tracing::instrument(name = "context.corrections", skip_all)]
734pub(crate) async fn fetch_corrections(
735 memory: &ContextMemoryView,
736 query: &str,
737 limit: usize,
738 min_score: f32,
739 scrub: fn(&str) -> std::borrow::Cow<'_, str>,
740) -> Result<Option<Message>, AssemblerError> {
741 let Some(ref mem) = memory.memory else {
742 return Ok(None);
743 };
744 let corrections = mem
745 .retrieve_corrections(query, limit, min_score)
746 .await
747 .map_err(AssemblerError::Memory)?;
748 if corrections.is_empty() {
749 return Ok(None);
750 }
751 let mut text = String::from(CORRECTIONS_PREFIX);
752 for c in &corrections {
753 text.push_str("- Past user correction: \"");
754 text.push_str(&scrub(&c.correction_text));
755 text.push_str("\"\n");
756 }
757 Ok(Some(Message::from_legacy(Role::System, text)))
758}
759
760#[tracing::instrument(name = "context.semantic_recall", skip_all)]
761pub(crate) async fn fetch_semantic_recall(
762 memory: &ContextMemoryView,
763 query: &str,
764 token_budget: usize,
765 tc: &dyn TokenCounting,
766 router: Option<&dyn AsyncMemoryRouter>,
767) -> Result<(Option<Message>, Option<f32>), AssemblerError> {
768 let Some(ref mem) = memory.memory else {
769 return Ok((None, None));
770 };
771 if memory.recall_limit == 0 || token_budget == 0 {
772 return Ok((None, None));
773 }
774
775 let recalled = mem
776 .recall(query, memory.recall_limit, router)
777 .await
778 .map_err(AssemblerError::Memory)?;
779 if recalled.is_empty() {
780 return Ok((None, None));
781 }
782
783 let top_score = recalled.first().map(|r| r.score);
784
785 let mut recall_text = String::with_capacity(token_budget * 3);
786 recall_text.push_str(RECALL_PREFIX);
787 let mut tokens_used = tc.count_tokens(&recall_text);
788
789 for item in &recalled {
790 if item.content.starts_with("[skipped]") || item.content.starts_with("[stopped]") {
791 continue;
792 }
793 let entry = format!("- [{}] {}\n", item.role, item.content);
794 let entry_tokens = tc.count_tokens(&entry);
795 if tokens_used + entry_tokens > token_budget {
796 break;
797 }
798 recall_text.push_str(&entry);
799 tokens_used += entry_tokens;
800 }
801
802 if tokens_used > tc.count_tokens(RECALL_PREFIX) {
803 Ok((
804 Some(Message::from_parts(
805 Role::System,
806 vec![MessagePart::Recall { text: recall_text }],
807 )),
808 top_score,
809 ))
810 } else {
811 Ok((None, None))
812 }
813}
814
815#[tracing::instrument(name = "context.document_rag", skip_all)]
816pub(crate) async fn fetch_document_rag(
817 memory: &ContextMemoryView,
818 query: &str,
819 token_budget: usize,
820 tc: &dyn TokenCounting,
821) -> Result<Option<Message>, AssemblerError> {
822 if !memory.document_config.rag_enabled || token_budget == 0 {
823 return Ok(None);
824 }
825 let Some(ref mem) = memory.memory else {
826 return Ok(None);
827 };
828
829 let collection = &memory.document_config.collection;
830 let top_k = memory.document_config.top_k;
831 let chunks = mem
832 .search_document_collection(collection, query, top_k)
833 .await
834 .map_err(AssemblerError::Memory)?;
835 if chunks.is_empty() {
836 return Ok(None);
837 }
838
839 let mut text = String::from(DOCUMENT_RAG_PREFIX);
840 let mut tokens_used = tc.count_tokens(&text);
841
842 for chunk in &chunks {
843 if chunk.text.is_empty() {
844 continue;
845 }
846 let entry = format!("{}\n", chunk.text);
847 let cost = tc.count_tokens(&entry);
848 if tokens_used + cost > token_budget {
849 break;
850 }
851 text.push_str(&entry);
852 tokens_used += cost;
853 }
854
855 if tokens_used > tc.count_tokens(DOCUMENT_RAG_PREFIX) {
856 Ok(Some(Message {
857 role: Role::System,
858 content: text,
859 parts: vec![],
860 metadata: MessageMetadata::default(),
861 }))
862 } else {
863 Ok(None)
864 }
865}
866
867#[tracing::instrument(name = "context.summaries", skip_all)]
868pub(crate) async fn fetch_summaries(
869 memory: &ContextMemoryView,
870 token_budget: usize,
871 tc: &dyn TokenCounting,
872) -> Result<Option<Message>, AssemblerError> {
873 let (Some(mem), Some(cid)) = (&memory.memory, memory.conversation_id) else {
874 return Ok(None);
875 };
876 if token_budget == 0 {
877 return Ok(None);
878 }
879
880 let summaries = mem
881 .load_summaries(cid)
882 .await
883 .map_err(AssemblerError::Memory)?;
884 if summaries.is_empty() {
885 return Ok(None);
886 }
887
888 let mut summary_text = String::from(SUMMARY_PREFIX);
889 let mut tokens_used = tc.count_tokens(&summary_text);
890
891 for summary in summaries.iter().rev() {
892 let first = summary.first_message_id.unwrap_or(0);
893 let last = summary.last_message_id.unwrap_or(0);
894 let entry = format!("- Messages {first}-{last}: {}\n", summary.content);
895 let cost = tc.count_tokens(&entry);
896 if tokens_used + cost > token_budget {
897 break;
898 }
899 summary_text.push_str(&entry);
900 tokens_used += cost;
901 }
902
903 if tokens_used > tc.count_tokens(SUMMARY_PREFIX) {
904 Ok(Some(Message::from_parts(
905 Role::System,
906 vec![MessagePart::Summary { text: summary_text }],
907 )))
908 } else {
909 Ok(None)
910 }
911}
912
913#[tracing::instrument(name = "context.cross_session", skip_all)]
914pub(crate) async fn fetch_cross_session(
915 memory: &ContextMemoryView,
916 query: &str,
917 token_budget: usize,
918 tc: &dyn TokenCounting,
919) -> Result<Option<Message>, AssemblerError> {
920 let (Some(mem), Some(cid)) = (&memory.memory, memory.conversation_id) else {
921 return Ok(None);
922 };
923 if token_budget == 0 {
924 return Ok(None);
925 }
926
927 let threshold = memory.cross_session_score_threshold;
928 let results: Vec<_> = mem
929 .search_session_summaries(query, 5, Some(cid))
930 .await
931 .map_err(AssemblerError::Memory)?
932 .into_iter()
933 .filter(|r| r.score >= threshold)
934 .collect();
935 if results.is_empty() {
936 return Ok(None);
937 }
938
939 let mut text = String::from(CROSS_SESSION_PREFIX);
940 let mut tokens_used = tc.count_tokens(&text);
941
942 for item in &results {
943 let entry = format!("- {}\n", item.summary_text);
944 let cost = tc.count_tokens(&entry);
945 if tokens_used + cost > token_budget {
946 break;
947 }
948 text.push_str(&entry);
949 tokens_used += cost;
950 }
951
952 if tokens_used > tc.count_tokens(CROSS_SESSION_PREFIX) {
953 Ok(Some(Message::from_parts(
954 Role::System,
955 vec![MessagePart::CrossSession { text }],
956 )))
957 } else {
958 Ok(None)
959 }
960}
961
/// Tail length at which `memory_first_keep_tail` stops extending the kept tail.
pub const MAX_KEEP_TAIL_SCAN: usize = 50;
965
966#[must_use]
974pub fn memory_first_keep_tail(messages: &[Message], history_start: usize) -> usize {
975 use zeph_llm::provider::MessagePart;
976
977 let mut keep_tail = 2usize;
978 let len = messages.len();
979 let max = len.saturating_sub(history_start);
980
981 while keep_tail < max {
982 let first_retained = &messages[len - keep_tail];
983 let is_tool_result = first_retained.role == Role::User
984 && first_retained
985 .parts
986 .iter()
987 .any(|p| matches!(p, MessagePart::ToolResult { .. }));
988
989 if is_tool_result {
990 keep_tail += 1;
991 } else {
992 break;
993 }
994
995 if keep_tail >= MAX_KEEP_TAIL_SCAN {
996 let preceding_idx = len.saturating_sub(keep_tail + 1);
997 if preceding_idx >= history_start {
998 let preceding = &messages[preceding_idx];
999 let is_tool_use = preceding.role == Role::Assistant
1000 && preceding
1001 .parts
1002 .iter()
1003 .any(|p| matches!(p, MessagePart::ToolUse { .. }));
1004 if is_tool_use {
1005 keep_tail += 1;
1006 }
1007 }
1008 break;
1009 }
1010 }
1011
1012 keep_tail
1013}
1014
1015#[cfg(test)]
1016mod tests {
1017 use super::*;
1018 use crate::input::ContextMemoryView;
1019 use zeph_common::memory::CompressionLevel;
1020 use zeph_config::{
1021 ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig, ReasoningConfig,
1022 TrajectoryConfig, TreeConfig,
1023 };
1024
1025 struct NaiveTokenCounter;
1026 impl zeph_common::memory::TokenCounting for NaiveTokenCounter {
1027 fn count_tokens(&self, text: &str) -> usize {
1028 text.split_whitespace().count()
1029 }
1030 fn count_tool_schema_tokens(&self, schema: &serde_json::Value) -> usize {
1031 schema.to_string().split_whitespace().count()
1032 }
1033 }
1034
1035 fn empty_view() -> ContextMemoryView {
1036 ContextMemoryView {
1037 memory: None,
1038 conversation_id: None,
1039 recall_limit: 10,
1040 cross_session_score_threshold: 0.5,
1041 context_strategy: ContextStrategy::default(),
1042 crossover_turn_threshold: 5,
1043 cached_session_digest: None,
1044 graph_config: GraphConfig::default(),
1045 document_config: DocumentConfig::default(),
1046 persona_config: PersonaConfig::default(),
1047 trajectory_config: TrajectoryConfig::default(),
1048 reasoning_config: ReasoningConfig::default(),
1049 memcot_config: zeph_config::MemCotConfig::default(),
1050 memcot_state: None,
1051 tree_config: TreeConfig::default(),
1052 }
1053 }
1054
1055 #[tokio::test]
1058 async fn fetch_graph_facts_returns_none_when_memory_is_none() {
1059 let view = empty_view();
1060 let tc = NaiveTokenCounter;
1061 let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1062 assert!(result.is_none());
1063 }
1064
1065 #[tokio::test]
1066 async fn fetch_graph_facts_returns_none_when_budget_zero() {
1067 let mut view = empty_view();
1068 view.graph_config.enabled = true;
1069 let tc = NaiveTokenCounter;
1070 let result = fetch_graph_facts(&view, "test", 0, &tc).await.unwrap();
1071 assert!(result.is_none());
1072 }
1073
1074 #[tokio::test]
1075 async fn fetch_graph_facts_returns_none_when_graph_disabled() {
1076 let mut view = empty_view();
1077 view.graph_config.enabled = false;
1078 let tc = NaiveTokenCounter;
1079 let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1080 assert!(result.is_none());
1081 }
1082
1083 #[tokio::test]
1086 async fn fetch_persona_facts_returns_none_when_memory_is_none() {
1087 let view = empty_view();
1088 let tc = NaiveTokenCounter;
1089 let result = fetch_persona_facts(&view, 1000, &tc).await.unwrap();
1090 assert!(result.is_none());
1091 }
1092
1093 #[tokio::test]
1094 async fn fetch_persona_facts_returns_none_when_budget_zero() {
1095 let mut view = empty_view();
1096 view.persona_config.enabled = true;
1097 let tc = NaiveTokenCounter;
1098 let result = fetch_persona_facts(&view, 0, &tc).await.unwrap();
1099 assert!(result.is_none());
1100 }
1101
1102 #[tokio::test]
1105 async fn fetch_trajectory_hints_returns_none_when_memory_is_none() {
1106 let view = empty_view();
1107 let tc = NaiveTokenCounter;
1108 let result = fetch_trajectory_hints(&view, 1000, &tc).await.unwrap();
1109 assert!(result.is_none());
1110 }
1111
1112 #[tokio::test]
1113 async fn fetch_trajectory_hints_returns_none_when_budget_zero() {
1114 let mut view = empty_view();
1115 view.trajectory_config.enabled = true;
1116 let tc = NaiveTokenCounter;
1117 let result = fetch_trajectory_hints(&view, 0, &tc).await.unwrap();
1118 assert!(result.is_none());
1119 }
1120
1121 #[tokio::test]
1124 async fn fetch_tree_memory_returns_none_when_memory_is_none() {
1125 let view = empty_view();
1126 let tc = NaiveTokenCounter;
1127 let result = fetch_tree_memory(&view, 1000, &tc).await.unwrap();
1128 assert!(result.is_none());
1129 }
1130
1131 #[tokio::test]
1132 async fn fetch_tree_memory_returns_none_when_budget_zero() {
1133 let mut view = empty_view();
1134 view.tree_config.enabled = true;
1135 let tc = NaiveTokenCounter;
1136 let result = fetch_tree_memory(&view, 0, &tc).await.unwrap();
1137 assert!(result.is_none());
1138 }
1139
1140 #[tokio::test]
1143 async fn fetch_corrections_returns_none_when_memory_is_none() {
1144 let view = empty_view();
1145 let result = fetch_corrections(&view, "test", 10, 0.5, |s| s.into())
1146 .await
1147 .unwrap();
1148 assert!(result.is_none());
1149 }
1150
1151 #[tokio::test]
1154 async fn fetch_semantic_recall_returns_none_when_memory_is_none() {
1155 let view = empty_view();
1156 let tc = NaiveTokenCounter;
1157 let result = fetch_semantic_recall(&view, "test", 1000, &tc, None)
1158 .await
1159 .unwrap();
1160 assert!(result.0.is_none() && result.1.is_none());
1161 }
1162
1163 #[tokio::test]
1164 async fn fetch_semantic_recall_returns_none_when_budget_zero() {
1165 let view = empty_view();
1166 let tc = NaiveTokenCounter;
1167 let result = fetch_semantic_recall(&view, "test", 0, &tc, None)
1168 .await
1169 .unwrap();
1170 assert!(result.0.is_none() && result.1.is_none());
1171 }
1172
1173 #[tokio::test]
1176 async fn fetch_document_rag_returns_none_when_memory_is_none() {
1177 let mut view = empty_view();
1178 view.document_config.rag_enabled = true;
1179 let tc = NaiveTokenCounter;
1180 let result = fetch_document_rag(&view, "test", 1000, &tc).await.unwrap();
1181 assert!(result.is_none());
1182 }
1183
1184 #[tokio::test]
1185 async fn fetch_document_rag_returns_none_when_rag_disabled() {
1186 let view = empty_view();
1187 let tc = NaiveTokenCounter;
1188 let result = fetch_document_rag(&view, "test", 1000, &tc).await.unwrap();
1189 assert!(result.is_none());
1190 }
1191
1192 #[tokio::test]
1195 async fn fetch_summaries_returns_none_when_memory_is_none() {
1196 let view = empty_view();
1197 let tc = NaiveTokenCounter;
1198 let result = fetch_summaries(&view, 1000, &tc).await.unwrap();
1199 assert!(result.is_none());
1200 }
1201
1202 #[tokio::test]
1205 async fn fetch_cross_session_returns_none_when_memory_is_none() {
1206 let view = empty_view();
1207 let tc = NaiveTokenCounter;
1208 let result = fetch_cross_session(&view, "test", 1000, &tc).await.unwrap();
1209 assert!(result.is_none());
1210 }
1211
1212 #[test]
1215 fn levels_to_flags_empty_slice_enables_all_tiers() {
1216 let (e, p, d) = levels_to_flags(&[]);
1217 assert!(e, "episodic should be active for empty slice");
1218 assert!(p, "procedural should be active for empty slice");
1219 assert!(d, "declarative should be active for empty slice");
1220 }
1221
1222 #[test]
1223 fn levels_to_flags_full_set_enables_all_tiers() {
1224 let all = &[
1225 CompressionLevel::Episodic,
1226 CompressionLevel::Procedural,
1227 CompressionLevel::Declarative,
1228 ];
1229 let (e, p, d) = levels_to_flags(all);
1230 assert!(e);
1231 assert!(p);
1232 assert!(d);
1233 }
1234
1235 #[test]
1236 fn levels_to_flags_episodic_only() {
1237 let (e, p, d) = levels_to_flags(&[CompressionLevel::Episodic]);
1238 assert!(e);
1239 assert!(!p, "procedural should be inactive");
1240 assert!(!d, "declarative should be inactive");
1241 }
1242
1243 #[test]
1244 fn levels_to_flags_episodic_and_procedural() {
1245 let (e, p, d) =
1246 levels_to_flags(&[CompressionLevel::Episodic, CompressionLevel::Procedural]);
1247 assert!(e);
1248 assert!(p);
1249 assert!(!d, "declarative should be inactive");
1250 }
1251
1252 #[test]
1253 fn levels_to_flags_declarative_only() {
1254 let (e, p, d) = levels_to_flags(&[CompressionLevel::Declarative]);
1255 assert!(!e, "episodic should be inactive");
1256 assert!(!p, "procedural should be inactive");
1257 assert!(d);
1258 }
1259
1260 #[tokio::test]
1263 async fn fetch_reasoning_strategies_returns_none_when_memory_is_none() {
1264 let mut view = empty_view();
1265 view.reasoning_config.enabled = true;
1266 let tc = NaiveTokenCounter;
1267 let (result, handle) = fetch_reasoning_strategies(&view, "query", 1000, 3, &tc)
1268 .await
1269 .unwrap();
1270 assert!(result.is_none());
1271 assert!(handle.is_none());
1272 }
1273
1274 #[tokio::test]
1275 async fn fetch_reasoning_strategies_returns_none_when_budget_zero() {
1276 let mut view = empty_view();
1277 view.reasoning_config.enabled = true;
1278 let tc = NaiveTokenCounter;
1279 let (result, handle) = fetch_reasoning_strategies(&view, "query", 0, 3, &tc)
1280 .await
1281 .unwrap();
1282 assert!(result.is_none());
1283 assert!(handle.is_none());
1284 }
1285
1286 use std::sync::{Arc, Mutex};
1289 use zeph_common::memory::{
1290 ContextMemoryBackend, GraphRecallParams, MemCorrection, MemDocumentChunk, MemGraphFact,
1291 MemPersonaFact, MemReasoningStrategy, MemRecalledMessage, MemSessionSummary, MemSummary,
1292 MemTrajectoryEntry, MemTreeNode,
1293 };
1294
    /// Method names that `MockMemoryBackend::with_fail_on` accepts.
    ///
    /// Each entry must match, character for character, the string literal the
    /// corresponding mock trait method compares against `fail_on`; a name missing
    /// from this list is rejected by `with_fail_on`.
    const KNOWN_FAIL_ON: &[&str] = &[
        "load_persona_facts",
        "load_trajectory_entries",
        "load_tree_nodes",
        "load_summaries",
        "retrieve_reasoning_strategies",
        "mark_reasoning_used",
        "retrieve_corrections",
        "recall",
        "recall_graph_facts",
        "search_session_summaries",
        "search_document_collection",
    ];
1309
    /// In-memory test double for `ContextMemoryBackend`.
    ///
    /// Every query method returns a clone of the matching canned vector below,
    /// ignoring its arguments, unless `fail_on` names that method, in which case it
    /// returns an error instead.
    #[derive(Default)]
    struct MockMemoryBackend {
        /// Returned by `load_persona_facts`.
        persona_facts: Vec<MemPersonaFact>,
        /// Returned by `load_trajectory_entries`.
        trajectory_entries: Vec<MemTrajectoryEntry>,
        /// Returned by `load_tree_nodes`.
        tree_nodes: Vec<MemTreeNode>,
        /// Returned by `load_summaries`.
        summaries: Vec<MemSummary>,
        /// Returned by `retrieve_reasoning_strategies`.
        reasoning_strategies: Vec<MemReasoningStrategy>,
        /// Returned by `retrieve_corrections`.
        corrections: Vec<MemCorrection>,
        /// Returned by `recall`.
        recalled: Vec<MemRecalledMessage>,
        /// Returned by `recall_graph_facts`.
        graph_facts: Vec<MemGraphFact>,
        /// Returned by `search_session_summaries`.
        session_summaries: Vec<MemSessionSummary>,
        /// Returned by `search_document_collection`.
        document_chunks: Vec<MemDocumentChunk>,
        /// Name of the single method that should fail, if any.
        fail_on: Option<&'static str>,
        /// Ids passed to `mark_reasoning_used`; behind a `Mutex` because the trait
        /// method only receives `&self`.
        marked_ids: Mutex<Vec<String>>,
    }
1327
1328 impl MockMemoryBackend {
1329 fn with_fail_on(method: &'static str) -> Self {
1330 debug_assert!(
1331 KNOWN_FAIL_ON.contains(&method),
1332 "unknown fail_on method name: {method}"
1333 );
1334 Self {
1335 fail_on: Some(method),
1336 ..Default::default()
1337 }
1338 }
1339
1340 fn fail_err(method: &str) -> Box<dyn std::error::Error + Send + Sync> {
1341 format!("mock error in {method}").into()
1342 }
1343 }
1344
1345 impl ContextMemoryBackend for MockMemoryBackend {
1346 fn load_persona_facts<'a>(
1347 &'a self,
1348 _min_confidence: f64,
1349 ) -> std::pin::Pin<
1350 Box<
1351 dyn std::future::Future<
1352 Output = Result<
1353 Vec<MemPersonaFact>,
1354 Box<dyn std::error::Error + Send + Sync>,
1355 >,
1356 > + Send
1357 + 'a,
1358 >,
1359 > {
1360 let result = if self.fail_on == Some("load_persona_facts") {
1361 Err(Self::fail_err("load_persona_facts"))
1362 } else {
1363 Ok(self.persona_facts.clone())
1364 };
1365 Box::pin(async move { result })
1366 }
1367
1368 fn load_trajectory_entries<'a>(
1369 &'a self,
1370 _tier: Option<&'a str>,
1371 _top_k: usize,
1372 ) -> std::pin::Pin<
1373 Box<
1374 dyn std::future::Future<
1375 Output = Result<
1376 Vec<MemTrajectoryEntry>,
1377 Box<dyn std::error::Error + Send + Sync>,
1378 >,
1379 > + Send
1380 + 'a,
1381 >,
1382 > {
1383 let result = if self.fail_on == Some("load_trajectory_entries") {
1384 Err(Self::fail_err("load_trajectory_entries"))
1385 } else {
1386 Ok(self.trajectory_entries.clone())
1387 };
1388 Box::pin(async move { result })
1389 }
1390
1391 fn load_tree_nodes<'a>(
1392 &'a self,
1393 _level: u32,
1394 _top_k: usize,
1395 ) -> std::pin::Pin<
1396 Box<
1397 dyn std::future::Future<
1398 Output = Result<Vec<MemTreeNode>, Box<dyn std::error::Error + Send + Sync>>,
1399 > + Send
1400 + 'a,
1401 >,
1402 > {
1403 let result = if self.fail_on == Some("load_tree_nodes") {
1404 Err(Self::fail_err("load_tree_nodes"))
1405 } else {
1406 Ok(self.tree_nodes.clone())
1407 };
1408 Box::pin(async move { result })
1409 }
1410
1411 fn load_summaries<'a>(
1412 &'a self,
1413 _conversation_id: i64,
1414 ) -> std::pin::Pin<
1415 Box<
1416 dyn std::future::Future<
1417 Output = Result<Vec<MemSummary>, Box<dyn std::error::Error + Send + Sync>>,
1418 > + Send
1419 + 'a,
1420 >,
1421 > {
1422 let result = if self.fail_on == Some("load_summaries") {
1423 Err(Self::fail_err("load_summaries"))
1424 } else {
1425 Ok(self.summaries.clone())
1426 };
1427 Box::pin(async move { result })
1428 }
1429
1430 fn retrieve_reasoning_strategies<'a>(
1431 &'a self,
1432 _query: &'a str,
1433 _top_k: usize,
1434 ) -> std::pin::Pin<
1435 Box<
1436 dyn std::future::Future<
1437 Output = Result<
1438 Vec<MemReasoningStrategy>,
1439 Box<dyn std::error::Error + Send + Sync>,
1440 >,
1441 > + Send
1442 + 'a,
1443 >,
1444 > {
1445 let result = if self.fail_on == Some("retrieve_reasoning_strategies") {
1446 Err(Self::fail_err("retrieve_reasoning_strategies"))
1447 } else {
1448 Ok(self.reasoning_strategies.clone())
1449 };
1450 Box::pin(async move { result })
1451 }
1452
1453 fn mark_reasoning_used<'a>(
1454 &'a self,
1455 ids: &'a [String],
1456 ) -> std::pin::Pin<
1457 Box<
1458 dyn std::future::Future<
1459 Output = Result<(), Box<dyn std::error::Error + Send + Sync>>,
1460 > + Send
1461 + 'a,
1462 >,
1463 > {
1464 if self.fail_on == Some("mark_reasoning_used") {
1465 return Box::pin(async move { Err(Self::fail_err("mark_reasoning_used")) });
1466 }
1467 let mut guard = self.marked_ids.lock().expect("marked_ids poisoned");
1468 guard.extend_from_slice(ids);
1469 Box::pin(async move { Ok(()) })
1470 }
1471
1472 fn retrieve_corrections<'a>(
1473 &'a self,
1474 _query: &'a str,
1475 _limit: usize,
1476 _min_score: f32,
1477 ) -> std::pin::Pin<
1478 Box<
1479 dyn std::future::Future<
1480 Output = Result<
1481 Vec<MemCorrection>,
1482 Box<dyn std::error::Error + Send + Sync>,
1483 >,
1484 > + Send
1485 + 'a,
1486 >,
1487 > {
1488 let result = if self.fail_on == Some("retrieve_corrections") {
1489 Err(Self::fail_err("retrieve_corrections"))
1490 } else {
1491 Ok(self.corrections.clone())
1492 };
1493 Box::pin(async move { result })
1494 }
1495
1496 fn recall<'a>(
1497 &'a self,
1498 _query: &'a str,
1499 _limit: usize,
1500 _router: Option<&'a dyn zeph_common::memory::AsyncMemoryRouter>,
1501 ) -> std::pin::Pin<
1502 Box<
1503 dyn std::future::Future<
1504 Output = Result<
1505 Vec<MemRecalledMessage>,
1506 Box<dyn std::error::Error + Send + Sync>,
1507 >,
1508 > + Send
1509 + 'a,
1510 >,
1511 > {
1512 let result = if self.fail_on == Some("recall") {
1513 Err(Self::fail_err("recall"))
1514 } else {
1515 Ok(self.recalled.clone())
1516 };
1517 Box::pin(async move { result })
1518 }
1519
1520 fn recall_graph_facts<'a>(
1521 &'a self,
1522 _query: &'a str,
1523 _params: GraphRecallParams<'a>,
1524 ) -> std::pin::Pin<
1525 Box<
1526 dyn std::future::Future<
1527 Output = Result<
1528 Vec<MemGraphFact>,
1529 Box<dyn std::error::Error + Send + Sync>,
1530 >,
1531 > + Send
1532 + 'a,
1533 >,
1534 > {
1535 let result = if self.fail_on == Some("recall_graph_facts") {
1536 Err(Self::fail_err("recall_graph_facts"))
1537 } else {
1538 Ok(self.graph_facts.clone())
1539 };
1540 Box::pin(async move { result })
1541 }
1542
1543 fn search_session_summaries<'a>(
1544 &'a self,
1545 _query: &'a str,
1546 _limit: usize,
1547 _current_conversation_id: Option<i64>,
1548 ) -> std::pin::Pin<
1549 Box<
1550 dyn std::future::Future<
1551 Output = Result<
1552 Vec<MemSessionSummary>,
1553 Box<dyn std::error::Error + Send + Sync>,
1554 >,
1555 > + Send
1556 + 'a,
1557 >,
1558 > {
1559 let result = if self.fail_on == Some("search_session_summaries") {
1560 Err(Self::fail_err("search_session_summaries"))
1561 } else {
1562 Ok(self.session_summaries.clone())
1563 };
1564 Box::pin(async move { result })
1565 }
1566
1567 fn search_document_collection<'a>(
1568 &'a self,
1569 _collection: &'a str,
1570 _query: &'a str,
1571 _top_k: usize,
1572 ) -> std::pin::Pin<
1573 Box<
1574 dyn std::future::Future<
1575 Output = Result<
1576 Vec<MemDocumentChunk>,
1577 Box<dyn std::error::Error + Send + Sync>,
1578 >,
1579 > + Send
1580 + 'a,
1581 >,
1582 > {
1583 let result = if self.fail_on == Some("search_document_collection") {
1584 Err(Self::fail_err("search_document_collection"))
1585 } else {
1586 Ok(self.document_chunks.clone())
1587 };
1588 Box::pin(async move { result })
1589 }
1590 }
1591
1592 fn mock_view(mock: MockMemoryBackend) -> ContextMemoryView {
1593 let mut v = empty_view();
1594 v.memory = Some(Arc::new(mock));
1595 v
1596 }
1597
1598 #[tokio::test]
1601 async fn fetch_graph_facts_returns_message_when_memory_present() {
1602 let mock = MockMemoryBackend {
1603 graph_facts: vec![zeph_common::memory::MemGraphFact {
1604 fact: "Rust is fast".to_string(),
1605 confidence: 0.9,
1606 activation_score: None,
1607 neighbors: vec![],
1608 provenance_snippet: None,
1609 }],
1610 ..Default::default()
1611 };
1612 let mut view = mock_view(mock);
1613 view.graph_config.enabled = true;
1614 view.graph_config.spreading_activation.recall_timeout_ms = 5000;
1616 let tc = NaiveTokenCounter;
1617 let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1618 assert!(result.is_some(), "expected Some message");
1619 let msg = result.unwrap();
1620 assert!(
1621 msg.content.contains("Rust is fast"),
1622 "expected fact text in output, got: {}",
1623 msg.content
1624 );
1625 assert!(
1626 msg.content.starts_with(GRAPH_FACTS_PREFIX),
1627 "expected GRAPH_FACTS_PREFIX"
1628 );
1629 }
1630
1631 #[tokio::test]
1632 async fn fetch_graph_facts_swallows_error_and_returns_none() {
1633 let mock = MockMemoryBackend::with_fail_on("recall_graph_facts");
1634 let mut view = mock_view(mock);
1635 view.graph_config.enabled = true;
1636 view.graph_config.spreading_activation.recall_timeout_ms = 5000;
1637 let tc = NaiveTokenCounter;
1638 let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1640 assert!(
1641 result.is_none(),
1642 "expected None when recall_graph_facts errors"
1643 );
1644 }
1645
1646 #[tokio::test]
1647 async fn fetch_graph_facts_returns_none_when_facts_empty() {
1648 let mock = MockMemoryBackend::default(); let mut view = mock_view(mock);
1650 view.graph_config.enabled = true;
1651 view.graph_config.spreading_activation.recall_timeout_ms = 5000;
1652 let tc = NaiveTokenCounter;
1653 let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1654 assert!(result.is_none());
1655 }
1656
1657 #[tokio::test]
1660 async fn fetch_persona_facts_returns_message_when_persona_enabled() {
1661 let mock = MockMemoryBackend {
1662 persona_facts: vec![MemPersonaFact {
1663 category: "preference".to_string(),
1664 content: "prefers concise answers".to_string(),
1665 }],
1666 ..Default::default()
1667 };
1668 let mut view = mock_view(mock);
1669 view.persona_config.enabled = true;
1670 view.persona_config.context_budget_tokens = 1000;
1671 let tc = NaiveTokenCounter;
1672 let result = fetch_persona_facts(&view, 1000, &tc).await.unwrap();
1673 assert!(result.is_some());
1674 let msg = result.unwrap();
1675 assert!(msg.content.contains("preference"));
1676 assert!(msg.content.contains("prefers concise answers"));
1677 assert!(msg.content.starts_with(crate::slot::PERSONA_PREFIX));
1678 }
1679
1680 #[tokio::test]
1681 async fn fetch_persona_facts_propagates_error() {
1682 let mock = MockMemoryBackend::with_fail_on("load_persona_facts");
1683 let mut view = mock_view(mock);
1684 view.persona_config.enabled = true;
1685 let tc = NaiveTokenCounter;
1686 let result = fetch_persona_facts(&view, 1000, &tc).await;
1687 assert!(
1688 result.is_err(),
1689 "expected Err from load_persona_facts failure"
1690 );
1691 }
1692
1693 #[tokio::test]
1696 async fn fetch_trajectory_hints_returns_message_when_trajectory_enabled() {
1697 let mock = MockMemoryBackend {
1698 trajectory_entries: vec![MemTrajectoryEntry {
1699 intent: "summarize code".to_string(),
1700 outcome: "produced concise summary".to_string(),
1701 confidence: 0.9,
1702 }],
1703 ..Default::default()
1704 };
1705 let mut view = mock_view(mock);
1706 view.trajectory_config.enabled = true;
1707 view.trajectory_config.context_budget_tokens = 1000;
1708 view.trajectory_config.min_confidence = 0.5;
1709 let tc = NaiveTokenCounter;
1710 let result = fetch_trajectory_hints(&view, 1000, &tc).await.unwrap();
1711 assert!(result.is_some());
1712 let msg = result.unwrap();
1713 assert!(msg.content.contains("summarize code"));
1714 assert!(msg.content.starts_with(crate::slot::TRAJECTORY_PREFIX));
1715 }
1716
1717 #[tokio::test]
1718 async fn fetch_trajectory_hints_passes_tier_filter() {
1719 let mock = MockMemoryBackend {
1722 trajectory_entries: vec![
1723 MemTrajectoryEntry {
1724 intent: "debug async code".to_string(),
1725 outcome: "fixed deadlock".to_string(),
1726 confidence: 0.85,
1727 },
1728 MemTrajectoryEntry {
1729 intent: "low confidence task".to_string(),
1730 outcome: "irrelevant".to_string(),
1731 confidence: 0.3,
1732 },
1733 ],
1734 ..Default::default()
1735 };
1736 let mut view = mock_view(mock);
1737 view.trajectory_config.enabled = true;
1738 view.trajectory_config.context_budget_tokens = 1000;
1739 view.trajectory_config.min_confidence = 0.5;
1740 let tc = NaiveTokenCounter;
1741 let result = fetch_trajectory_hints(&view, 1000, &tc).await.unwrap();
1742 assert!(result.is_some(), "expected Some message");
1743 let msg = result.unwrap();
1744 assert!(
1745 msg.content.contains("debug async code"),
1746 "high-confidence entry must be included"
1747 );
1748 assert!(
1749 !msg.content.contains("low confidence task"),
1750 "entry below min_confidence must be filtered out"
1751 );
1752 }
1753
1754 #[tokio::test]
1755 async fn fetch_trajectory_hints_propagates_error() {
1756 let mock = MockMemoryBackend::with_fail_on("load_trajectory_entries");
1757 let mut view = mock_view(mock);
1758 view.trajectory_config.enabled = true;
1759 let tc = NaiveTokenCounter;
1760 let result = fetch_trajectory_hints(&view, 1000, &tc).await;
1761 assert!(result.is_err());
1762 }
1763
1764 #[tokio::test]
1767 async fn fetch_tree_memory_returns_message_when_tree_enabled() {
1768 let mock = MockMemoryBackend {
1769 tree_nodes: vec![MemTreeNode {
1770 content: "Topic: async Rust patterns".to_string(),
1771 }],
1772 ..Default::default()
1773 };
1774 let mut view = mock_view(mock);
1775 view.tree_config.enabled = true;
1776 view.tree_config.context_budget_tokens = 1000;
1777 let tc = NaiveTokenCounter;
1778 let result = fetch_tree_memory(&view, 1000, &tc).await.unwrap();
1779 assert!(result.is_some());
1780 let msg = result.unwrap();
1781 assert!(msg.content.contains("async Rust patterns"));
1782 assert!(msg.content.starts_with(crate::slot::TREE_MEMORY_PREFIX));
1783 }
1784
1785 #[tokio::test]
1786 async fn fetch_tree_memory_propagates_error() {
1787 let mock = MockMemoryBackend::with_fail_on("load_tree_nodes");
1788 let mut view = mock_view(mock);
1789 view.tree_config.enabled = true;
1790 let tc = NaiveTokenCounter;
1791 let result = fetch_tree_memory(&view, 1000, &tc).await;
1792 assert!(result.is_err());
1793 }
1794
1795 #[tokio::test]
1798 async fn fetch_corrections_returns_message_when_corrections_present() {
1799 let mock = MockMemoryBackend {
1800 corrections: vec![MemCorrection {
1801 correction_text: "use snake_case not camelCase".to_string(),
1802 }],
1803 ..Default::default()
1804 };
1805 let view = mock_view(mock);
1806 let result = fetch_corrections(&view, "query", 10, 0.5, |s| s.into())
1807 .await
1808 .unwrap();
1809 assert!(result.is_some());
1810 let msg = result.unwrap();
1811 assert!(msg.content.contains("snake_case"));
1812 assert!(msg.content.starts_with(CORRECTIONS_PREFIX));
1813 }
1814
1815 #[tokio::test]
1816 async fn fetch_corrections_propagates_error() {
1817 let mock = MockMemoryBackend::with_fail_on("retrieve_corrections");
1820 let view = mock_view(mock);
1821 let result = fetch_corrections(&view, "query", 10, 0.5, |s| s.into()).await;
1822 assert!(result.is_err(), "expected Err, got {result:?}");
1823 }
1824
1825 #[tokio::test]
1828 async fn fetch_semantic_recall_returns_message_with_content() {
1829 let mock = MockMemoryBackend {
1830 recalled: vec![
1831 MemRecalledMessage {
1832 role: "user".to_string(),
1833 content: "how does tokio work".to_string(),
1834 score: 0.95,
1835 },
1836 MemRecalledMessage {
1837 role: "assistant".to_string(),
1838 content: "tokio is an async runtime".to_string(),
1839 score: 0.88,
1840 },
1841 ],
1842 ..Default::default()
1843 };
1844 let mut view = mock_view(mock);
1845 view.recall_limit = 10;
1846 let tc = NaiveTokenCounter;
1847 let (msg, score) = fetch_semantic_recall(&view, "tokio", 1000, &tc, None)
1848 .await
1849 .unwrap();
1850 assert!(msg.is_some(), "expected Some message");
1851 assert!(score.is_some_and(|s| (s - 0.95_f32).abs() < f32::EPSILON));
1853 let msg = msg.unwrap();
1854 let has_recall_part = msg.parts.iter().any(|p| {
1856 if let zeph_llm::provider::MessagePart::Recall { text } = p {
1857 text.contains("how does tokio work")
1858 } else {
1859 false
1860 }
1861 });
1862 assert!(has_recall_part, "expected recalled content in Recall part");
1863 }
1864
1865 #[tokio::test]
1866 async fn fetch_semantic_recall_returns_none_when_recalled_empty() {
1867 let mock = MockMemoryBackend::default();
1868 let mut view = mock_view(mock);
1869 view.recall_limit = 10;
1870 let tc = NaiveTokenCounter;
1871 let (msg, score) = fetch_semantic_recall(&view, "query", 1000, &tc, None)
1872 .await
1873 .unwrap();
1874 assert!(msg.is_none());
1875 assert!(score.is_none());
1876 }
1877
1878 #[tokio::test]
1879 async fn fetch_semantic_recall_propagates_error() {
1880 let mock = MockMemoryBackend::with_fail_on("recall");
1881 let mut view = mock_view(mock);
1882 view.recall_limit = 10;
1883 let tc = NaiveTokenCounter;
1884 let result = fetch_semantic_recall(&view, "query", 1000, &tc, None).await;
1885 assert!(result.is_err());
1886 }
1887
1888 #[tokio::test]
1891 async fn fetch_document_rag_returns_message_when_rag_enabled() {
1892 let mock = MockMemoryBackend {
1893 document_chunks: vec![MemDocumentChunk {
1894 text: "Rust ownership rules prevent data races".to_string(),
1895 }],
1896 ..Default::default()
1897 };
1898 let mut view = mock_view(mock);
1899 view.document_config.rag_enabled = true;
1900 let tc = NaiveTokenCounter;
1901 let result = fetch_document_rag(&view, "ownership", 1000, &tc)
1902 .await
1903 .unwrap();
1904 assert!(result.is_some());
1905 let msg = result.unwrap();
1906 assert!(msg.content.contains("ownership rules"));
1907 assert!(msg.content.starts_with(DOCUMENT_RAG_PREFIX));
1908 }
1909
1910 #[tokio::test]
1911 async fn fetch_document_rag_propagates_error() {
1912 let mock = MockMemoryBackend::with_fail_on("search_document_collection");
1913 let mut view = mock_view(mock);
1914 view.document_config.rag_enabled = true;
1915 let tc = NaiveTokenCounter;
1916 let result = fetch_document_rag(&view, "query", 1000, &tc).await;
1917 assert!(result.is_err());
1918 }
1919
1920 #[tokio::test]
1923 async fn fetch_summaries_returns_message_when_summaries_present() {
1924 let mock = MockMemoryBackend {
1925 summaries: vec![MemSummary {
1926 first_message_id: Some(1),
1927 last_message_id: Some(5),
1928 content: "User asked about async Rust".to_string(),
1929 }],
1930 ..Default::default()
1931 };
1932 let mut view = mock_view(mock);
1933 view.conversation_id = Some(42);
1934 let tc = NaiveTokenCounter;
1935 let result = fetch_summaries(&view, 1000, &tc).await.unwrap();
1936 assert!(result.is_some());
1937 let msg = result.unwrap();
1938 let has_summary_part = msg.parts.iter().any(|p| {
1939 if let zeph_llm::provider::MessagePart::Summary { text } = p {
1940 text.contains("Messages 1-5") && text.contains("async Rust")
1941 } else {
1942 false
1943 }
1944 });
1945 assert!(
1946 has_summary_part,
1947 "expected Summary part with messages range"
1948 );
1949 }
1950
1951 #[tokio::test]
1952 async fn fetch_summaries_returns_none_without_conversation_id() {
1953 let mock = MockMemoryBackend {
1954 summaries: vec![MemSummary {
1955 first_message_id: Some(1),
1956 last_message_id: Some(5),
1957 content: "some content".to_string(),
1958 }],
1959 ..Default::default()
1960 };
1961 let mut view = mock_view(mock);
1962 view.conversation_id = None; let tc = NaiveTokenCounter;
1964 let result = fetch_summaries(&view, 1000, &tc).await.unwrap();
1965 assert!(result.is_none());
1966 }
1967
1968 #[tokio::test]
1969 async fn fetch_summaries_propagates_error() {
1970 let mock = MockMemoryBackend::with_fail_on("load_summaries");
1971 let mut view = mock_view(mock);
1972 view.conversation_id = Some(42);
1973 let tc = NaiveTokenCounter;
1974 let result = fetch_summaries(&view, 1000, &tc).await;
1975 assert!(result.is_err());
1976 }
1977
1978 #[tokio::test]
1981 async fn fetch_cross_session_returns_message_when_results_present() {
1982 let mock = MockMemoryBackend {
1983 session_summaries: vec![MemSessionSummary {
1984 summary_text: "Previous session: debugging tokio deadlock".to_string(),
1985 score: 0.9,
1986 }],
1987 ..Default::default()
1988 };
1989 let mut view = mock_view(mock);
1990 view.conversation_id = Some(1);
1991 view.cross_session_score_threshold = 0.5;
1992 let tc = NaiveTokenCounter;
1993 let result = fetch_cross_session(&view, "async", 1000, &tc)
1994 .await
1995 .unwrap();
1996 assert!(result.is_some());
1997 let msg = result.unwrap();
1998 let has_cross_session_part = msg.parts.iter().any(|p| {
1999 if let zeph_llm::provider::MessagePart::CrossSession { text } = p {
2000 text.contains("tokio deadlock")
2001 } else {
2002 false
2003 }
2004 });
2005 assert!(has_cross_session_part);
2006 }
2007
2008 #[tokio::test]
2009 async fn fetch_cross_session_propagates_error() {
2010 let mock = MockMemoryBackend::with_fail_on("search_session_summaries");
2011 let mut view = mock_view(mock);
2012 view.conversation_id = Some(1);
2013 let tc = NaiveTokenCounter;
2014 let result = fetch_cross_session(&view, "query", 1000, &tc).await;
2015 assert!(result.is_err());
2016 }
2017
2018 #[tokio::test]
2021 async fn fetch_reasoning_strategies_returns_message_and_marks_used() {
2022 let mock = Arc::new(MockMemoryBackend {
2023 reasoning_strategies: vec![
2024 MemReasoningStrategy {
2025 id: "strat-1".to_string(),
2026 outcome: "success".to_string(),
2027 summary: "break the problem into small steps".to_string(),
2028 },
2029 MemReasoningStrategy {
2030 id: "strat-2".to_string(),
2031 outcome: "success".to_string(),
2032 summary: "use tracing spans for debugging".to_string(),
2033 },
2034 ],
2035 ..Default::default()
2036 });
2037 let marked_ids = Arc::clone(&mock);
2038 let mut view = empty_view();
2039 view.memory = Some(mock);
2040 view.reasoning_config.enabled = true;
2041 view.reasoning_config.context_budget_tokens = 1000;
2042 let tc = NaiveTokenCounter;
2043 let (result, handle) = fetch_reasoning_strategies(&view, "debug", 1000, 5, &tc)
2044 .await
2045 .unwrap();
2046 assert!(result.is_some());
2047 let msg = result.unwrap();
2048 assert!(msg.content.starts_with(crate::slot::REASONING_PREFIX));
2049 assert!(msg.content.contains("break the problem"));
2050
2051 if let Some(h) = handle {
2053 h.await.expect("mark_reasoning_used task panicked");
2054 }
2055
2056 let ids = marked_ids.marked_ids.lock().expect("marked_ids poisoned");
2057 assert!(
2058 ids.contains(&"strat-1".to_string()),
2059 "expected strat-1 marked"
2060 );
2061 assert!(
2062 ids.contains(&"strat-2".to_string()),
2063 "expected strat-2 marked"
2064 );
2065 }
2066
2067 #[tokio::test]
2068 async fn fetch_reasoning_strategies_propagates_error() {
2069 let mock = MockMemoryBackend::with_fail_on("retrieve_reasoning_strategies");
2070 let mut view = mock_view(mock);
2071 view.reasoning_config.enabled = true;
2072 let tc = NaiveTokenCounter;
2073 let result = fetch_reasoning_strategies(&view, "query", 1000, 3, &tc).await;
2074 assert!(result.is_err());
2075 }
2076
2077 #[tokio::test]
2080 async fn fetch_semantic_recall_skips_skipped_and_stopped_messages() {
2081 let mock = MockMemoryBackend {
2082 recalled: vec![
2083 MemRecalledMessage {
2084 role: "user".to_string(),
2085 content: "[skipped] some content".to_string(),
2086 score: 0.95,
2087 },
2088 MemRecalledMessage {
2089 role: "user".to_string(),
2090 content: "[stopped] other content".to_string(),
2091 score: 0.90,
2092 },
2093 MemRecalledMessage {
2094 role: "user".to_string(),
2095 content: "valid content to recall".to_string(),
2096 score: 0.85,
2097 },
2098 ],
2099 ..Default::default()
2100 };
2101 let mut view = mock_view(mock);
2102 view.recall_limit = 10;
2103 let tc = NaiveTokenCounter;
2104 let (msg, _) = fetch_semantic_recall(&view, "query", 1000, &tc, None)
2105 .await
2106 .unwrap();
2107 assert!(msg.is_some());
2108 let msg = msg.unwrap();
2109 let full_text = msg.parts.iter().find_map(|p| {
2110 if let zeph_llm::provider::MessagePart::Recall { text } = p {
2111 Some(text.clone())
2112 } else {
2113 None
2114 }
2115 });
2116 let text = full_text.unwrap_or_default();
2117 assert!(
2118 !text.contains("[skipped]"),
2119 "skipped messages must be excluded"
2120 );
2121 assert!(
2122 !text.contains("[stopped]"),
2123 "stopped messages must be excluded"
2124 );
2125 assert!(
2126 text.contains("valid content to recall"),
2127 "valid messages must be included"
2128 );
2129 }
2130
2131 #[tokio::test]
2132 async fn fetch_cross_session_filters_below_threshold() {
2133 let mock = MockMemoryBackend {
2134 session_summaries: vec![
2135 MemSessionSummary {
2136 summary_text: "high relevance session".to_string(),
2137 score: 0.9,
2138 },
2139 MemSessionSummary {
2140 summary_text: "low relevance session".to_string(),
2141 score: 0.2,
2142 },
2143 ],
2144 ..Default::default()
2145 };
2146 let mut view = mock_view(mock);
2147 view.conversation_id = Some(1);
2148 view.cross_session_score_threshold = 0.5;
2149 let tc = NaiveTokenCounter;
2150 let result = fetch_cross_session(&view, "query", 1000, &tc)
2151 .await
2152 .unwrap();
2153 assert!(result.is_some());
2154 let msg = result.unwrap();
2155 let text = msg
2156 .parts
2157 .iter()
2158 .find_map(|p| {
2159 if let zeph_llm::provider::MessagePart::CrossSession { text } = p {
2160 Some(text.clone())
2161 } else {
2162 None
2163 }
2164 })
2165 .unwrap_or_default();
2166 assert!(
2167 text.contains("high relevance"),
2168 "high score must be included"
2169 );
2170 assert!(
2171 !text.contains("low relevance"),
2172 "low score must be filtered out"
2173 );
2174 }
2175
2176 #[tokio::test]
2177 async fn fetch_document_rag_skips_empty_chunks() {
2178 let mock = MockMemoryBackend {
2179 document_chunks: vec![
2180 MemDocumentChunk {
2181 text: String::new(),
2182 }, MemDocumentChunk {
2184 text: "real content here".to_string(),
2185 },
2186 ],
2187 ..Default::default()
2188 };
2189 let mut view = mock_view(mock);
2190 view.document_config.rag_enabled = true;
2191 let tc = NaiveTokenCounter;
2192 let result = fetch_document_rag(&view, "query", 1000, &tc).await.unwrap();
2193 assert!(result.is_some());
2194 let msg = result.unwrap();
2195 assert!(msg.content.contains("real content here"));
2196 assert!(!msg.content.contains("\n\n\n"));
2198 }
2199
2200 #[tokio::test]
2201 async fn fetch_graph_facts_sanitizes_injection_payloads() {
2202 let mock = MockMemoryBackend {
2204 graph_facts: vec![zeph_common::memory::MemGraphFact {
2205 fact: "fact with <script>alert(1)</script> and\nnewline".to_string(),
2206 confidence: 0.8,
2207 activation_score: None,
2208 neighbors: vec![],
2209 provenance_snippet: None,
2210 }],
2211 ..Default::default()
2212 };
2213 let mut view = mock_view(mock);
2214 view.graph_config.enabled = true;
2215 view.graph_config.spreading_activation.recall_timeout_ms = 5000;
2216 let tc = NaiveTokenCounter;
2217 let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
2218 assert!(result.is_some());
2219 let msg = result.unwrap();
2220 assert!(
2221 !msg.content.contains('<'),
2222 "angle brackets must be sanitized"
2223 );
2224 assert!(
2227 !msg.content.contains("\n\n"),
2228 "embedded newlines must be sanitized, no double-newline sequences expected"
2229 );
2230 }
2231
2232 #[tokio::test]
2233 async fn fetch_reasoning_strategies_sanitizes_injection_payloads() {
2234 let mock = MockMemoryBackend {
2236 reasoning_strategies: vec![MemReasoningStrategy {
2237 id: "s1".to_string(),
2238 outcome: "success".to_string(),
2239 summary: "strategy with <b>bold</b> and\nnewline".to_string(),
2240 }],
2241 ..Default::default()
2242 };
2243 let mut view = mock_view(mock);
2244 view.reasoning_config.enabled = true;
2245 let tc = NaiveTokenCounter;
2246 let (result, _handle) = fetch_reasoning_strategies(&view, "query", 1000, 3, &tc)
2247 .await
2248 .unwrap();
2249 assert!(result.is_some());
2250 let msg = result.unwrap();
2251 assert!(
2252 !msg.content.contains('<'),
2253 "angle brackets must be sanitized in strategy summaries"
2254 );
2255 }
2256
2257 #[tokio::test]
2260 async fn fetch_persona_facts_truncates_at_budget() {
2261 let tc = NaiveTokenCounter;
2262 let first_line = "[pref] brief\n";
2264 let budget = tc.count_tokens(crate::slot::PERSONA_PREFIX) + tc.count_tokens(first_line);
2265 let mock = MockMemoryBackend {
2266 persona_facts: vec![
2267 MemPersonaFact {
2268 category: "pref".to_string(),
2269 content: "brief".to_string(),
2270 },
2271 MemPersonaFact {
2272 category: "lang".to_string(),
2273 content: "english".to_string(),
2274 },
2275 ],
2276 ..Default::default()
2277 };
2278 let mut view = mock_view(mock);
2279 view.persona_config.enabled = true;
2280 let result = fetch_persona_facts(&view, budget, &tc).await.unwrap();
2281 let msg = result.unwrap();
2282 assert!(msg.content.contains("brief"), "first fact must be included");
2283 assert!(
2284 !msg.content.contains("english"),
2285 "second fact must be truncated by budget"
2286 );
2287 }
2288
2289 #[tokio::test]
2290 async fn fetch_semantic_recall_truncates_at_budget() {
2291 let tc = NaiveTokenCounter;
2292 let first_entry = "- [user] first message\n";
2294 let budget = tc.count_tokens(RECALL_PREFIX) + tc.count_tokens(first_entry);
2295 let mock = MockMemoryBackend {
2296 recalled: vec![
2297 MemRecalledMessage {
2298 role: "user".to_string(),
2299 content: "first message".to_string(),
2300 score: 0.95,
2301 },
2302 MemRecalledMessage {
2303 role: "user".to_string(),
2304 content: "second message that should be truncated".to_string(),
2305 score: 0.80,
2306 },
2307 ],
2308 ..Default::default()
2309 };
2310 let mut view = mock_view(mock);
2311 view.recall_limit = 10;
2312 let (msg, _) = fetch_semantic_recall(&view, "query", budget, &tc, None)
2313 .await
2314 .unwrap();
2315 assert!(msg.is_some());
2316 let text = msg
2317 .unwrap()
2318 .parts
2319 .iter()
2320 .find_map(|p| {
2321 if let zeph_llm::provider::MessagePart::Recall { text } = p {
2322 Some(text.clone())
2323 } else {
2324 None
2325 }
2326 })
2327 .unwrap_or_default();
2328 assert!(
2329 text.contains("first message"),
2330 "first entry must be included"
2331 );
2332 assert!(
2333 !text.contains("second message"),
2334 "second entry must be truncated by budget"
2335 );
2336 }
2337
2338 #[tokio::test]
2341 async fn fetch_graph_facts_sanitizes_provenance_snippet() {
2342 use zeph_common::memory::MemGraphNeighbor;
2343 let mock = MockMemoryBackend {
2344 graph_facts: vec![zeph_common::memory::MemGraphFact {
2345 fact: "safe fact".to_string(),
2346 confidence: 0.9,
2347 activation_score: None,
2348 neighbors: vec![MemGraphNeighbor {
2349 fact: "neighbor".to_string(),
2350 confidence: 0.7,
2351 }],
2352 provenance_snippet: Some("source with <injection>\nand newline".to_string()),
2353 }],
2354 ..Default::default()
2355 };
2356 let mut view = mock_view(mock);
2357 view.graph_config.enabled = true;
2358 view.graph_config.spreading_activation.recall_timeout_ms = 5000;
2359 let tc = NaiveTokenCounter;
2360 let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
2361 assert!(result.is_some());
2362 let msg = result.unwrap();
2363 assert!(
2364 !msg.content.contains('<'),
2365 "angle brackets in provenance_snippet must be sanitized"
2366 );
2367 assert!(
2368 !msg.content.contains("\n\n"),
2369 "newlines in provenance_snippet must be sanitized"
2370 );
2371 assert!(
2372 msg.content.contains("[source:"),
2373 "provenance snippet must be rendered"
2374 );
2375 }
2376}