1use zeph_context::budget::ContextBudget;
7use zeph_context::fidelity::FidelityScorer;
8use zeph_llm::LlmProvider;
9use zeph_llm::provider::{Message, MessagePart, Role};
10
11use crate::error::ContextError;
12use crate::helpers::{
13 CODE_CONTEXT_PREFIX, CORRECTIONS_PREFIX, CROSS_SESSION_PREFIX, DOCUMENT_RAG_PREFIX,
14 GRAPH_FACTS_PREFIX, LSP_NOTE_PREFIX, PERSONA_PREFIX, REASONING_PREFIX, RECALL_PREFIX,
15 SESSION_DIGEST_PREFIX, SUMMARY_PREFIX, TRAJECTORY_PREFIX, TREE_MEMORY_PREFIX,
16};
17use crate::state::{
18 ContextAssemblyView, ContextDelta, ContextSummarizationView, MessageWindowView,
19 ProviderHandles, StatusSink,
20};
21
/// Inputs for a single semantic-recall request.
///
/// Bundles everything `ContextService::run_tiered_recall` reads, so recall can be
/// driven without a full `ContextAssemblyView`.
pub struct SemanticRecallParams<'a> {
    /// Query text forwarded to the recall backend.
    pub query: &'a str,
    /// Token budget forwarded to the recall backend.
    pub token_budget: usize,
    /// Recall limit; only read on the non-tiered path.
    pub recall_limit: usize,
    /// Context format; only read on the non-tiered path.
    pub context_format: zeph_config::ContextFormat,
    /// Conversation id handed to tiered recall, when one is available.
    pub conversation_id: Option<zeph_memory::ConversationId>,
    /// Optional provider handed to tiered recall as its classifier.
    pub tiered_classifier: Option<&'a std::sync::Arc<zeph_llm::any::AnyProvider>>,
    /// Optional provider handed to tiered recall as its validator.
    pub tiered_validator: Option<&'a std::sync::Arc<zeph_llm::any::AnyProvider>>,
    /// Tiered-retrieval settings; its `enabled` flag selects the tiered path.
    pub tiered_config: &'a zeph_config::memory::TieredRetrievalConfig,
}
47
/// Field-less service type; its methods operate on the views passed to them.
#[derive(Debug, Default)]
pub struct ContextService;
67
68impl ContextService {
    /// Creates the service value. The type has no fields, so this is free.
    #[must_use]
    pub fn new() -> Self {
        Self
    }
76
77 pub fn clear_history(&self, window: &mut MessageWindowView<'_>) {
85 let system_prompt = window.messages.first().cloned();
86 window.messages.clear();
87 if let Some(sp) = system_prompt {
88 window.messages.push(sp);
89 }
90 window.completed_tool_ids.clear();
91 recompute_prompt_tokens(window);
92 }
93
94 pub fn remove_recall_messages(&self, window: &mut MessageWindowView<'_>) {
96 remove_by_part_or_prefix(window.messages, RECALL_PREFIX, |p| {
97 matches!(p, MessagePart::Recall { .. })
98 });
99 }
100
    /// Strips past-correction messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `CORRECTIONS_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_correction_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, CORRECTIONS_PREFIX);
    }
105
    /// Strips knowledge-graph fact messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `GRAPH_FACTS_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_graph_facts_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, GRAPH_FACTS_PREFIX);
    }
110
    /// Strips persona-fact messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `PERSONA_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_persona_facts_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, PERSONA_PREFIX);
    }
115
    /// Strips trajectory-hint messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `TRAJECTORY_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_trajectory_hints_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, TRAJECTORY_PREFIX);
    }
120
    /// Strips tree-memory messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `TREE_MEMORY_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_tree_memory_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, TREE_MEMORY_PREFIX);
    }
125
    /// Strips reasoning-strategy messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `REASONING_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_reasoning_strategies_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, REASONING_PREFIX);
    }
130
    /// Strips LSP note messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `LSP_NOTE_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_lsp_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, LSP_NOTE_PREFIX);
    }
138
139 pub fn remove_code_context_messages(&self, window: &mut MessageWindowView<'_>) {
141 remove_by_part_or_prefix(window.messages, CODE_CONTEXT_PREFIX, |p| {
142 matches!(p, MessagePart::CodeContext { .. })
143 });
144 }
145
146 pub fn remove_summary_messages(&self, window: &mut MessageWindowView<'_>) {
148 remove_by_part_or_prefix(window.messages, SUMMARY_PREFIX, |p| {
149 matches!(p, MessagePart::Summary { .. })
150 });
151 }
152
153 pub fn remove_cross_session_messages(&self, window: &mut MessageWindowView<'_>) {
155 remove_by_part_or_prefix(window.messages, CROSS_SESSION_PREFIX, |p| {
156 matches!(p, MessagePart::CrossSession { .. })
157 });
158 }
159
    /// Strips the session-digest message from the window.
    ///
    /// Unlike the other prefix-based removers this one targets `Role::User`, the
    /// role the digest message is created with when it is injected. Delegates to
    /// `remove_by_prefix` with `SESSION_DIGEST_PREFIX`.
    pub fn remove_session_digest_message(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::User, SESSION_DIGEST_PREFIX);
    }
164
    /// Strips document-RAG messages from the window.
    ///
    /// Delegates to `remove_by_prefix` with `Role::System` and `DOCUMENT_RAG_PREFIX`;
    /// presumably that matches on role plus content prefix — confirm against the helper.
    pub fn remove_document_rag_messages(&self, window: &mut MessageWindowView<'_>) {
        remove_by_prefix(window.messages, Role::System, DOCUMENT_RAG_PREFIX);
    }
169
170 pub fn trim_messages_to_budget(&self, window: &mut MessageWindowView<'_>, token_budget: usize) {
178 if token_budget == 0 {
179 return;
180 }
181
182 let history_start = window
184 .messages
185 .iter()
186 .position(|m| m.role != Role::System)
187 .unwrap_or(window.messages.len());
188
189 if history_start >= window.messages.len() {
190 return;
191 }
192
193 let mut total = 0usize;
194 let mut keep_from = window.messages.len();
195
196 for i in (history_start..window.messages.len()).rev() {
197 let msg_tokens = window
198 .token_counter
199 .count_message_tokens(&window.messages[i]);
200 if total + msg_tokens > token_budget {
201 break;
202 }
203 total += msg_tokens;
204 keep_from = i;
205 }
206
207 if keep_from > history_start {
208 let removed = keep_from - history_start;
209 window.messages.drain(history_start..keep_from);
210 recompute_prompt_tokens(window);
211 tracing::info!(
212 removed,
213 token_budget,
214 "trimmed messages to fit context budget"
215 );
216 }
217 }
218
219 #[tracing::instrument(name = "agent_context.service.inject_semantic_recall", skip_all, err)]
231 pub async fn inject_semantic_recall(
232 &self,
233 query: &str,
234 token_budget: usize,
235 window: &mut MessageWindowView<'_>,
236 view: &ContextAssemblyView<'_>,
237 ) -> Result<(), ContextError> {
238 self.remove_recall_messages(window);
239
240 let params = SemanticRecallParams {
241 query,
242 token_budget,
243 recall_limit: view.recall_limit,
244 context_format: view.context_format,
245 conversation_id: view.conversation_id,
246 tiered_classifier: view.tiered_retrieval_classifier.as_ref(),
247 tiered_validator: view.tiered_retrieval_validator.as_ref(),
248 tiered_config: &view.tiered_retrieval_config,
249 };
250 let msg = self
251 .run_tiered_recall(¶ms, window, view.memory.as_deref())
252 .await?;
253
254 if let Some(msg) = msg
255 && window.messages.len() > 1
256 {
257 window.messages.insert(1, msg);
258 }
259
260 Ok(())
261 }
262
263 #[tracing::instrument(
275 name = "agent_context.service.inject_semantic_recall_bare",
276 skip_all,
277 err
278 )]
279 pub async fn inject_semantic_recall_bare(
280 &self,
281 params: SemanticRecallParams<'_>,
282 window: &mut MessageWindowView<'_>,
283 memory: Option<&zeph_memory::semantic::SemanticMemory>,
284 ) -> Result<(), ContextError> {
285 self.remove_recall_messages(window);
286
287 let msg = self.run_tiered_recall(¶ms, window, memory).await?;
288
289 if let Some(msg) = msg
290 && window.messages.len() > 1
291 {
292 window.messages.insert(1, msg);
293 }
294
295 Ok(())
296 }
297
298 async fn run_tiered_recall(
303 &self,
304 params: &SemanticRecallParams<'_>,
305 window: &MessageWindowView<'_>,
306 memory: Option<&zeph_memory::semantic::SemanticMemory>,
307 ) -> Result<Option<Message>, ContextError> {
308 if params.tiered_config.enabled {
309 use tracing::Instrument as _;
310 let Some(mem) = memory else {
311 return Ok(None);
312 };
313 let result = tokio::time::timeout(
314 std::time::Duration::from_secs(30),
315 zeph_memory::recall_tiered(
316 mem,
317 params.query,
318 params.conversation_id,
319 params.tiered_classifier,
320 params.tiered_validator,
321 params.tiered_config,
322 Some(params.token_budget),
323 )
324 .instrument(tracing::info_span!("agent_context.tiered_retrieval.recall")),
325 )
326 .await
327 .map_err(|_| {
328 tracing::warn!("tiered_retrieval: recall_tiered timed out after 30s");
329 ContextError::Memory(zeph_memory::MemoryError::Timeout(
330 "recall_tiered timed out".to_owned(),
331 ))
332 })?
333 .map_err(ContextError::Memory)?;
334
335 tracing::debug!(
336 intent = %result.intent,
337 tokens_used = result.tokens_used,
338 tier_escalated = result.tier_escalated,
339 count = result.messages.len(),
340 "tiered_retrieval: recall complete"
341 );
342
343 if result.messages.is_empty() {
344 return Ok(None);
345 }
346
347 let recalled_text = result
348 .messages
349 .iter()
350 .map(|m| m.message.content.as_str())
351 .collect::<Vec<_>>()
352 .join("\n---\n");
353 Ok(Some(Message::from_legacy(
354 Role::User,
355 format!("{RECALL_PREFIX}{recalled_text}"),
356 )))
357 } else {
358 let (msg, _score) = crate::helpers::fetch_semantic_recall_raw(
359 memory,
360 params.recall_limit,
361 params.context_format,
362 params.query,
363 params.token_budget,
364 &window.token_counter,
365 None,
366 None,
367 )
368 .await?;
369 Ok(msg)
370 }
371 }
372
373 #[tracing::instrument(
382 name = "agent_context.service.inject_cross_session_context",
383 skip_all,
384 err
385 )]
386 pub async fn inject_cross_session_context(
387 &self,
388 query: &str,
389 token_budget: usize,
390 window: &mut MessageWindowView<'_>,
391 view: &ContextAssemblyView<'_>,
392 ) -> Result<(), ContextError> {
393 self.remove_cross_session_messages(window);
394
395 if let Some(msg) = crate::helpers::fetch_cross_session_raw(
396 view.memory.as_deref(),
397 view.conversation_id,
398 view.cross_session_score_threshold,
399 query,
400 token_budget,
401 &view.token_counter,
402 )
403 .await?
404 && window.messages.len() > 1
405 {
406 window.messages.insert(1, msg);
407 tracing::debug!("injected cross-session context");
408 }
409
410 Ok(())
411 }
412
413 #[tracing::instrument(name = "agent_context.service.inject_summaries", skip_all, err)]
422 pub async fn inject_summaries(
423 &self,
424 token_budget: usize,
425 window: &mut MessageWindowView<'_>,
426 view: &ContextAssemblyView<'_>,
427 ) -> Result<(), ContextError> {
428 self.remove_summary_messages(window);
429
430 if let Some(msg) = crate::helpers::fetch_summaries_raw(
431 view.memory.as_deref(),
432 view.conversation_id,
433 token_budget,
434 &view.token_counter,
435 )
436 .await?
437 && window.messages.len() > 1
438 {
439 window.messages.insert(1, msg);
440 tracing::debug!("injected summaries into context");
441 }
442
443 Ok(())
444 }
445
446 #[tracing::instrument(name = "agent_context.service.disambiguate_skills", skip_all)]
451 pub async fn disambiguate_skills(
452 &self,
453 query: &str,
454 all_meta: &[&zeph_skills::loader::SkillMeta],
455 scored: &[zeph_skills::ScoredMatch],
456 providers: &ProviderHandles,
457 ) -> Option<Vec<usize>> {
458 use std::fmt::Write as _;
459
460 let mut candidates = String::new();
461 for sm in scored {
462 if let Some(meta) = all_meta.get(sm.index) {
463 let _ = writeln!(
464 candidates,
465 "- {} (score: {:.3}): {}",
466 meta.name, sm.score, meta.description
467 );
468 }
469 }
470
471 let prompt = format!(
472 "The user said: \"{query}\"\n\n\
473 These skills matched with similar scores:\n{candidates}\n\
474 Which skill best matches the user's intent? \
475 Return the skill_name, your confidence (0-1), and any extracted parameters."
476 );
477
478 let messages = vec![zeph_llm::provider::Message::from_legacy(
479 zeph_llm::provider::Role::User,
480 prompt,
481 )];
482 match providers
483 .disambiguate
484 .chat_typed::<zeph_skills::IntentClassification>(&messages)
485 .await
486 {
487 Ok(classification) => {
488 tracing::info!(
489 skill = %classification.skill_name,
490 confidence = classification.confidence,
491 "disambiguation selected skill"
492 );
493 let mut indices: Vec<usize> = scored.iter().map(|s| s.index).collect();
494 if let Some(pos) = indices.iter().position(|&i| {
495 all_meta
496 .get(i)
497 .is_some_and(|m| m.name == classification.skill_name)
498 }) {
499 indices.swap(0, pos);
500 }
501 Some(indices)
502 }
503 Err(e) => {
504 tracing::warn!("disambiguation failed, using original order: {e:#}");
505 None
506 }
507 }
508 }
509
510 #[allow(clippy::too_many_lines)] #[tracing::instrument(name = "agent_context.service.prepare_context", skip_all, err)]
523 pub async fn prepare_context(
524 &self,
525 query: &str,
526 window: &mut MessageWindowView<'_>,
527 view: &mut ContextAssemblyView<'_>,
528 ) -> Result<ContextDelta, ContextError> {
529 if view.context_manager.budget.is_none() {
530 return Ok(ContextDelta::default());
531 }
532
533 self.remove_session_digest_message(window);
535 self.remove_summary_messages(window);
536 self.remove_cross_session_messages(window);
537 self.remove_recall_messages(window);
538 self.remove_document_rag_messages(window);
539 self.remove_correction_messages(window);
540 self.remove_code_context_messages(window);
541 self.remove_graph_facts_messages(window);
542 self.remove_persona_facts_messages(window);
543 self.remove_trajectory_hints_messages(window);
544 self.remove_tree_memory_messages(window);
545 if view.reasoning_config.enabled {
546 self.remove_reasoning_strategies_messages(window);
547 }
548
549 if let Some(explorer) = view.proactive_explorer.clone()
551 && let Some(domain) = explorer.classify(query)
552 {
553 let already_known = {
554 let registry_guard = view.skill_registry.read();
555 explorer.has_knowledge(®istry_guard, &domain)
556 };
557 let excluded = explorer.is_excluded(&domain);
558
559 if !already_known && !excluded {
560 tracing::debug!(domain = %domain.0, query_len = query.len(), "proactive.explore triggered");
561 let timeout_ms = explorer.timeout_ms();
562 let result = tokio::time::timeout(
563 std::time::Duration::from_millis(timeout_ms),
564 explorer.explore(&domain),
565 )
566 .await;
567 match result {
568 Ok(Ok(())) => {
569 view.skill_registry.write().reload(view.skill_paths);
570 tracing::debug!(domain = %domain.0, "proactive.explore complete, registry reloaded");
571 }
572 Ok(Err(e)) => {
573 tracing::warn!(domain = %domain.0, error = %e, "proactive exploration failed");
574 }
575 Err(_) => {
576 tracing::warn!(domain = %domain.0, timeout_ms, "proactive exploration timed out");
577 }
578 }
579 }
580 }
581
582 let active_levels: &'static [zeph_memory::compression::CompressionLevel] =
584 if let Some(ref budget) = view.context_manager.budget {
585 let used = view.cached_prompt_tokens;
586 let max = budget.max_tokens();
587 #[allow(clippy::cast_precision_loss)]
588 let remaining_ratio = if max == 0 {
589 1.0_f32
590 } else {
591 1.0 - (used as f32 / max as f32).clamp(0.0, 1.0)
592 };
593 let levels =
594 zeph_memory::compression::RetrievalPolicy::default().select(remaining_ratio);
595 tracing::debug!(
596 remaining_ratio,
597 active_levels = ?levels,
598 "compression_spectrum: retrieval policy selected"
599 );
600 levels
601 } else {
602 &[]
603 };
604
605 let memory_backend: Option<std::sync::Arc<dyn zeph_common::memory::ContextMemoryBackend>> =
606 view.memory.clone().map(
607 |m| -> std::sync::Arc<dyn zeph_common::memory::ContextMemoryBackend> {
608 std::sync::Arc::new(crate::memory_backend::SemanticMemoryBackend::new(m))
609 },
610 );
611
612 let memory_view = zeph_context::input::ContextMemoryView {
613 memory: memory_backend,
614 conversation_id: view.conversation_id.map(|c| c.0),
615 recall_limit: view.recall_limit,
616 cross_session_score_threshold: view.cross_session_score_threshold,
617 context_strategy: view.context_strategy,
618 crossover_turn_threshold: view.crossover_turn_threshold,
619 cached_session_digest: view.cached_session_digest.clone(),
620 graph_config: view.graph_config.clone(),
621 document_config: view.document_config.clone(),
622 persona_config: view.persona_config.clone(),
623 trajectory_config: view.trajectory_config.clone(),
624 reasoning_config: view.reasoning_config.clone(),
625 memcot_config: view.memcot_config.clone(),
626 memcot_state: view.memcot_state.clone(),
627 tree_config: view.tree_config.clone(),
628 };
629
630 #[cfg(feature = "index")]
631 let index_access = view.index;
632 #[cfg(not(feature = "index"))]
633 let index_access: Option<&dyn zeph_context::input::IndexAccess> = None;
634
635 let router = crate::memory_backend::build_memory_router(view.context_manager);
636
637 let input = zeph_context::input::ContextAssemblyInput {
638 memory: &memory_view,
639 context_manager: view.context_manager,
640 token_counter: &*view.token_counter,
641 skills_prompt: view.last_skills_prompt,
642 index: index_access,
643 correction_config: view.correction_config,
644 sidequest_turn_counter: view.sidequest_turn_counter,
645 messages: window.messages,
646 query,
647 scrub: view.scrub,
648 active_levels,
649 router,
650 planned_next_tools: view.planned_next_tools,
651 };
652
653 let mut prepared = zeph_context::assembler::ContextAssembler::gather(&input).await?;
654
655 if view.tiered_retrieval_config.enabled {
660 prepared.recall = None;
661 }
662
663 for handle in prepared.background_tasks.drain(..) {
667 let task_supervisor = std::sync::Arc::clone(&view.task_supervisor);
668 task_supervisor.spawn(zeph_common::task_supervisor::TaskDescriptor {
669 name: "context.assembly.background",
670 restart: zeph_common::task_supervisor::RestartPolicy::RunOnce,
671 factory: {
672 let cell = std::sync::Arc::new(std::sync::Mutex::new(Some(async move {
673 let _ = handle.await;
674 })));
675 move || {
676 let f = cell.lock().ok().and_then(|mut g| g.take());
677 async move {
678 if let Some(f) = f {
679 f.await;
680 }
681 }
682 }
683 },
684 });
685 }
686
687 let (delta, inserted_count) = self.apply_prepared_context(window, view, prepared).await;
688
689 if view.tiered_retrieval_config.enabled {
690 self.inject_semantic_recall(query, usize::MAX, window, view)
691 .await?;
692 }
693
694 let memory_first_active =
699 view.context_strategy == zeph_config::ContextStrategy::MemoryFirst;
700 if let Some(fidelity_cfg) = view.fidelity_config
701 && fidelity_cfg.enabled
702 && !memory_first_active
703 {
704 use tracing::Instrument as _;
705 if let Some(ref tx) = view.status_tx {
706 let _ = tx.send("Scoring context fidelity\u{2026}".into());
707 }
708 let embed_provider = view
709 .fidelity_semantic_provider
710 .as_deref()
711 .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
712 let compress_provider = view
713 .fidelity_compress_provider
714 .as_deref()
715 .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
716 let fidelity_span = tracing::info_span!(
717 "context.fidelity.score",
718 message_count = window.messages.len(),
719 query_len = query.len(),
720 );
721 FidelityScorer
722 .score_and_apply(
723 window.messages,
724 query,
725 view.planned_next_tools,
726 fidelity_cfg,
727 &*view.token_counter,
728 inserted_count,
729 false, embed_provider,
731 compress_provider,
732 )
733 .instrument(fidelity_span)
734 .await;
735 persist_fidelity_tags(window.messages, view.memory.as_deref()).await;
737 recompute_prompt_tokens(window);
738 if let Some(ref tx) = view.status_tx {
739 let _ = tx.send(String::new());
740 }
741 }
742
743 Ok(delta)
744 }
745
    /// Applies a gathered `PreparedContext` to the message window.
    ///
    /// Each present context piece is sanitized and inserted at index 1, so a piece
    /// inserted later ends up in front of the ones inserted before it. Insertion
    /// is skipped for every piece when the window holds fewer than two messages.
    ///
    /// Returns the `ContextDelta` (carrying the sanitized code context, if any)
    /// and the number of messages inserted by this call.
    #[allow(clippy::too_many_lines)]
    async fn apply_prepared_context(
        &self,
        window: &mut MessageWindowView<'_>,
        view: &mut ContextAssemblyView<'_>,
        prepared: zeph_context::assembler::PreparedContext,
    ) -> (ContextDelta, usize) {
        use std::borrow::Cow;
        use zeph_llm::provider::{Message, MessageMetadata, Role};
        use zeph_sanitizer::{ContentSource, ContentSourceKind, MemorySourceHint};

        *view.last_recall_confidence = prepared.recall_confidence;

        // Memory-first: drop the history between index 1 and the retained tail.
        if prepared.memory_first {
            let history_start = 1usize;
            let len = window.messages.len();
            let keep_tail =
                zeph_context::assembler::memory_first_keep_tail(window.messages, history_start);
            if len > history_start + keep_tail {
                window.messages.drain(history_start..len - keep_tail);
                recompute_prompt_tokens(window);
                tracing::debug!(
                    strategy = "memory_first",
                    keep_tail,
                    "dropped conversation history, kept last {keep_tail} messages"
                );
            }
        }

        // Counts every message inserted below; returned to the caller.
        let mut inserted_count: usize = 0;

        if let Some(msg) = prepared.graph_facts.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected knowledge graph facts into context");
        }
        if let Some(msg) = prepared.doc_rag.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected document RAG context");
        }
        if let Some(msg) = prepared.corrections.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ConversationHistory, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected past corrections into context");
        }
        if let Some(msg) = prepared.recall.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ConversationHistory, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
        }
        if let Some(msg) = prepared.cross_session.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::LlmSummary, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
        }
        if let Some(msg) = prepared.summaries.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::LlmSummary, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected summaries into context");
        }
        if let Some(msg) = prepared.persona_facts.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected persona facts into context");
        }
        if let Some(msg) = prepared
            .trajectory_hints
            .filter(|_| window.messages.len() > 1)
        {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected trajectory hints into context");
        }
        if let Some(msg) = prepared.tree_memory.filter(|_| window.messages.len() > 1) {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected tree memory summary into context");
        }
        if let Some(msg) = prepared
            .reasoning_hints
            .filter(|_| window.messages.len() > 1)
        {
            let sanitized = self
                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected reasoning strategies into context");
        }

        // Code context is not inserted as a message: it is sanitized as a tool
        // result and returned to the caller through `ContextDelta`.
        let code_context = if let Some(text) = prepared.code_context {
            let sanitized = view
                .sanitizer
                .sanitize(&text, ContentSource::new(ContentSourceKind::ToolResult));
            view.metrics.sanitizer_runs += 1;
            if !sanitized.injection_flags.is_empty() {
                tracing::warn!(
                    flags = sanitized.injection_flags.len(),
                    "injection patterns detected in code RAG context"
                );
                view.metrics.sanitizer_injection_flags += sanitized.injection_flags.len() as u64;
                // Only the first flagged pattern is named in the security event.
                let detail = sanitized
                    .injection_flags
                    .first()
                    .map_or_else(String::new, |f| {
                        format!("Detected pattern: {}", f.pattern_name)
                    });
                view.security_events.push(
                    zeph_common::SecurityEventCategory::InjectionFlag,
                    "code_rag",
                    detail,
                );
            }
            if sanitized.was_truncated {
                view.metrics.sanitizer_truncations += 1;
                view.security_events.push(
                    zeph_common::SecurityEventCategory::Truncation,
                    "code_rag",
                    "Content truncated to max_content_size".to_string(),
                );
            }
            Some(sanitized.body)
        } else {
            None
        };

        // Under memory-first the history was already cut down above.
        if !prepared.memory_first {
            self.trim_messages_to_budget(window, prepared.recent_history_budget);
        }

        // The session digest is inserted after trimming, as a `Role::User` message.
        if view.digest_enabled
            && let Some((digest_text, _)) = view
                .cached_session_digest
                .clone()
                .filter(|_| window.messages.len() > 1)
        {
            let digest_msg = Message {
                role: Role::User,
                content: format!("{}{digest_text}", crate::helpers::SESSION_DIGEST_PREFIX),
                parts: vec![],
                metadata: MessageMetadata::default(),
            };
            let sanitized = self
                .sanitize_memory_message(digest_msg, MemorySourceHint::LlmSummary, view)
                .await;
            window.messages.insert(1, sanitized);
            inserted_count += 1;
            tracing::debug!("injected session digest into context");
        }

        // Scrub every non-system message; content is only replaced when the
        // scrubber returns an owned (i.e. modified) string.
        if view.redact_credentials {
            for msg in &mut *window.messages {
                if msg.role == Role::System {
                    continue;
                }
                if let Cow::Owned(s) = (view.scrub)(&msg.content) {
                    msg.content = s;
                }
            }
        }

        recompute_prompt_tokens(window);

        (ContextDelta { code_context }, inserted_count)
    }
953
954 async fn sanitize_memory_message(
964 &self,
965 mut msg: zeph_llm::provider::Message,
966 hint: zeph_sanitizer::MemorySourceHint,
967 view: &mut ContextAssemblyView<'_>,
968 ) -> zeph_llm::provider::Message {
969 use zeph_sanitizer::{ContentSource, ContentSourceKind};
970
971 let source = ContentSource::new(ContentSourceKind::MemoryRetrieval).with_memory_hint(hint);
972 let sanitized = view.sanitizer.sanitize(&msg.content, source);
973 view.metrics.sanitizer_runs += 1;
974 if !sanitized.injection_flags.is_empty() {
975 tracing::warn!(
976 flags = sanitized.injection_flags.len(),
977 "injection patterns detected in memory retrieval"
978 );
979 view.metrics.sanitizer_injection_flags += sanitized.injection_flags.len() as u64;
980 let detail = sanitized
981 .injection_flags
982 .first()
983 .map_or_else(String::new, |f| {
984 format!("Detected pattern: {}", f.pattern_name)
985 });
986 view.security_events.push(
987 zeph_common::SecurityEventCategory::InjectionFlag,
988 "memory_retrieval",
989 detail,
990 );
991 }
992 if sanitized.was_truncated {
993 view.metrics.sanitizer_truncations += 1;
994 view.security_events.push(
995 zeph_common::SecurityEventCategory::Truncation,
996 "memory_retrieval",
997 "Content truncated to max_content_size".to_string(),
998 );
999 }
1000
1001 if view.sanitizer.is_enabled()
1003 && let Some(qs) = view.quarantine_summarizer
1004 && qs.should_quarantine(ContentSourceKind::MemoryRetrieval)
1005 {
1006 match qs.extract_facts(&sanitized, view.sanitizer).await {
1007 Ok((facts, flags)) => {
1008 view.metrics.quarantine_invocations += 1;
1009 view.security_events.push(
1010 zeph_common::SecurityEventCategory::Quarantine,
1011 "memory_retrieval",
1012 "Content quarantined, facts extracted".to_string(),
1013 );
1014 let escaped = zeph_sanitizer::ContentSanitizer::escape_delimiter_tags(&facts);
1015 msg.content = zeph_sanitizer::ContentSanitizer::apply_spotlight(
1016 &escaped,
1017 &sanitized.source,
1018 &flags,
1019 );
1020 return msg;
1021 }
1022 Err(e) => {
1023 tracing::warn!(
1024 error = %e,
1025 "quarantine failed for memory retrieval, using original sanitized content"
1026 );
1027 view.metrics.quarantine_failures += 1;
1028 view.security_events.push(
1029 zeph_common::SecurityEventCategory::Quarantine,
1030 "memory_retrieval",
1031 format!("Quarantine failed: {e}"),
1032 );
1033 }
1034 }
1035 }
1036
1037 msg.content = sanitized.body;
1038 msg
1039 }
1040
    /// Resets the conversation by clearing the message history.
    ///
    /// Currently a thin wrapper over `clear_history`; `_view` is unused.
    ///
    /// # Errors
    ///
    /// Never fails in the visible implementation; it always returns `Ok(())`.
    pub fn reset_conversation(
        &self,
        window: &mut MessageWindowView<'_>,
        _view: &mut ContextAssemblyView<'_>,
    ) -> Result<(), ContextError> {
        self.clear_history(window);
        Ok(())
    }
1058
    /// Decides whether the context needs compacting this turn and runs the
    /// matching tier.
    ///
    /// Order of checks:
    /// 1. Bump the turns-since-last-hard-compaction counter, if one is tracked.
    /// 2. If compaction is exhausted, warn once and return.
    /// 3. If server-side compaction is active, return unless the cached prompt
    ///    tokens have reached 95% of the budget (or return when the budget is 0).
    /// 4. Return if compaction already ran this turn.
    /// 5. Tick the cooldown counter down by one turn.
    /// 6. Optionally run a proactive fidelity regrade.
    /// 7. Dispatch to soft or hard compaction based on the compaction tier.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `do_hard_compaction`.
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss,
        clippy::too_many_lines
    )]
    #[tracing::instrument(name = "agent_context.service.maybe_compact", skip_all, err)]
    pub async fn maybe_compact(
        &self,
        summ: &mut ContextSummarizationView<'_>,
        status: &(impl StatusSink + ?Sized),
    ) -> Result<(), ContextError> {
        use zeph_context::manager::{CompactionState, CompactionTier};

        if let Some(count) = summ.context_manager.turns_since_last_hard_compaction_mut() {
            *count += 1;
        }

        // Emit the exhaustion warning only once, by flipping `warned` to true.
        if let CompactionState::Exhausted { warned } = summ.context_manager.compaction_state()
            && !warned
        {
            summ.context_manager
                .set_compaction_state(CompactionState::Exhausted { warned: true });
            tracing::warn!("compaction exhausted: context budget too tight for this session");
        }
        if summ.context_manager.compaction_state().is_exhausted() {
            return Ok(());
        }

        if summ.server_compaction_active {
            let budget = summ
                .context_manager
                .budget
                .as_ref()
                .map_or(0, ContextBudget::max_tokens);
            if budget > 0 {
                // Client-side fallback threshold: 95% of the budget.
                let fallback = (budget * 95 / 100) as u64;
                if *summ.cached_prompt_tokens < fallback {
                    return Ok(());
                }
                tracing::warn!(
                    "server compaction active but context at 95%+ — falling back to client-side"
                );
            } else {
                return Ok(());
            }
        }

        if summ
            .context_manager
            .compaction_state()
            .is_compacted_this_turn()
        {
            return Ok(());
        }

        // Captured before the decrement below, so hard compaction is still
        // treated as "in cooldown" on the turn the counter reaches zero.
        let in_cooldown = summ.context_manager.compaction_state().cooldown_remaining() > 0;
        if in_cooldown
            && let CompactionState::Cooling { turns_remaining } =
                summ.context_manager.compaction_state()
        {
            let next = turns_remaining - 1;
            summ.context_manager.set_compaction_state(if next == 0 {
                CompactionState::Ready
            } else {
                CompactionState::Cooling {
                    turns_remaining: next,
                }
            });
        }

        // The config is cloned so it is not borrowed from `summ` while `summ`
        // is mutated inside the block.
        if let Some(ref fidelity_cfg) = summ.fidelity_config.clone()
            && fidelity_cfg.enabled
            && summ.context_manager.should_proactively_regrade(
                *summ.cached_prompt_tokens,
                fidelity_cfg.regrade_threshold,
                summ.server_compaction_active,
            )
        {
            use tracing::Instrument as _;
            let regrade_embed_provider = summ
                .fidelity_semantic_provider
                .as_deref()
                .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
            let regrade_compress_provider = summ
                .fidelity_compress_provider
                .as_deref()
                .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
            FidelityScorer
                .score_and_apply(
                    summ.messages,
                    &summ.current_query,
                    &[],
                    fidelity_cfg,
                    &*summ.token_counter,
                    0,
                    // NOTE(review): presumably marks this call as a regrade — confirm
                    // against `FidelityScorer::score_and_apply`.
                    true,
                    regrade_embed_provider,
                    regrade_compress_provider,
                )
                .instrument(tracing::info_span!(
                    "context.fidelity.regrade",
                    budget_ratio = tracing::field::Empty,
                ))
                .await;
            persist_fidelity_tags(summ.messages, summ.memory.as_deref()).await;
            recompute_prompt_tokens_summ(summ);
            summ.context_manager.set_regraded_this_turn(true);
            tracing::debug!(
                cached_tokens = *summ.cached_prompt_tokens,
                "AgeMem proactive regrade complete"
            );
        }

        // The tier is evaluated after the optional regrade, using the refreshed
        // cached token count.
        match summ
            .context_manager
            .compaction_tier(*summ.cached_prompt_tokens)
        {
            CompactionTier::Soft => {
                self.do_soft_compaction(summ, status).await;
                Ok(())
            }
            CompactionTier::Hard => self.do_hard_compaction(summ, status, in_cooldown).await,
            _ => Ok(()),
        }
    }
1210
1211 #[allow(
1216 clippy::cast_precision_loss,
1217 clippy::cast_possible_truncation,
1218 clippy::cast_sign_loss
1219 )]
1220 async fn do_soft_compaction(
1221 &self,
1222 summ: &mut ContextSummarizationView<'_>,
1223 status: &(impl StatusSink + ?Sized),
1224 ) {
1225 status.send_status("soft compacting context...").await;
1226
1227 match &summ.context_manager.compression.pruning_strategy {
1229 zeph_config::PruningStrategy::Subgoal | zeph_config::PruningStrategy::SubgoalMig => {
1230 crate::summarization::scheduling::maybe_refresh_subgoal(summ);
1231 }
1232 _ => crate::summarization::scheduling::maybe_refresh_task_goal(summ),
1233 }
1234
1235 let applied = crate::summarization::deferred::apply_deferred_summaries(summ);
1237
1238 if applied > 0
1240 && summ
1241 .context_manager
1242 .compression
1243 .pruning_strategy
1244 .is_subgoal()
1245 {
1246 summ.subgoal_registry
1247 .rebuild_after_compaction(summ.messages, 0);
1248 }
1249
1250 let budget = summ
1252 .context_manager
1253 .budget
1254 .as_ref()
1255 .map_or(0, ContextBudget::max_tokens);
1256 let soft_threshold =
1257 (budget as f32 * summ.context_manager.soft_compaction_threshold) as usize;
1258 let cached = usize::try_from(*summ.cached_prompt_tokens).unwrap_or(usize::MAX);
1259 let min_to_free = cached.saturating_sub(soft_threshold);
1260 if min_to_free > 0 {
1261 crate::summarization::pruning::prune_tool_outputs(summ, min_to_free);
1262 }
1263
1264 status.send_status("").await;
1265 tracing::info!(
1266 cached_tokens = *summ.cached_prompt_tokens,
1267 soft_threshold,
1268 "soft compaction complete"
1269 );
1270 }
1271
1272 #[allow(
1274 clippy::cast_precision_loss,
1275 clippy::cast_possible_truncation,
1276 clippy::cast_sign_loss
1277 )]
1278 async fn do_hard_compaction(
1279 &self,
1280 summ: &mut ContextSummarizationView<'_>,
1281 status: &(impl StatusSink + ?Sized),
1282 in_cooldown: bool,
1283 ) -> Result<(), ContextError> {
1284 use zeph_context::manager::CompactionState;
1285
1286 let turns_since_last = summ
1288 .context_manager
1289 .turns_since_last_hard_compaction()
1290 .map(|t| u32::try_from(t).unwrap_or(u32::MAX));
1291 summ.context_manager
1292 .set_turns_since_last_hard_compaction(Some(0));
1293 if let Some(metrics) = summ.metrics {
1294 metrics.record_hard_compaction(turns_since_last);
1295 }
1296
1297 if in_cooldown {
1298 tracing::debug!(
1299 turns_remaining = summ.context_manager.compaction_state().cooldown_remaining(),
1300 "hard compaction skipped: cooldown active"
1301 );
1302 return Ok(());
1303 }
1304
1305 let budget = summ
1306 .context_manager
1307 .budget
1308 .as_ref()
1309 .map_or(0, ContextBudget::max_tokens);
1310 let hard_threshold =
1311 (budget as f32 * summ.context_manager.hard_compaction_threshold) as usize;
1312 let cached = usize::try_from(*summ.cached_prompt_tokens).unwrap_or(usize::MAX);
1313 let min_to_free = cached.saturating_sub(hard_threshold);
1314
1315 status.send_status("compacting context...").await;
1316
1317 crate::summarization::deferred::apply_deferred_summaries(summ);
1319
1320 let freed = crate::summarization::pruning::prune_tool_outputs(summ, min_to_free);
1322 if freed >= min_to_free {
1323 tracing::info!(freed, "hard compaction: pruning sufficient");
1324 summ.context_manager
1325 .set_compaction_state(CompactionState::CompactedThisTurn {
1326 cooldown: summ.context_manager.compaction_cooldown_turns(),
1327 });
1328 if let Err(e) = crate::summarization::deferred::flush_deferred_summaries(summ).await {
1329 tracing::warn!(%e, "flush_deferred_summaries failed after hard compaction");
1330 }
1331 status.send_status("").await;
1332 return Ok(());
1333 }
1334
1335 let preserve_tail = summ.context_manager.compaction_preserve_tail;
1337 let compactable = summ.messages.len().saturating_sub(preserve_tail + 1);
1338 if compactable <= 1 {
1339 tracing::warn!(
1340 compactable,
1341 "hard compaction: too few messages, marking exhausted"
1342 );
1343 summ.context_manager
1344 .set_compaction_state(CompactionState::Exhausted { warned: false });
1345 status.send_status("").await;
1346 return Ok(());
1347 }
1348
1349 tracing::info!(
1351 min_to_free,
1352 "hard compaction: falling back to LLM summarization"
1353 );
1354 let tokens_before = *summ.cached_prompt_tokens;
1355 let outcome = crate::summarization::compaction::compact_context(summ, None).await?;
1356
1357 let freed_tokens = tokens_before.saturating_sub(*summ.cached_prompt_tokens);
1358
1359 if !outcome.is_compacted() || freed_tokens == 0 {
1360 tracing::warn!("hard compaction: no net reduction, marking exhausted");
1361 summ.context_manager
1362 .set_compaction_state(CompactionState::Exhausted { warned: false });
1363 status.send_status("").await;
1364 return Ok(());
1365 }
1366
1367 if matches!(
1368 summ.context_manager
1369 .compaction_tier(*summ.cached_prompt_tokens),
1370 zeph_context::manager::CompactionTier::Hard
1371 ) {
1372 tracing::warn!(
1373 freed_tokens,
1374 "hard compaction: still above hard threshold after compaction, marking exhausted"
1375 );
1376 summ.context_manager
1377 .set_compaction_state(CompactionState::Exhausted { warned: false });
1378 status.send_status("").await;
1379 return Ok(());
1380 }
1381
1382 summ.context_manager
1383 .set_compaction_state(CompactionState::CompactedThisTurn {
1384 cooldown: summ.context_manager.compaction_cooldown_turns(),
1385 });
1386
1387 if tokens_before > *summ.cached_prompt_tokens {
1388 tracing::info!(
1389 tokens_before,
1390 tokens_after = *summ.cached_prompt_tokens,
1391 saved = freed_tokens,
1392 "context compaction complete"
1393 );
1394 }
1395
1396 status.send_status("").await;
1397 Ok(())
1398 }
1399
1400 #[tracing::instrument(name = "agent_context.service.maybe_summarize_tool_pair", skip_all)]
1406 pub async fn maybe_summarize_tool_pair(
1407 &self,
1408 summ: &mut ContextSummarizationView<'_>,
1409 providers: &ProviderHandles,
1410 ) {
1411 crate::summarization::deferred::maybe_summarize_tool_pair(
1412 summ,
1413 providers,
1414 &TxStatusSink(summ.status_tx.clone()),
1415 )
1416 .await;
1417 }
1418
1419 #[must_use]
1424 pub fn apply_deferred_summaries(&self, summ: &mut ContextSummarizationView<'_>) -> usize {
1425 crate::summarization::deferred::apply_deferred_summaries(summ)
1426 }
1427
1428 #[tracing::instrument(name = "agent_context.service.flush_deferred_summaries", skip_all)]
1433 pub async fn flush_deferred_summaries(&self, summ: &mut ContextSummarizationView<'_>) {
1434 if let Err(e) = crate::summarization::deferred::flush_deferred_summaries(summ).await {
1435 tracing::warn!(%e, "flush_deferred_summaries failed");
1436 }
1437 }
1438
1439 pub fn maybe_apply_deferred_summaries(&self, summ: &mut ContextSummarizationView<'_>) {
1445 crate::summarization::deferred::maybe_apply_deferred_summaries(summ);
1446 }
1447
1448 #[tracing::instrument(name = "agent_context.service.compact_context", skip_all, err)]
1463 pub async fn compact_context(
1464 &self,
1465 summ: &mut ContextSummarizationView<'_>,
1466 max_summary_tokens: Option<usize>,
1467 ) -> Result<crate::state::CompactionOutcome, crate::error::ContextError> {
1468 crate::summarization::compaction::compact_context(summ, max_summary_tokens).await
1469 }
1470
1471 pub fn maybe_soft_compact_mid_iteration(&self, summ: &mut ContextSummarizationView<'_>) {
1477 crate::summarization::scheduling::maybe_soft_compact_mid_iteration(summ);
1478 }
1479
1480 #[tracing::instrument(name = "agent_context.service.maybe_proactive_compress", skip_all)]
1486 pub async fn maybe_proactive_compress(
1487 &self,
1488 summ: &mut ContextSummarizationView<'_>,
1489 status: &(impl StatusSink + ?Sized),
1490 ) {
1491 let Some((_threshold, max_summary_tokens)) = summ
1492 .context_manager
1493 .should_proactively_compress(*summ.cached_prompt_tokens)
1494 else {
1495 return;
1496 };
1497
1498 if summ.server_compaction_active {
1499 let budget = summ
1500 .context_manager
1501 .budget
1502 .as_ref()
1503 .map_or(0, ContextBudget::max_tokens);
1504 if budget > 0 {
1505 let fallback = (budget * 95 / 100) as u64;
1506 if *summ.cached_prompt_tokens <= fallback {
1507 return;
1508 }
1509 tracing::warn!(
1510 cached_prompt_tokens = *summ.cached_prompt_tokens,
1511 fallback_threshold = fallback,
1512 "server compaction active but context at 95%+ — falling back to proactive"
1513 );
1514 } else {
1515 return;
1516 }
1517 }
1518
1519 status.send_status("compressing context...").await;
1520 tracing::info!(
1521 max_summary_tokens,
1522 cached_tokens = *summ.cached_prompt_tokens,
1523 "proactive compression triggered"
1524 );
1525
1526 match crate::summarization::compaction::compact_context(summ, Some(max_summary_tokens))
1527 .await
1528 {
1529 Ok(outcome) if outcome.is_compacted() => {
1530 summ.context_manager.set_compaction_state(
1531 zeph_context::manager::CompactionState::CompactedThisTurn { cooldown: 0 },
1532 );
1533 tracing::info!("proactive compression complete");
1534 }
1535 Ok(_) => {}
1536 Err(e) => tracing::warn!(%e, "proactive compression failed"),
1537 }
1538
1539 status.send_status("").await;
1540 }
1541
1542 pub fn maybe_refresh_task_goal(&self, summ: &mut ContextSummarizationView<'_>) {
1548 crate::summarization::scheduling::maybe_refresh_task_goal(summ);
1549 }
1550
1551 pub fn maybe_refresh_subgoal(&self, summ: &mut ContextSummarizationView<'_>) {
1556 crate::summarization::scheduling::maybe_refresh_subgoal(summ);
1557 }
1558}
1559
1560struct TxStatusSink(Option<tokio::sync::mpsc::UnboundedSender<String>>);
1566
1567impl StatusSink for TxStatusSink {
1568 fn send_status(&self, msg: &str) -> impl std::future::Future<Output = ()> + Send + '_ {
1569 if let Some(ref tx) = self.0 {
1570 let _ = tx.send(msg.to_owned());
1571 }
1572 std::future::ready(())
1573 }
1574}
1575
1576pub(crate) fn recompute_prompt_tokens(window: &mut MessageWindowView<'_>) {
1583 *window.cached_prompt_tokens = window
1584 .messages
1585 .iter()
1586 .map(|m| window.token_counter.count_message_tokens(m) as u64)
1587 .sum();
1588}
1589
1590async fn persist_fidelity_tags(
1599 messages: &[zeph_llm::provider::Message],
1600 memory: Option<&zeph_memory::semantic::SemanticMemory>,
1601) {
1602 let Some(mem) = memory else { return };
1603 let updates: Vec<(zeph_memory::MessageId, u8)> = messages
1604 .iter()
1605 .filter_map(|m| {
1606 let db_id = m.metadata.db_id?;
1607 let tag = m.metadata.fidelity_tag?;
1608 Some((zeph_memory::MessageId(db_id), tag as u8))
1609 })
1610 .collect();
1611 if updates.is_empty() {
1612 return;
1613 }
1614 if let Err(e) = mem.sqlite().update_fidelity_tags(&updates).await {
1615 tracing::warn!(
1616 count = updates.len(),
1617 error = %e,
1618 "failed to persist fidelity tags; floor invariant will not apply next turn"
1619 );
1620 }
1621}
1622
1623fn recompute_prompt_tokens_summ(summ: &mut crate::state::ContextSummarizationView<'_>) {
1627 *summ.cached_prompt_tokens = summ
1628 .messages
1629 .iter()
1630 .map(|m| summ.token_counter.count_message_tokens(m) as u64)
1631 .sum();
1632}
1633
1634pub(crate) fn remove_by_prefix(
1640 messages: &mut Vec<zeph_llm::provider::Message>,
1641 role: Role,
1642 prefix: &str,
1643) {
1644 messages.retain(|m| m.role != role || !m.content.starts_with(prefix));
1645}
1646
1647pub(crate) fn remove_by_part_or_prefix(
1656 messages: &mut Vec<zeph_llm::provider::Message>,
1657 prefix: &str,
1658 part_matches: impl Fn(&MessagePart) -> bool,
1659) {
1660 messages.retain(|m| {
1661 if m.role == Role::User {
1664 return !m.content.starts_with(prefix);
1665 }
1666 if m.role != Role::System {
1667 return true;
1668 }
1669 if m.parts.first().is_some_and(&part_matches) {
1670 return false;
1671 }
1672 !m.content.starts_with(prefix)
1673 });
1674}
1675
1676#[cfg(test)]
1677mod tests {
1678 use std::collections::HashSet;
1679 use std::sync::Arc;
1680
1681 use zeph_llm::provider::{Message, MessagePart, Role};
1682 use zeph_memory::TokenCounter;
1683
1684 use super::*;
1685 use crate::helpers::{GRAPH_FACTS_PREFIX, RECALL_PREFIX, SUMMARY_PREFIX};
1686 use crate::state::MessageWindowView;
1687
1688 fn make_counter() -> Arc<TokenCounter> {
1689 Arc::new(TokenCounter::default())
1690 }
1691
1692 fn make_window<'a>(
1693 messages: &'a mut Vec<Message>,
1694 cached: &'a mut u64,
1695 completed: &'a mut HashSet<String>,
1696 ) -> MessageWindowView<'a> {
1697 let last = Box::leak(Box::new(None::<i64>));
1698 let deferred_hide = Box::leak(Box::new(Vec::<i64>::new()));
1699 let deferred_summ = Box::leak(Box::new(Vec::<String>::new()));
1700 MessageWindowView {
1701 messages,
1702 last_persisted_message_id: last,
1703 deferred_db_hide_ids: deferred_hide,
1704 deferred_db_summaries: deferred_summ,
1705 cached_prompt_tokens: cached,
1706 token_counter: make_counter(),
1707 completed_tool_ids: completed,
1708 }
1709 }
1710
1711 fn sys(text: &str) -> Message {
1712 Message::from_legacy(Role::System, text)
1713 }
1714
1715 fn user(text: &str) -> Message {
1716 Message::from_legacy(Role::User, text)
1717 }
1718
1719 fn assistant(text: &str) -> Message {
1720 Message::from_legacy(Role::Assistant, text)
1721 }
1722
/// `clear_history` must keep only the leading system prompt and empty the
/// completed-tool set.
#[test]
fn clear_history_keeps_system_prompt() {
    let mut history = vec![sys("system"), user("hello"), assistant("hi")];
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    completed_tools.insert("tool_1".to_owned());
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.clear_history(&mut window);

    assert_eq!(window.messages.len(), 1);
    assert_eq!(window.messages[0].content, "system");
    assert!(
        window.completed_tool_ids.is_empty(),
        "completed_tool_ids must be cleared"
    );
}
1740
/// Clearing an already empty window must leave it empty.
#[test]
fn clear_history_empty_messages_is_noop() {
    let mut history: Vec<Message> = Vec::new();
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.clear_history(&mut window);

    assert!(window.messages.is_empty());
}
1752
/// A system message whose content starts with `RECALL_PREFIX` must be removed.
#[test]
fn remove_recall_messages_removes_by_prefix() {
    let recall_text = format!("{RECALL_PREFIX}some recalled text");
    let mut history = vec![sys("system"), sys(&recall_text), user("hello")];
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.remove_recall_messages(&mut window);

    assert_eq!(window.messages.len(), 2);
    let recall_left = window
        .messages
        .iter()
        .any(|m| m.content.starts_with(RECALL_PREFIX));
    assert!(!recall_left);
}
1774
/// A `Role::User` message carrying `RECALL_PREFIX` must be removed while an
/// ordinary user message survives.
#[test]
fn remove_recall_messages_removes_user_role_recall() {
    let recall_text = format!("{RECALL_PREFIX}recalled via tiered path");
    let mut history = vec![
        sys("system"),
        user(&recall_text),
        user("real user message"),
    ];
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.remove_recall_messages(&mut window);

    assert_eq!(
        window.messages.len(),
        2,
        "Role::User recall message must be removed"
    );
    let recall_left = window
        .messages
        .iter()
        .any(|m| m.content.starts_with(RECALL_PREFIX));
    assert!(!recall_left, "no message with RECALL_PREFIX must remain");
    let real_message_kept = window
        .messages
        .iter()
        .any(|m| m.content == "real user message");
    assert!(real_message_kept, "non-recall user message must survive");
}
1810
/// A system message prefixed with `GRAPH_FACTS_PREFIX` must be removed,
/// leaving the two unrelated messages.
#[test]
fn remove_graph_facts_messages_removes_matching() {
    let facts_text = format!("{GRAPH_FACTS_PREFIX}fact1");
    let mut history = vec![sys("system"), sys(&facts_text), user("hello")];
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.remove_graph_facts_messages(&mut window);

    assert_eq!(window.messages.len(), 2);
}
1826
/// A system message built from a `MessagePart::Summary` part must be removed,
/// leaving the two unrelated messages.
#[test]
fn remove_summary_messages_removes_by_part() {
    let summary_part = MessagePart::Summary {
        text: format!("{SUMMARY_PREFIX}old summary"),
    };
    let summary_message = Message::from_parts(Role::System, vec![summary_part]);
    let mut history = vec![sys("system"), summary_message, user("hello")];
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.remove_summary_messages(&mut window);

    assert_eq!(window.messages.len(), 2);
}
1847
/// Trimming with a budget of `0` must leave the message count unchanged.
#[test]
fn trim_messages_to_budget_zero_is_noop() {
    let mut history = vec![sys("system"), user("a"), assistant("b"), user("c")];
    let expected_len = history.len();
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.trim_messages_to_budget(&mut window, 0);

    assert_eq!(window.messages.len(), expected_len);
}
1860
/// Trimming to a budget of `1` must drop at least one message while keeping
/// the system prompt in first position.
#[test]
fn trim_messages_to_budget_keeps_recent() {
    let mut history = vec![
        sys("system"),
        user("message 1"),
        assistant("reply 1"),
        user("message 2"),
    ];
    let mut token_cache = 0u64;
    let mut completed_tools = HashSet::new();
    let mut window = make_window(&mut history, &mut token_cache, &mut completed_tools);

    let service = ContextService::new();
    service.trim_messages_to_budget(&mut window, 1);

    let remaining = window.messages.len();
    assert!(remaining < 4, "trim should remove some messages");
    assert_eq!(
        window.messages[0].role,
        Role::System,
        "system prompt must survive trim"
    );
}
1888
1889 mod inserted_count_tests {
1892 use parking_lot::RwLock;
1893 use std::borrow::Cow;
1894 use std::collections::HashSet;
1895 use std::sync::Arc;
1896
1897 use zeph_common::SecurityEventCategory;
1898 use zeph_config::memory::TieredRetrievalConfig;
1899 use zeph_config::{
1900 ContextFormat, ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig,
1901 ReasoningConfig, TrajectoryConfig, TreeConfig,
1902 };
1903 use zeph_context::assembler::PreparedContext;
1904 use zeph_context::manager::ContextManager;
1905 use zeph_llm::provider::{Message, MessageMetadata, Role};
1906 use zeph_memory::TokenCounter;
1907 use zeph_sanitizer::ContentIsolationConfig;
1908 use zeph_sanitizer::ContentSanitizer;
1909 use zeph_skills::registry::SkillRegistry;
1910
1911 use super::super::*;
1912 use crate::state::{
1913 ContextAssemblyView, MessageWindowView, MetricsCounters, SecurityEventSink,
1914 };
1915
1916 fn make_task_supervisor() -> Arc<zeph_common::TaskSupervisor> {
1917 Arc::new(zeph_common::TaskSupervisor::new(
1918 tokio_util::sync::CancellationToken::new(),
1919 ))
1920 }
1921
/// Security-event sink for tests that discards everything pushed to it.
struct NoopSink;
impl SecurityEventSink for NoopSink {
    // Intentionally empty: all three arguments are ignored.
    fn push(&mut self, _: SecurityEventCategory, _: &'static str, _: String) {}
}
1926
1927 fn make_counter() -> Arc<TokenCounter> {
1928 Arc::new(TokenCounter::default())
1929 }
1930
1931 fn make_window<'a>(
1932 messages: &'a mut Vec<Message>,
1933 cached: &'a mut u64,
1934 completed: &'a mut HashSet<String>,
1935 ) -> MessageWindowView<'a> {
1936 let last = Box::leak(Box::new(None::<i64>));
1937 let deferred_hide = Box::leak(Box::new(Vec::<i64>::new()));
1938 let deferred_summ = Box::leak(Box::new(Vec::<String>::new()));
1939 MessageWindowView {
1940 messages,
1941 last_persisted_message_id: last,
1942 deferred_db_hide_ids: deferred_hide,
1943 deferred_db_summaries: deferred_summ,
1944 cached_prompt_tokens: cached,
1945 token_counter: make_counter(),
1946 completed_tool_ids: completed,
1947 }
1948 }
1949
1950 fn mem_msg(content: &str) -> Message {
1951 Message {
1952 role: Role::User,
1953 content: content.to_string(),
1954 parts: vec![],
1955 metadata: MessageMetadata::default(),
1956 }
1957 }
1958
/// Scrubber stand-in that returns its input unchanged as a borrowed `Cow`.
fn scrub_noop(s: &str) -> Cow<'_, str> {
    Cow::from(s)
}
1962
1963 #[tokio::test]
1964 async fn inserted_count_incremented_for_all_paths() {
1965 let mut msgs = vec![
1968 Message::from_legacy(Role::System, "system"),
1969 Message::from_legacy(Role::User, "user turn"),
1970 ];
1971 let mut cached = 0u64;
1972 let mut completed = HashSet::new();
1973 let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1974
1975 let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default());
1976 let mut ctx_mgr = ContextManager::new();
1977 let mut sink = NoopSink;
1978 let mut last_confidence = None::<f32>;
1979 let mut last_skills_prompt = String::new();
1980 let mut active_skill_names = Vec::new();
1981 let registry = Arc::new(RwLock::new(SkillRegistry::default()));
1982
1983 let mut view = ContextAssemblyView {
1984 memory: None,
1985 conversation_id: None,
1986 recall_limit: 10,
1987 cross_session_score_threshold: 0.5,
1988 context_format: ContextFormat::default(),
1989 last_recall_confidence: &mut last_confidence,
1990 context_strategy: ContextStrategy::default(),
1991 crossover_turn_threshold: 0,
1992 cached_session_digest: None,
1993 digest_enabled: false, graph_config: GraphConfig::default(),
1995 document_config: DocumentConfig::default(),
1996 persona_config: PersonaConfig::default(),
1997 trajectory_config: TrajectoryConfig::default(),
1998 reasoning_config: ReasoningConfig::default(),
1999 memcot_config: zeph_config::MemCotConfig::default(),
2000 memcot_state: None,
2001 tree_config: TreeConfig::default(),
2002 last_skills_prompt: &mut last_skills_prompt,
2003 active_skill_names: &mut active_skill_names,
2004 skill_registry: registry,
2005 skill_paths: &[],
2006 correction_config: None,
2007 sidequest_turn_counter: 0,
2008 proactive_explorer: None,
2009 sanitizer: &sanitizer,
2010 quarantine_summarizer: None,
2011 context_manager: &mut ctx_mgr,
2012 token_counter: make_counter(),
2013 metrics: MetricsCounters::default(),
2014 security_events: &mut sink,
2015 cached_prompt_tokens: 0,
2016 redact_credentials: false,
2017 channel_skills: &[],
2018 scrub: scrub_noop,
2019 tiered_retrieval_config: TieredRetrievalConfig {
2020 enabled: false,
2021 ..TieredRetrievalConfig::default()
2022 },
2023 tiered_retrieval_classifier: None,
2024 tiered_retrieval_validator: None,
2025 fidelity_config: None,
2026 fidelity_semantic_provider: None,
2027 fidelity_compress_provider: None,
2028 planned_next_tools: &[],
2029 status_tx: None,
2030 task_supervisor: make_task_supervisor(),
2031 };
2032
2033 let prepared = PreparedContext {
2035 graph_facts: Some(mem_msg("graph_facts")),
2036 doc_rag: Some(mem_msg("doc_rag")),
2037 corrections: Some(mem_msg("corrections")),
2038 recall: Some(mem_msg("recall")),
2039 recall_confidence: Some(0.9),
2040 cross_session: Some(mem_msg("cross_session")),
2041 summaries: Some(mem_msg("summaries")),
2042 code_context: None, persona_facts: Some(mem_msg("persona_facts")),
2044 trajectory_hints: Some(mem_msg("trajectory_hints")),
2045 tree_memory: Some(mem_msg("tree_memory")),
2046 reasoning_hints: Some(mem_msg("reasoning_hints")),
2047 memory_first: false,
2048 recent_history_budget: 100_000,
2049 background_tasks: vec![],
2050 };
2051
2052 let (_delta, inserted_count) = ContextService::new()
2053 .apply_prepared_context(&mut window, &mut view, prepared)
2054 .await;
2055
2056 assert_eq!(
2059 inserted_count, 10,
2060 "all 10 message-carrying PreparedContext fields must increment inserted_count"
2061 );
2062 }
2063 }
2064
2065 mod inject_semantic_recall_tests {
2066 use parking_lot::RwLock;
2067 use std::borrow::Cow;
2068 use std::collections::HashSet;
2069 use std::sync::Arc;
2070
2071 use zeph_config::memory::TieredRetrievalConfig;
2072 use zeph_config::{
2073 ContextFormat, ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig,
2074 ReasoningConfig, TrajectoryConfig, TreeConfig,
2075 };
2076 use zeph_context::manager::ContextManager;
2077 use zeph_llm::provider::Message;
2078 use zeph_memory::TokenCounter;
2079 use zeph_sanitizer::ContentIsolationConfig;
2080 use zeph_sanitizer::ContentSanitizer;
2081 use zeph_skills::registry::SkillRegistry;
2082
2083 use zeph_common::SecurityEventCategory;
2084
2085 use super::super::*;
2086 use crate::helpers::RECALL_PREFIX;
2087 use crate::state::{
2088 ContextAssemblyView, MessageWindowView, MetricsCounters, SecurityEventSink,
2089 };
2090
2091 fn make_task_supervisor() -> Arc<zeph_common::TaskSupervisor> {
2092 Arc::new(zeph_common::TaskSupervisor::new(
2093 tokio_util::sync::CancellationToken::new(),
2094 ))
2095 }
2096
/// Security-event sink for tests that discards everything pushed to it.
struct NoopSink;
impl SecurityEventSink for NoopSink {
    // Intentionally empty: all three arguments are ignored.
    fn push(&mut self, _: SecurityEventCategory, _: &'static str, _: String) {}
}
2101
2102 fn make_counter() -> Arc<TokenCounter> {
2103 Arc::new(TokenCounter::default())
2104 }
2105
2106 fn make_window<'a>(
2107 messages: &'a mut Vec<Message>,
2108 cached: &'a mut u64,
2109 completed: &'a mut HashSet<String>,
2110 ) -> MessageWindowView<'a> {
2111 let last = Box::leak(Box::new(None::<i64>));
2112 let deferred_hide = Box::leak(Box::new(Vec::<i64>::new()));
2113 let deferred_summ = Box::leak(Box::new(Vec::<String>::new()));
2114 MessageWindowView {
2115 messages,
2116 last_persisted_message_id: last,
2117 deferred_db_hide_ids: deferred_hide,
2118 deferred_db_summaries: deferred_summ,
2119 cached_prompt_tokens: cached,
2120 token_counter: make_counter(),
2121 completed_tool_ids: completed,
2122 }
2123 }
2124
/// Scrubber stand-in that returns its input unchanged as a borrowed `Cow`.
fn scrub_noop(s: &str) -> Cow<'_, str> {
    Cow::from(s)
}
2128
2129 #[tokio::test]
2130 async fn tiered_recall_disabled_uses_flat_path() {
2131 let mut msgs: Vec<Message> = vec![];
2134 let mut cached = 0u64;
2135 let mut completed = HashSet::new();
2136 let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2137
2138 let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default());
2139 let mut ctx_mgr = ContextManager::new();
2140 let mut sink = NoopSink;
2141 let mut last_confidence = None::<f32>;
2142 let mut last_skills_prompt = String::new();
2143 let mut active_skill_names = Vec::new();
2144 let registry = Arc::new(RwLock::new(SkillRegistry::default()));
2145
2146 let view = ContextAssemblyView {
2147 memory: None,
2148 conversation_id: None,
2149 recall_limit: 10,
2150 cross_session_score_threshold: 0.5,
2151 context_format: ContextFormat::default(),
2152 last_recall_confidence: &mut last_confidence,
2153 context_strategy: ContextStrategy::default(),
2154 crossover_turn_threshold: 0,
2155 cached_session_digest: None,
2156 digest_enabled: false,
2157 graph_config: GraphConfig::default(),
2158 document_config: DocumentConfig::default(),
2159 persona_config: PersonaConfig::default(),
2160 trajectory_config: TrajectoryConfig::default(),
2161 reasoning_config: ReasoningConfig::default(),
2162 memcot_config: zeph_config::MemCotConfig::default(),
2163 memcot_state: None,
2164 tree_config: TreeConfig::default(),
2165 last_skills_prompt: &mut last_skills_prompt,
2166 active_skill_names: &mut active_skill_names,
2167 skill_registry: registry,
2168 skill_paths: &[],
2169 correction_config: None,
2170 sidequest_turn_counter: 0,
2171 proactive_explorer: None,
2172 sanitizer: &sanitizer,
2173 quarantine_summarizer: None,
2174 context_manager: &mut ctx_mgr,
2175 token_counter: make_counter(),
2176 metrics: MetricsCounters::default(),
2177 security_events: &mut sink,
2178 cached_prompt_tokens: 0,
2179 redact_credentials: false,
2180 channel_skills: &[],
2181 scrub: scrub_noop,
2182 tiered_retrieval_config: TieredRetrievalConfig {
2183 enabled: false,
2184 ..TieredRetrievalConfig::default()
2185 },
2186 tiered_retrieval_classifier: None,
2187 tiered_retrieval_validator: None,
2188 fidelity_config: None,
2189 fidelity_semantic_provider: None,
2190 fidelity_compress_provider: None,
2191 planned_next_tools: &[],
2192 status_tx: None,
2193 task_supervisor: make_task_supervisor(),
2194 };
2195
2196 let result = ContextService::new()
2197 .inject_semantic_recall("test query", 1000, &mut window, &view)
2198 .await;
2199
2200 assert!(result.is_ok(), "disabled tiered recall must return Ok(())");
2201 assert!(
2202 window
2203 .messages
2204 .iter()
2205 .all(|m| !m.content.starts_with(RECALL_PREFIX)),
2206 "no recall message must be injected when memory is None"
2207 );
2208 }
2209
2210 #[tokio::test]
2211 async fn tiered_recall_enabled_no_memory_returns_ok() {
2212 let mut msgs: Vec<Message> = vec![];
2215 let mut cached = 0u64;
2216 let mut completed = HashSet::new();
2217 let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2218
2219 let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default());
2220 let mut ctx_mgr = ContextManager::new();
2221 let mut sink = NoopSink;
2222 let mut last_confidence = None::<f32>;
2223 let mut last_skills_prompt = String::new();
2224 let mut active_skill_names = Vec::new();
2225 let registry = Arc::new(RwLock::new(SkillRegistry::default()));
2226
2227 let view = ContextAssemblyView {
2228 memory: None,
2229 conversation_id: None,
2230 recall_limit: 10,
2231 cross_session_score_threshold: 0.5,
2232 context_format: ContextFormat::default(),
2233 last_recall_confidence: &mut last_confidence,
2234 context_strategy: ContextStrategy::default(),
2235 crossover_turn_threshold: 0,
2236 cached_session_digest: None,
2237 digest_enabled: false,
2238 graph_config: GraphConfig::default(),
2239 document_config: DocumentConfig::default(),
2240 persona_config: PersonaConfig::default(),
2241 trajectory_config: TrajectoryConfig::default(),
2242 reasoning_config: ReasoningConfig::default(),
2243 memcot_config: zeph_config::MemCotConfig::default(),
2244 memcot_state: None,
2245 tree_config: TreeConfig::default(),
2246 last_skills_prompt: &mut last_skills_prompt,
2247 active_skill_names: &mut active_skill_names,
2248 skill_registry: registry,
2249 skill_paths: &[],
2250 correction_config: None,
2251 sidequest_turn_counter: 0,
2252 proactive_explorer: None,
2253 sanitizer: &sanitizer,
2254 quarantine_summarizer: None,
2255 context_manager: &mut ctx_mgr,
2256 token_counter: make_counter(),
2257 metrics: MetricsCounters::default(),
2258 security_events: &mut sink,
2259 cached_prompt_tokens: 0,
2260 redact_credentials: false,
2261 channel_skills: &[],
2262 scrub: scrub_noop,
2263 tiered_retrieval_config: TieredRetrievalConfig {
2264 enabled: true,
2265 ..TieredRetrievalConfig::default()
2266 },
2267 tiered_retrieval_classifier: None,
2268 tiered_retrieval_validator: None,
2269 fidelity_config: None,
2270 fidelity_semantic_provider: None,
2271 fidelity_compress_provider: None,
2272 planned_next_tools: &[],
2273 status_tx: None,
2274 task_supervisor: make_task_supervisor(),
2275 };
2276
2277 let result = ContextService::new()
2278 .inject_semantic_recall("test query", 1000, &mut window, &view)
2279 .await;
2280
2281 assert!(
2282 result.is_ok(),
2283 "enabled tiered recall with no memory must return Ok(())"
2284 );
2285 assert!(
2286 window.messages.is_empty(),
2287 "no recall message must be injected when memory is None"
2288 );
2289 }
2290
2291 #[tokio::test]
2296 async fn prepare_context_tiered_enabled_no_budget_returns_default() {
2297 let mut msgs: Vec<zeph_llm::provider::Message> = vec![];
2298 let mut cached = 0u64;
2299 let mut completed = HashSet::new();
2300 let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2301
2302 let sanitizer = zeph_sanitizer::ContentSanitizer::new(
2303 &zeph_sanitizer::ContentIsolationConfig::default(),
2304 );
2305 let mut ctx_mgr = zeph_context::manager::ContextManager::new();
2306 assert!(ctx_mgr.budget.is_none());
2308
2309 let mut sink = NoopSink;
2310 let mut last_confidence = None::<f32>;
2311 let mut last_skills_prompt = String::new();
2312 let mut active_skill_names = Vec::new();
2313 let registry = Arc::new(RwLock::new(zeph_skills::registry::SkillRegistry::default()));
2314
2315 let mut view = ContextAssemblyView {
2316 memory: None,
2317 conversation_id: None,
2318 recall_limit: 10,
2319 cross_session_score_threshold: 0.5,
2320 context_format: ContextFormat::default(),
2321 last_recall_confidence: &mut last_confidence,
2322 context_strategy: ContextStrategy::default(),
2323 crossover_turn_threshold: 0,
2324 cached_session_digest: None,
2325 digest_enabled: false,
2326 graph_config: GraphConfig::default(),
2327 document_config: DocumentConfig::default(),
2328 persona_config: PersonaConfig::default(),
2329 trajectory_config: TrajectoryConfig::default(),
2330 reasoning_config: ReasoningConfig::default(),
2331 memcot_config: zeph_config::MemCotConfig::default(),
2332 memcot_state: None,
2333 tree_config: TreeConfig::default(),
2334 last_skills_prompt: &mut last_skills_prompt,
2335 active_skill_names: &mut active_skill_names,
2336 skill_registry: registry,
2337 skill_paths: &[],
2338 correction_config: None,
2339 sidequest_turn_counter: 0,
2340 proactive_explorer: None,
2341 sanitizer: &sanitizer,
2342 quarantine_summarizer: None,
2343 context_manager: &mut ctx_mgr,
2344 token_counter: make_counter(),
2345 metrics: MetricsCounters::default(),
2346 security_events: &mut sink,
2347 cached_prompt_tokens: 0,
2348 redact_credentials: false,
2349 channel_skills: &[],
2350 scrub: scrub_noop,
2351 tiered_retrieval_config: TieredRetrievalConfig {
2352 enabled: true,
2353 ..TieredRetrievalConfig::default()
2354 },
2355 tiered_retrieval_classifier: None,
2356 tiered_retrieval_validator: None,
2357 fidelity_config: None,
2358 fidelity_semantic_provider: None,
2359 fidelity_compress_provider: None,
2360 planned_next_tools: &[],
2361 status_tx: None,
2362 task_supervisor: make_task_supervisor(),
2363 };
2364
2365 let result = ContextService::new()
2366 .prepare_context("test query", &mut window, &mut view)
2367 .await;
2368
2369 assert!(
2370 result.is_ok(),
2371 "prepare_context with tiered enabled and no budget must return Ok"
2372 );
2373 }
2374
2375 #[tokio::test]
2378 async fn inject_semantic_recall_bare_no_memory_returns_ok() {
2379 use zeph_config::memory::TieredRetrievalConfig;
2380
2381 let mut msgs: Vec<Message> = vec![];
2382 let mut cached = 0u64;
2383 let mut completed = HashSet::new();
2384 let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2385
2386 let tiered_config = TieredRetrievalConfig {
2387 enabled: true,
2388 ..TieredRetrievalConfig::default()
2389 };
2390 let params = SemanticRecallParams {
2391 query: "test query",
2392 token_budget: 1000,
2393 recall_limit: 10,
2394 context_format: zeph_config::ContextFormat::default(),
2395 conversation_id: None,
2396 tiered_classifier: None,
2397 tiered_validator: None,
2398 tiered_config: &tiered_config,
2399 };
2400 let result = ContextService::new()
2401 .inject_semantic_recall_bare(params, &mut window, None)
2402 .await;
2403
2404 assert!(
2405 result.is_ok(),
2406 "inject_semantic_recall_bare with memory=None must return Ok(())"
2407 );
2408 assert!(
2409 window.messages.is_empty(),
2410 "no recall message must be injected when memory is None"
2411 );
2412 }
2413 }
2414}