zeph_agent_context/
service.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! [`ContextService`] — stateless façade for agent context-assembly operations.
5
6use zeph_context::budget::ContextBudget;
7use zeph_context::fidelity::FidelityScorer;
8use zeph_llm::LlmProvider;
9use zeph_llm::provider::{Message, MessagePart, Role};
10
11use crate::error::ContextError;
12use crate::helpers::{
13    CODE_CONTEXT_PREFIX, CORRECTIONS_PREFIX, CROSS_SESSION_PREFIX, DOCUMENT_RAG_PREFIX,
14    GRAPH_FACTS_PREFIX, LSP_NOTE_PREFIX, PERSONA_PREFIX, REASONING_PREFIX, RECALL_PREFIX,
15    SESSION_DIGEST_PREFIX, SUMMARY_PREFIX, TRAJECTORY_PREFIX, TREE_MEMORY_PREFIX,
16};
17use crate::state::{
18    ContextAssemblyView, ContextDelta, ContextSummarizationView, MessageWindowView,
19    ProviderHandles, StatusSink,
20};
21
22/// Configuration parameters for semantic recall injection.
23///
24/// Collects the 8 config-like arguments shared between the tiered and flat recall paths so
25/// callers do not need to pass them positionally to [`ContextService::inject_semantic_recall_bare`].
26///
27/// `window` and `memory` are kept as direct parameters on the method because they are
28/// mutable/output args rather than configuration.
29pub struct SemanticRecallParams<'a> {
30    /// Query string used for retrieval.
31    pub query: &'a str,
32    /// Maximum number of tokens the injected recall may consume.
33    pub token_budget: usize,
34    /// Maximum number of memories to retrieve (flat path only).
35    pub recall_limit: usize,
36    /// Format applied when serialising recalled memories.
37    pub context_format: zeph_config::ContextFormat,
38    /// Conversation scope used for tiered retrieval.
39    pub conversation_id: Option<zeph_memory::ConversationId>,
40    /// Optional LLM provider for intent classification (tiered path).
41    pub tiered_classifier: Option<&'a std::sync::Arc<zeph_llm::any::AnyProvider>>,
42    /// Optional LLM provider for result validation (tiered path).
43    pub tiered_validator: Option<&'a std::sync::Arc<zeph_llm::any::AnyProvider>>,
44    /// Tiered retrieval configuration controlling whether the tiered path is active.
45    pub tiered_config: &'a zeph_config::memory::TieredRetrievalConfig,
46}
47
/// Field-less entry point that groups the agent's context-assembly operations.
///
/// Nothing is stored on the service itself: every piece of state is handed in through
/// method parameters. Keeping the borrows explicit at the call site lets the borrow
/// checker see that the `&mut` arguments are disjoint instead of hiding them inside an
/// opaque bundle.
///
/// All methods take `&self`; the type serves purely as a namespace and as a single
/// import for callers.
///
/// # Examples
///
/// ```no_run
/// use zeph_agent_context::service::ContextService;
///
/// let svc = ContextService::new();
/// // call svc.prepare_context(...) or svc.clear_history(...)
/// ```
#[derive(Debug, Default)]
pub struct ContextService;
67
68impl ContextService {
69    /// Create a new stateless `ContextService`.
70    ///
71    /// This is a zero-cost constructor — the struct has no fields.
72    #[must_use]
73    pub fn new() -> Self {
74        Self
75    }
76
77    // ── Trivial message-window mutators (PR1) ─────────────────────────────────
78
79    /// Clear the message history, preserving the system prompt.
80    ///
81    /// Keeps the first message (system prompt), clears the rest, and clears
82    /// `completed_tool_ids` — session-scoped dependency state resets with the history.
83    /// Recomputes `cached_prompt_tokens` inline after clearing.
84    pub fn clear_history(&self, window: &mut MessageWindowView<'_>) {
85        let system_prompt = window.messages.first().cloned();
86        window.messages.clear();
87        if let Some(sp) = system_prompt {
88            window.messages.push(sp);
89        }
90        window.completed_tool_ids.clear();
91        recompute_prompt_tokens(window);
92    }
93
94    /// Remove semantic recall messages from the window.
95    pub fn remove_recall_messages(&self, window: &mut MessageWindowView<'_>) {
96        remove_by_part_or_prefix(window.messages, RECALL_PREFIX, |p| {
97            matches!(p, MessagePart::Recall { .. })
98        });
99    }
100
101    /// Remove past-correction messages from the window.
102    pub fn remove_correction_messages(&self, window: &mut MessageWindowView<'_>) {
103        remove_by_prefix(window.messages, Role::System, CORRECTIONS_PREFIX);
104    }
105
106    /// Remove knowledge-graph fact messages from the window.
107    pub fn remove_graph_facts_messages(&self, window: &mut MessageWindowView<'_>) {
108        remove_by_prefix(window.messages, Role::System, GRAPH_FACTS_PREFIX);
109    }
110
111    /// Remove persona-facts messages from the window.
112    pub fn remove_persona_facts_messages(&self, window: &mut MessageWindowView<'_>) {
113        remove_by_prefix(window.messages, Role::System, PERSONA_PREFIX);
114    }
115
116    /// Remove trajectory-hint messages from the window.
117    pub fn remove_trajectory_hints_messages(&self, window: &mut MessageWindowView<'_>) {
118        remove_by_prefix(window.messages, Role::System, TRAJECTORY_PREFIX);
119    }
120
121    /// Remove tree-memory summary messages from the window.
122    pub fn remove_tree_memory_messages(&self, window: &mut MessageWindowView<'_>) {
123        remove_by_prefix(window.messages, Role::System, TREE_MEMORY_PREFIX);
124    }
125
126    /// Remove reasoning-strategy messages from the window.
127    pub fn remove_reasoning_strategies_messages(&self, window: &mut MessageWindowView<'_>) {
128        remove_by_prefix(window.messages, Role::System, REASONING_PREFIX);
129    }
130
131    /// Remove previously injected LSP context notes from the window.
132    ///
133    /// Called before injecting fresh notes each turn so stale diagnostics/hover
134    /// data from the previous tool call do not accumulate across iterations.
135    pub fn remove_lsp_messages(&self, window: &mut MessageWindowView<'_>) {
136        remove_by_prefix(window.messages, Role::System, LSP_NOTE_PREFIX);
137    }
138
139    /// Remove code-context (repo-map / file context) messages from the window.
140    pub fn remove_code_context_messages(&self, window: &mut MessageWindowView<'_>) {
141        remove_by_part_or_prefix(window.messages, CODE_CONTEXT_PREFIX, |p| {
142            matches!(p, MessagePart::CodeContext { .. })
143        });
144    }
145
146    /// Remove session-summary messages from the window.
147    pub fn remove_summary_messages(&self, window: &mut MessageWindowView<'_>) {
148        remove_by_part_or_prefix(window.messages, SUMMARY_PREFIX, |p| {
149            matches!(p, MessagePart::Summary { .. })
150        });
151    }
152
153    /// Remove cross-session context messages from the window.
154    pub fn remove_cross_session_messages(&self, window: &mut MessageWindowView<'_>) {
155        remove_by_part_or_prefix(window.messages, CROSS_SESSION_PREFIX, |p| {
156            matches!(p, MessagePart::CrossSession { .. })
157        });
158    }
159
160    /// Remove the session-digest user message from the window.
161    pub fn remove_session_digest_message(&self, window: &mut MessageWindowView<'_>) {
162        remove_by_prefix(window.messages, Role::User, SESSION_DIGEST_PREFIX);
163    }
164
165    /// Remove document-RAG messages from the window.
166    pub fn remove_document_rag_messages(&self, window: &mut MessageWindowView<'_>) {
167        remove_by_prefix(window.messages, Role::System, DOCUMENT_RAG_PREFIX);
168    }
169
170    /// Trim the non-system message tail to fit within `token_budget` tokens.
171    ///
172    /// Keeps the system prefix intact and the most recent messages, removing
173    /// older messages from the start of the conversation history until the
174    /// token count fits the budget. Recomputes `cached_prompt_tokens` after trimming.
175    ///
176    /// No-op when `token_budget` is zero.
177    pub fn trim_messages_to_budget(&self, window: &mut MessageWindowView<'_>, token_budget: usize) {
178        if token_budget == 0 {
179            return;
180        }
181
182        // Find the first non-system message index (skip system prefix).
183        let history_start = window
184            .messages
185            .iter()
186            .position(|m| m.role != Role::System)
187            .unwrap_or(window.messages.len());
188
189        if history_start >= window.messages.len() {
190            return;
191        }
192
193        let mut total = 0usize;
194        let mut keep_from = window.messages.len();
195
196        for i in (history_start..window.messages.len()).rev() {
197            let msg_tokens = window
198                .token_counter
199                .count_message_tokens(&window.messages[i]);
200            if total + msg_tokens > token_budget {
201                break;
202            }
203            total += msg_tokens;
204            keep_from = i;
205        }
206
207        if keep_from > history_start {
208            let removed = keep_from - history_start;
209            window.messages.drain(history_start..keep_from);
210            recompute_prompt_tokens(window);
211            tracing::info!(
212                removed,
213                token_budget,
214                "trimmed messages to fit context budget"
215            );
216        }
217    }
218
219    // ── prepare_context family (PR2) ─────────────────────────────────────────
220
221    /// Inject semantic recall messages into the window for the given query.
222    ///
223    /// Removes any existing recall messages first, fetches fresh recall up to
224    /// `token_budget` tokens, and inserts the result at position 1 (immediately
225    /// after the system prompt).
226    ///
227    /// # Errors
228    ///
229    /// Returns [`ContextError::Memory`] if the recall backend returns an error.
230    #[tracing::instrument(name = "agent_context.service.inject_semantic_recall", skip_all, err)]
231    pub async fn inject_semantic_recall(
232        &self,
233        query: &str,
234        token_budget: usize,
235        window: &mut MessageWindowView<'_>,
236        view: &ContextAssemblyView<'_>,
237    ) -> Result<(), ContextError> {
238        self.remove_recall_messages(window);
239
240        let params = SemanticRecallParams {
241            query,
242            token_budget,
243            recall_limit: view.recall_limit,
244            context_format: view.context_format,
245            conversation_id: view.conversation_id,
246            tiered_classifier: view.tiered_retrieval_classifier.as_ref(),
247            tiered_validator: view.tiered_retrieval_validator.as_ref(),
248            tiered_config: &view.tiered_retrieval_config,
249        };
250        let msg = self
251            .run_tiered_recall(&params, window, view.memory.as_deref())
252            .await?;
253
254        if let Some(msg) = msg
255            && window.messages.len() > 1
256        {
257            window.messages.insert(1, msg);
258        }
259
260        Ok(())
261    }
262
263    /// Inject semantic recall without a full [`ContextAssemblyView`].
264    ///
265    /// This variant is called from `Agent::inject_semantic_recall` in `zeph-core`, where
266    /// constructing a full `ContextAssemblyView` would require duplicating all of
267    /// `prepare_context`'s setup. It carries only the fields that
268    /// `inject_semantic_recall` actually reads, enabling tiered retrieval on the
269    /// hot-path turn loop without the overhead of the full view.
270    ///
271    /// # Errors
272    ///
273    /// Returns [`ContextError::Memory`] if the recall backend returns an error.
274    #[tracing::instrument(
275        name = "agent_context.service.inject_semantic_recall_bare",
276        skip_all,
277        err
278    )]
279    pub async fn inject_semantic_recall_bare(
280        &self,
281        params: SemanticRecallParams<'_>,
282        window: &mut MessageWindowView<'_>,
283        memory: Option<&zeph_memory::semantic::SemanticMemory>,
284    ) -> Result<(), ContextError> {
285        self.remove_recall_messages(window);
286
287        let msg = self.run_tiered_recall(&params, window, memory).await?;
288
289        if let Some(msg) = msg
290            && window.messages.len() > 1
291        {
292            window.messages.insert(1, msg);
293        }
294
295        Ok(())
296    }
297
298    /// Execute tiered or flat semantic recall and return the message to inject, if any.
299    ///
300    /// Both `inject_semantic_recall` and `inject_semantic_recall_bare` share identical
301    /// retrieval logic; this method holds the single implementation.
302    async fn run_tiered_recall(
303        &self,
304        params: &SemanticRecallParams<'_>,
305        window: &MessageWindowView<'_>,
306        memory: Option<&zeph_memory::semantic::SemanticMemory>,
307    ) -> Result<Option<Message>, ContextError> {
308        if params.tiered_config.enabled {
309            use tracing::Instrument as _;
310            let Some(mem) = memory else {
311                return Ok(None);
312            };
313            let result = tokio::time::timeout(
314                std::time::Duration::from_secs(30),
315                zeph_memory::recall_tiered(
316                    mem,
317                    params.query,
318                    params.conversation_id,
319                    params.tiered_classifier,
320                    params.tiered_validator,
321                    params.tiered_config,
322                    Some(params.token_budget),
323                )
324                .instrument(tracing::info_span!("agent_context.tiered_retrieval.recall")),
325            )
326            .await
327            .map_err(|_| {
328                tracing::warn!("tiered_retrieval: recall_tiered timed out after 30s");
329                ContextError::Memory(zeph_memory::MemoryError::Timeout(
330                    "recall_tiered timed out".to_owned(),
331                ))
332            })?
333            .map_err(ContextError::Memory)?;
334
335            tracing::debug!(
336                intent = %result.intent,
337                tokens_used = result.tokens_used,
338                tier_escalated = result.tier_escalated,
339                count = result.messages.len(),
340                "tiered_retrieval: recall complete"
341            );
342
343            if result.messages.is_empty() {
344                return Ok(None);
345            }
346
347            let recalled_text = result
348                .messages
349                .iter()
350                .map(|m| m.message.content.as_str())
351                .collect::<Vec<_>>()
352                .join("\n---\n");
353            Ok(Some(Message::from_legacy(
354                Role::User,
355                format!("{RECALL_PREFIX}{recalled_text}"),
356            )))
357        } else {
358            let (msg, _score) = crate::helpers::fetch_semantic_recall_raw(
359                memory,
360                params.recall_limit,
361                params.context_format,
362                params.query,
363                params.token_budget,
364                &window.token_counter,
365                None,
366                None,
367            )
368            .await?;
369            Ok(msg)
370        }
371    }
372
373    /// Inject cross-session context messages into the window for the given query.
374    ///
375    /// Removes any existing cross-session messages first, fetches fresh cross-session
376    /// context for the current conversation, and inserts the result at position 1.
377    ///
378    /// # Errors
379    ///
380    /// Returns [`ContextError::Memory`] if the memory backend returns an error.
381    #[tracing::instrument(
382        name = "agent_context.service.inject_cross_session_context",
383        skip_all,
384        err
385    )]
386    pub async fn inject_cross_session_context(
387        &self,
388        query: &str,
389        token_budget: usize,
390        window: &mut MessageWindowView<'_>,
391        view: &ContextAssemblyView<'_>,
392    ) -> Result<(), ContextError> {
393        self.remove_cross_session_messages(window);
394
395        if let Some(msg) = crate::helpers::fetch_cross_session_raw(
396            view.memory.as_deref(),
397            view.conversation_id,
398            view.cross_session_score_threshold,
399            query,
400            token_budget,
401            &view.token_counter,
402        )
403        .await?
404            && window.messages.len() > 1
405        {
406            window.messages.insert(1, msg);
407            tracing::debug!("injected cross-session context");
408        }
409
410        Ok(())
411    }
412
413    /// Inject conversation-summary messages into the window.
414    ///
415    /// Removes any existing summary messages first, fetches stored summaries for the
416    /// current conversation, and inserts the result at position 1.
417    ///
418    /// # Errors
419    ///
420    /// Returns [`ContextError::Memory`] if the memory backend returns an error.
421    #[tracing::instrument(name = "agent_context.service.inject_summaries", skip_all, err)]
422    pub async fn inject_summaries(
423        &self,
424        token_budget: usize,
425        window: &mut MessageWindowView<'_>,
426        view: &ContextAssemblyView<'_>,
427    ) -> Result<(), ContextError> {
428        self.remove_summary_messages(window);
429
430        if let Some(msg) = crate::helpers::fetch_summaries_raw(
431            view.memory.as_deref(),
432            view.conversation_id,
433            token_budget,
434            &view.token_counter,
435        )
436        .await?
437            && window.messages.len() > 1
438        {
439            window.messages.insert(1, msg);
440            tracing::debug!("injected summaries into context");
441        }
442
443        Ok(())
444    }
445
446    /// Select the best-matching skill among ambiguous candidates via an LLM classification call.
447    ///
448    /// Returns the reordered index list with the most likely skill first, or `None` if the
449    /// LLM call fails (caller falls back to original score order).
450    #[tracing::instrument(name = "agent_context.service.disambiguate_skills", skip_all)]
451    pub async fn disambiguate_skills(
452        &self,
453        query: &str,
454        all_meta: &[&zeph_skills::loader::SkillMeta],
455        scored: &[zeph_skills::ScoredMatch],
456        providers: &ProviderHandles,
457    ) -> Option<Vec<usize>> {
458        use std::fmt::Write as _;
459
460        let mut candidates = String::new();
461        for sm in scored {
462            if let Some(meta) = all_meta.get(sm.index) {
463                let _ = writeln!(
464                    candidates,
465                    "- {} (score: {:.3}): {}",
466                    meta.name, sm.score, meta.description
467                );
468            }
469        }
470
471        let prompt = format!(
472            "The user said: \"{query}\"\n\n\
473             These skills matched with similar scores:\n{candidates}\n\
474             Which skill best matches the user's intent? \
475             Return the skill_name, your confidence (0-1), and any extracted parameters."
476        );
477
478        let messages = vec![zeph_llm::provider::Message::from_legacy(
479            zeph_llm::provider::Role::User,
480            prompt,
481        )];
482        match providers
483            .disambiguate
484            .chat_typed::<zeph_skills::IntentClassification>(&messages)
485            .await
486        {
487            Ok(classification) => {
488                tracing::info!(
489                    skill = %classification.skill_name,
490                    confidence = classification.confidence,
491                    "disambiguation selected skill"
492                );
493                let mut indices: Vec<usize> = scored.iter().map(|s| s.index).collect();
494                if let Some(pos) = indices.iter().position(|&i| {
495                    all_meta
496                        .get(i)
497                        .is_some_and(|m| m.name == classification.skill_name)
498                }) {
499                    indices.swap(0, pos);
500                }
501                Some(indices)
502            }
503            Err(e) => {
504                tracing::warn!("disambiguation failed, using original order: {e:#}");
505                None
506            }
507        }
508    }
509
510    /// Prepare the context window for the current turn.
511    ///
512    /// Removes stale injection messages, runs proactive skill exploration, gathers
513    /// semantic recall and graph facts via the concurrent assembler, applies the
514    /// retrieval policy, and injects fresh context. Returns a [`ContextDelta`] whose
515    /// `code_context` field must be applied by the caller (via `inject_code_context`).
516    ///
517    /// # Errors
518    ///
519    /// Returns [`ContextError::Memory`] if recall fails or [`ContextError::Assembler`]
520    /// if the context assembler encounters an internal error.
521    #[allow(clippy::too_many_lines)] // sequential context-assembly pipeline; splitting would reduce readability
522    #[tracing::instrument(name = "agent_context.service.prepare_context", skip_all, err)]
523    pub async fn prepare_context(
524        &self,
525        query: &str,
526        window: &mut MessageWindowView<'_>,
527        view: &mut ContextAssemblyView<'_>,
528    ) -> Result<ContextDelta, ContextError> {
529        if view.context_manager.budget.is_none() {
530            return Ok(ContextDelta::default());
531        }
532
533        // Remove stale injected messages before concurrent fetch.
534        self.remove_session_digest_message(window);
535        self.remove_summary_messages(window);
536        self.remove_cross_session_messages(window);
537        self.remove_recall_messages(window);
538        self.remove_document_rag_messages(window);
539        self.remove_correction_messages(window);
540        self.remove_code_context_messages(window);
541        self.remove_graph_facts_messages(window);
542        self.remove_persona_facts_messages(window);
543        self.remove_trajectory_hints_messages(window);
544        self.remove_tree_memory_messages(window);
545        if view.reasoning_config.enabled {
546            self.remove_reasoning_strategies_messages(window);
547        }
548
549        // Proactive world-knowledge exploration (feature-gated, #3320).
550        if let Some(explorer) = view.proactive_explorer.clone()
551            && let Some(domain) = explorer.classify(query)
552        {
553            let already_known = {
554                let registry_guard = view.skill_registry.read();
555                explorer.has_knowledge(&registry_guard, &domain)
556            };
557            let excluded = explorer.is_excluded(&domain);
558
559            if !already_known && !excluded {
560                tracing::debug!(domain = %domain.0, query_len = query.len(), "proactive.explore triggered");
561                let timeout_ms = explorer.timeout_ms();
562                let result = tokio::time::timeout(
563                    std::time::Duration::from_millis(timeout_ms),
564                    explorer.explore(&domain),
565                )
566                .await;
567                match result {
568                    Ok(Ok(())) => {
569                        view.skill_registry.write().reload(view.skill_paths);
570                        tracing::debug!(domain = %domain.0, "proactive.explore complete, registry reloaded");
571                    }
572                    Ok(Err(e)) => {
573                        tracing::warn!(domain = %domain.0, error = %e, "proactive exploration failed");
574                    }
575                    Err(_) => {
576                        tracing::warn!(domain = %domain.0, timeout_ms, "proactive exploration timed out");
577                    }
578                }
579            }
580        }
581
582        // Compression-spectrum retrieval policy (#3305, #3455).
583        let active_levels: &'static [zeph_memory::compression::CompressionLevel] =
584            if let Some(ref budget) = view.context_manager.budget {
585                let used = view.cached_prompt_tokens;
586                let max = budget.max_tokens();
587                #[allow(clippy::cast_precision_loss)]
588                let remaining_ratio = if max == 0 {
589                    1.0_f32
590                } else {
591                    1.0 - (used as f32 / max as f32).clamp(0.0, 1.0)
592                };
593                let levels =
594                    zeph_memory::compression::RetrievalPolicy::default().select(remaining_ratio);
595                tracing::debug!(
596                    remaining_ratio,
597                    active_levels = ?levels,
598                    "compression_spectrum: retrieval policy selected"
599                );
600                levels
601            } else {
602                &[]
603            };
604
605        let memory_backend: Option<std::sync::Arc<dyn zeph_common::memory::ContextMemoryBackend>> =
606            view.memory.clone().map(
607                |m| -> std::sync::Arc<dyn zeph_common::memory::ContextMemoryBackend> {
608                    std::sync::Arc::new(crate::memory_backend::SemanticMemoryBackend::new(m))
609                },
610            );
611
612        let memory_view = zeph_context::input::ContextMemoryView {
613            memory: memory_backend,
614            conversation_id: view.conversation_id.map(|c| c.0),
615            recall_limit: view.recall_limit,
616            cross_session_score_threshold: view.cross_session_score_threshold,
617            context_strategy: view.context_strategy,
618            crossover_turn_threshold: view.crossover_turn_threshold,
619            cached_session_digest: view.cached_session_digest.clone(),
620            graph_config: view.graph_config.clone(),
621            document_config: view.document_config.clone(),
622            persona_config: view.persona_config.clone(),
623            trajectory_config: view.trajectory_config.clone(),
624            reasoning_config: view.reasoning_config.clone(),
625            memcot_config: view.memcot_config.clone(),
626            memcot_state: view.memcot_state.clone(),
627            tree_config: view.tree_config.clone(),
628        };
629
630        #[cfg(feature = "index")]
631        let index_access = view.index;
632        #[cfg(not(feature = "index"))]
633        let index_access: Option<&dyn zeph_context::input::IndexAccess> = None;
634
635        let router = crate::memory_backend::build_memory_router(view.context_manager);
636
637        let input = zeph_context::input::ContextAssemblyInput {
638            memory: &memory_view,
639            context_manager: view.context_manager,
640            token_counter: &*view.token_counter,
641            skills_prompt: view.last_skills_prompt,
642            index: index_access,
643            correction_config: view.correction_config,
644            sidequest_turn_counter: view.sidequest_turn_counter,
645            messages: window.messages,
646            query,
647            scrub: view.scrub,
648            active_levels,
649            router,
650            planned_next_tools: view.planned_next_tools,
651        };
652
653        let mut prepared = zeph_context::assembler::ContextAssembler::gather(&input).await?;
654
655        // When tiered retrieval is enabled, suppress the flat recall assembled above and
656        // replace it with the tiered result injected directly into the window.  The span
657        // `agent_context.tiered_retrieval.recall` will appear in traces for every enabled
658        // turn, satisfying the observability requirement in issue #3996.
659        if view.tiered_retrieval_config.enabled {
660            prepared.recall = None;
661        }
662
663        // Drain background handles produced during assembly (e.g. mark_reasoning_used) and
664        // register them with the supervisor so they are tracked and abortable.  Must happen
665        // before `apply_prepared_context` consumes `prepared` to avoid silent drops.
666        for handle in prepared.background_tasks.drain(..) {
667            let task_supervisor = std::sync::Arc::clone(&view.task_supervisor);
668            task_supervisor.spawn(zeph_common::task_supervisor::TaskDescriptor {
669                name: "context.assembly.background",
670                restart: zeph_common::task_supervisor::RestartPolicy::RunOnce,
671                factory: {
672                    let cell = std::sync::Arc::new(std::sync::Mutex::new(Some(async move {
673                        let _ = handle.await;
674                    })));
675                    move || {
676                        let f = cell.lock().ok().and_then(|mut g| g.take());
677                        async move {
678                            if let Some(f) = f {
679                                f.await;
680                            }
681                        }
682                    }
683                },
684            });
685        }
686
687        let (delta, inserted_count) = self.apply_prepared_context(window, view, prepared).await;
688
689        if view.tiered_retrieval_config.enabled {
690            self.inject_semantic_recall(query, usize::MAX, window, view)
691                .await?;
692        }
693
694        // T-06: Fidelity scoring (INV-01: AFTER apply_prepared_context returns).
695        // Guard: skip when MemoryFirst is active (INV-11 / AC-09) or config absent/disabled.
696        // Spec AC-09: when memory_first=true the scorer MUST NOT run — the caller (here) is
697        // responsible for this bypass; FidelityScorer itself is stateless and has no memory of it.
698        let memory_first_active =
699            view.context_strategy == zeph_config::ContextStrategy::MemoryFirst;
700        if let Some(fidelity_cfg) = view.fidelity_config
701            && fidelity_cfg.enabled
702            && !memory_first_active
703        {
704            use tracing::Instrument as _;
705            if let Some(ref tx) = view.status_tx {
706                let _ = tx.send("Scoring context fidelity\u{2026}".into());
707            }
708            let embed_provider = view
709                .fidelity_semantic_provider
710                .as_deref()
711                .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
712            let compress_provider = view
713                .fidelity_compress_provider
714                .as_deref()
715                .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
716            let fidelity_span = tracing::info_span!(
717                "context.fidelity.score",
718                message_count = window.messages.len(),
719                query_len = query.len(),
720            );
721            FidelityScorer
722                .score_and_apply(
723                    window.messages,
724                    query,
725                    view.planned_next_tools,
726                    fidelity_cfg,
727                    &*view.token_counter,
728                    inserted_count,
729                    false, // floor invariant enforced on normal scoring path
730                    embed_provider,
731                    compress_provider,
732                )
733                .instrument(fidelity_span)
734                .await;
735            // Persist fidelity tags so subsequent turns see the floor invariant.
736            persist_fidelity_tags(window.messages, view.memory.as_deref()).await;
737            recompute_prompt_tokens(window);
738            if let Some(ref tx) = view.status_tx {
739                let _ = tx.send(String::new());
740            }
741        }
742
743        Ok(delta)
744    }
745
746    /// Apply a [`PreparedContext`] to the message window.
747    ///
748    /// Injects all fetched messages in insertion order (`doc_rag` → corrections → recall →
749    /// cross-session → summaries → persona → trajectory → tree → reasoning), handles
750    /// `MemoryFirst` history drain, sanitizes memory content, trims to budget, and injects
751    /// the session digest. Returns a [`ContextDelta`] whose `code_context` field the caller
752    /// must apply via `inject_code_context`, plus the count of messages freshly inserted at
753    /// indices `1..1+inserted_count` (used by the fidelity scorer as the exempt range — INV-10).
754    #[allow(clippy::too_many_lines)] // sequential message injection: order matters, cannot split
755    async fn apply_prepared_context(
756        &self,
757        window: &mut MessageWindowView<'_>,
758        view: &mut ContextAssemblyView<'_>,
759        prepared: zeph_context::assembler::PreparedContext,
760    ) -> (ContextDelta, usize) {
761        use std::borrow::Cow;
762        use zeph_llm::provider::{Message, MessageMetadata, Role};
763        use zeph_sanitizer::{ContentSource, ContentSourceKind, MemorySourceHint};
764
765        // Store top-1 recall score for MAR routing signal.
766        *view.last_recall_confidence = prepared.recall_confidence;
767
768        // MemoryFirst: drain conversation history BEFORE inserting memory messages.
769        if prepared.memory_first {
770            let history_start = 1usize;
771            let len = window.messages.len();
772            let keep_tail =
773                zeph_context::assembler::memory_first_keep_tail(window.messages, history_start);
774            if len > history_start + keep_tail {
775                window.messages.drain(history_start..len - keep_tail);
776                recompute_prompt_tokens(window);
777                tracing::debug!(
778                    strategy = "memory_first",
779                    keep_tail,
780                    "dropped conversation history, kept last {keep_tail} messages"
781                );
782            }
783        }
784
785        // Tracks how many memory messages were freshly inserted at positions 1..1+inserted_count
786        // so the fidelity scorer can exempt them (INV-10). Incremented at every insertion path.
787        let mut inserted_count: usize = 0;
788
789        // Insert memory messages at position 1 (all sanitized before insertion — CRIT-02).
790        if let Some(msg) = prepared.graph_facts.filter(|_| window.messages.len() > 1) {
791            let sanitized = self
792                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
793                .await;
794            window.messages.insert(1, sanitized);
795            inserted_count += 1;
796            tracing::debug!("injected knowledge graph facts into context");
797        }
798        if let Some(msg) = prepared.doc_rag.filter(|_| window.messages.len() > 1) {
799            let sanitized = self
800                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
801                .await;
802            window.messages.insert(1, sanitized);
803            inserted_count += 1;
804            tracing::debug!("injected document RAG context");
805        }
806        if let Some(msg) = prepared.corrections.filter(|_| window.messages.len() > 1) {
807            let sanitized = self
808                .sanitize_memory_message(msg, MemorySourceHint::ConversationHistory, view)
809                .await;
810            window.messages.insert(1, sanitized);
811            inserted_count += 1;
812            tracing::debug!("injected past corrections into context");
813        }
814        if let Some(msg) = prepared.recall.filter(|_| window.messages.len() > 1) {
815            let sanitized = self
816                .sanitize_memory_message(msg, MemorySourceHint::ConversationHistory, view)
817                .await;
818            window.messages.insert(1, sanitized);
819            inserted_count += 1;
820        }
821        if let Some(msg) = prepared.cross_session.filter(|_| window.messages.len() > 1) {
822            let sanitized = self
823                .sanitize_memory_message(msg, MemorySourceHint::LlmSummary, view)
824                .await;
825            window.messages.insert(1, sanitized);
826            inserted_count += 1;
827        }
828        if let Some(msg) = prepared.summaries.filter(|_| window.messages.len() > 1) {
829            let sanitized = self
830                .sanitize_memory_message(msg, MemorySourceHint::LlmSummary, view)
831                .await;
832            window.messages.insert(1, sanitized);
833            inserted_count += 1;
834            tracing::debug!("injected summaries into context");
835        }
836        if let Some(msg) = prepared.persona_facts.filter(|_| window.messages.len() > 1) {
837            let sanitized = self
838                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
839                .await;
840            window.messages.insert(1, sanitized);
841            inserted_count += 1;
842            tracing::debug!("injected persona facts into context");
843        }
844        if let Some(msg) = prepared
845            .trajectory_hints
846            .filter(|_| window.messages.len() > 1)
847        {
848            let sanitized = self
849                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
850                .await;
851            window.messages.insert(1, sanitized);
852            inserted_count += 1;
853            tracing::debug!("injected trajectory hints into context");
854        }
855        if let Some(msg) = prepared.tree_memory.filter(|_| window.messages.len() > 1) {
856            let sanitized = self
857                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
858                .await;
859            window.messages.insert(1, sanitized);
860            inserted_count += 1;
861            tracing::debug!("injected tree memory summary into context");
862        }
863        if let Some(msg) = prepared
864            .reasoning_hints
865            .filter(|_| window.messages.len() > 1)
866        {
867            let sanitized = self
868                .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view)
869                .await;
870            window.messages.insert(1, sanitized);
871            inserted_count += 1;
872            tracing::debug!("injected reasoning strategies into context");
873        }
874
875        // Code context: sanitize inline, return body to caller via ContextDelta.
876        let code_context = if let Some(text) = prepared.code_context {
877            let sanitized = view
878                .sanitizer
879                .sanitize(&text, ContentSource::new(ContentSourceKind::ToolResult));
880            view.metrics.sanitizer_runs += 1;
881            if !sanitized.injection_flags.is_empty() {
882                tracing::warn!(
883                    flags = sanitized.injection_flags.len(),
884                    "injection patterns detected in code RAG context"
885                );
886                view.metrics.sanitizer_injection_flags += sanitized.injection_flags.len() as u64;
887                let detail = sanitized
888                    .injection_flags
889                    .first()
890                    .map_or_else(String::new, |f| {
891                        format!("Detected pattern: {}", f.pattern_name)
892                    });
893                view.security_events.push(
894                    zeph_common::SecurityEventCategory::InjectionFlag,
895                    "code_rag",
896                    detail,
897                );
898            }
899            if sanitized.was_truncated {
900                view.metrics.sanitizer_truncations += 1;
901                view.security_events.push(
902                    zeph_common::SecurityEventCategory::Truncation,
903                    "code_rag",
904                    "Content truncated to max_content_size".to_string(),
905                );
906            }
907            Some(sanitized.body)
908        } else {
909            None
910        };
911
912        if !prepared.memory_first {
913            self.trim_messages_to_budget(window, prepared.recent_history_budget);
914        }
915
916        // Session digest injected AFTER all other memory inserts (closest to system prompt).
917        if view.digest_enabled
918            && let Some((digest_text, _)) = view
919                .cached_session_digest
920                .clone()
921                .filter(|_| window.messages.len() > 1)
922        {
923            let digest_msg = Message {
924                role: Role::User,
925                content: format!("{}{digest_text}", crate::helpers::SESSION_DIGEST_PREFIX),
926                parts: vec![],
927                metadata: MessageMetadata::default(),
928            };
929            let sanitized = self
930                .sanitize_memory_message(digest_msg, MemorySourceHint::LlmSummary, view)
931                .await;
932            window.messages.insert(1, sanitized);
933            inserted_count += 1;
934            tracing::debug!("injected session digest into context");
935        }
936
937        // Credential scrubbing pass.
938        if view.redact_credentials {
939            for msg in &mut *window.messages {
940                if msg.role == Role::System {
941                    continue;
942                }
943                if let Cow::Owned(s) = (view.scrub)(&msg.content) {
944                    msg.content = s;
945                }
946            }
947        }
948
949        recompute_prompt_tokens(window);
950
951        (ContextDelta { code_context }, inserted_count)
952    }
953
954    /// Sanitize a memory retrieval message before inserting it into the context window.
955    ///
956    /// This is the sole sanitization point for the six memory retrieval paths (`doc_rag`,
957    /// corrections, recall, `cross_session`, summaries, `graph_facts`). The `hint` parameter
958    /// modulates injection-detection sensitivity — `ConversationHistory` and `LlmSummary`
959    /// skip detection to suppress false positives; `ExternalContent` enables full detection.
960    ///
961    /// Truncation, control-char stripping, delimiter escaping, and spotlighting are active
962    /// for all hints (defense-in-depth invariant).
963    async fn sanitize_memory_message(
964        &self,
965        mut msg: zeph_llm::provider::Message,
966        hint: zeph_sanitizer::MemorySourceHint,
967        view: &mut ContextAssemblyView<'_>,
968    ) -> zeph_llm::provider::Message {
969        use zeph_sanitizer::{ContentSource, ContentSourceKind};
970
971        let source = ContentSource::new(ContentSourceKind::MemoryRetrieval).with_memory_hint(hint);
972        let sanitized = view.sanitizer.sanitize(&msg.content, source);
973        view.metrics.sanitizer_runs += 1;
974        if !sanitized.injection_flags.is_empty() {
975            tracing::warn!(
976                flags = sanitized.injection_flags.len(),
977                "injection patterns detected in memory retrieval"
978            );
979            view.metrics.sanitizer_injection_flags += sanitized.injection_flags.len() as u64;
980            let detail = sanitized
981                .injection_flags
982                .first()
983                .map_or_else(String::new, |f| {
984                    format!("Detected pattern: {}", f.pattern_name)
985                });
986            view.security_events.push(
987                zeph_common::SecurityEventCategory::InjectionFlag,
988                "memory_retrieval",
989                detail,
990            );
991        }
992        if sanitized.was_truncated {
993            view.metrics.sanitizer_truncations += 1;
994            view.security_events.push(
995                zeph_common::SecurityEventCategory::Truncation,
996                "memory_retrieval",
997                "Content truncated to max_content_size".to_string(),
998            );
999        }
1000
1001        // Quarantine step: route high-risk sources through an isolated LLM (defense-in-depth).
1002        if view.sanitizer.is_enabled()
1003            && let Some(qs) = view.quarantine_summarizer
1004            && qs.should_quarantine(ContentSourceKind::MemoryRetrieval)
1005        {
1006            match qs.extract_facts(&sanitized, view.sanitizer).await {
1007                Ok((facts, flags)) => {
1008                    view.metrics.quarantine_invocations += 1;
1009                    view.security_events.push(
1010                        zeph_common::SecurityEventCategory::Quarantine,
1011                        "memory_retrieval",
1012                        "Content quarantined, facts extracted".to_string(),
1013                    );
1014                    let escaped = zeph_sanitizer::ContentSanitizer::escape_delimiter_tags(&facts);
1015                    msg.content = zeph_sanitizer::ContentSanitizer::apply_spotlight(
1016                        &escaped,
1017                        &sanitized.source,
1018                        &flags,
1019                    );
1020                    return msg;
1021                }
1022                Err(e) => {
1023                    tracing::warn!(
1024                        error = %e,
1025                        "quarantine failed for memory retrieval, using original sanitized content"
1026                    );
1027                    view.metrics.quarantine_failures += 1;
1028                    view.security_events.push(
1029                        zeph_common::SecurityEventCategory::Quarantine,
1030                        "memory_retrieval",
1031                        format!("Quarantine failed: {e}"),
1032                    );
1033                }
1034            }
1035        }
1036
1037        msg.content = sanitized.body;
1038        msg
1039    }
1040
    /// Reset the conversation history.
    ///
    /// Delegates to `Self::clear_history` on `window`; per the original notes this clears
    /// all messages except the system prompt and resets the cached token count
    /// (NOTE(review): `clear_history` is defined outside this view — confirm).
    /// The caller (`Agent<C>`) is responsible for resetting compaction state, orchestration,
    /// focus, and sidequest state — those fields are outside the context-service scope.
    ///
    /// `_view` is accepted but not used by the current implementation.
    ///
    /// # Errors
    ///
    /// The current body never fails: it always returns `Ok(())`. The `Result` return type
    /// leaves room for [`ContextError::Memory`] (e.g. a failed `SQLite` conversation
    /// creation) — NOTE(review): no code path in this function produces it today.
    pub fn reset_conversation(
        &self,
        window: &mut MessageWindowView<'_>,
        _view: &mut ContextAssemblyView<'_>,
    ) -> Result<(), ContextError> {
        self.clear_history(window);
        Ok(())
    }
1058
1059    /// Run tiered compaction if the token budget is exhausted.
1060    ///
1061    /// Dispatches to the appropriate compaction tier based on the current
1062    /// context manager state:
1063    ///
1064    /// - **None** — context is within budget; no-op.
1065    /// - **Soft** — apply deferred summaries + prune tool outputs (no LLM).
1066    /// - **Hard** — Soft steps first, then LLM full summarization if pruning is insufficient.
1067    ///
1068    /// Increments the `turns_since_last_hard_compaction` counter unconditionally so pressure
1069    /// is tracked regardless of whether compaction fires. Respects the cooldown guard: when
1070    /// cooling, Hard-tier LLM summarization is skipped.
1071    ///
1072    /// # Errors
1073    ///
1074    /// Returns [`ContextError::Memory`] if `SQLite` persistence fails during Hard compaction.
1075    #[allow(
1076        clippy::cast_precision_loss,
1077        clippy::cast_possible_truncation,
1078        clippy::cast_sign_loss,
1079        clippy::too_many_lines
1080    )]
1081    #[tracing::instrument(name = "agent_context.service.maybe_compact", skip_all, err)]
1082    pub async fn maybe_compact(
1083        &self,
1084        summ: &mut ContextSummarizationView<'_>,
1085        status: &(impl StatusSink + ?Sized),
1086    ) -> Result<(), ContextError> {
1087        use zeph_context::manager::{CompactionState, CompactionTier};
1088
1089        // Increment turn counter unconditionally (tracks pressure regardless of guards).
1090        if let Some(count) = summ.context_manager.turns_since_last_hard_compaction_mut() {
1091            *count += 1;
1092        }
1093
1094        // Guard: exhaustion — warn once, then no-op permanently.
1095        if let CompactionState::Exhausted { warned } = summ.context_manager.compaction_state()
1096            && !warned
1097        {
1098            summ.context_manager
1099                .set_compaction_state(CompactionState::Exhausted { warned: true });
1100            tracing::warn!("compaction exhausted: context budget too tight for this session");
1101        }
1102        if summ.context_manager.compaction_state().is_exhausted() {
1103            return Ok(());
1104        }
1105
1106        // Guard: server compaction active — skip unless above 95% budget (safety fallback).
1107        if summ.server_compaction_active {
1108            let budget = summ
1109                .context_manager
1110                .budget
1111                .as_ref()
1112                .map_or(0, ContextBudget::max_tokens);
1113            if budget > 0 {
1114                let fallback = (budget * 95 / 100) as u64;
1115                if *summ.cached_prompt_tokens < fallback {
1116                    return Ok(());
1117                }
1118                tracing::warn!(
1119                    "server compaction active but context at 95%+ — falling back to client-side"
1120                );
1121            } else {
1122                return Ok(());
1123            }
1124        }
1125
1126        // Guard: already compacted this turn.
1127        if summ
1128            .context_manager
1129            .compaction_state()
1130            .is_compacted_this_turn()
1131        {
1132            return Ok(());
1133        }
1134
1135        // Decrement cooldown counter; record whether we are in cooldown.
1136        let in_cooldown = summ.context_manager.compaction_state().cooldown_remaining() > 0;
1137        if in_cooldown
1138            && let CompactionState::Cooling { turns_remaining } =
1139                summ.context_manager.compaction_state()
1140        {
1141            let next = turns_remaining - 1;
1142            summ.context_manager.set_compaction_state(if next == 0 {
1143                CompactionState::Ready
1144            } else {
1145                CompactionState::Cooling {
1146                    turns_remaining: next,
1147                }
1148            });
1149        }
1150
1151        // T-07: AgeMem proactive regrade — fires before tier dispatch (INV-06, INV-11).
1152        // Skip when MemoryFirst is active; ContextSummarizationView does not carry
1153        // context_strategy, so we check the budget ratio directly via should_proactively_regrade.
1154        if let Some(ref fidelity_cfg) = summ.fidelity_config.clone()
1155            && fidelity_cfg.enabled
1156            && summ.context_manager.should_proactively_regrade(
1157                *summ.cached_prompt_tokens,
1158                fidelity_cfg.regrade_threshold,
1159                summ.server_compaction_active,
1160            )
1161        {
1162            use tracing::Instrument as _;
1163            let regrade_embed_provider = summ
1164                .fidelity_semantic_provider
1165                .as_deref()
1166                .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
1167            let regrade_compress_provider = summ
1168                .fidelity_compress_provider
1169                .as_deref()
1170                .map(|p| p as &dyn zeph_llm::LlmProviderDyn);
1171            FidelityScorer
1172                .score_and_apply(
1173                    summ.messages,
1174                    &summ.current_query,
1175                    &[],
1176                    fidelity_cfg,
1177                    &*summ.token_counter,
1178                    0,
1179                    true, // proactive regrade: allow upgrading past the persisted floor
1180                    regrade_embed_provider,
1181                    regrade_compress_provider,
1182                )
1183                .instrument(tracing::info_span!(
1184                    "context.fidelity.regrade",
1185                    budget_ratio = tracing::field::Empty,
1186                ))
1187                .await;
1188            // Persist upgraded fidelity tags so the new levels survive the next turn (F-3).
1189            persist_fidelity_tags(summ.messages, summ.memory.as_deref()).await;
1190            recompute_prompt_tokens_summ(summ);
1191            summ.context_manager.set_regraded_this_turn(true);
1192            tracing::debug!(
1193                cached_tokens = *summ.cached_prompt_tokens,
1194                "AgeMem proactive regrade complete"
1195            );
1196        }
1197
1198        match summ
1199            .context_manager
1200            .compaction_tier(*summ.cached_prompt_tokens)
1201        {
1202            CompactionTier::Soft => {
1203                self.do_soft_compaction(summ, status).await;
1204                Ok(())
1205            }
1206            CompactionTier::Hard => self.do_hard_compaction(summ, status, in_cooldown).await,
1207            _ => Ok(()),
1208        }
1209    }
1210
1211    /// Execute the Soft compaction tier: apply deferred summaries and prune tool outputs.
1212    ///
1213    /// Does not trigger an LLM call. Does not set `compacted_this_turn` so Hard tier
1214    /// may still fire in the same turn if context remains above the hard threshold.
1215    #[allow(
1216        clippy::cast_precision_loss,
1217        clippy::cast_possible_truncation,
1218        clippy::cast_sign_loss
1219    )]
1220    async fn do_soft_compaction(
1221        &self,
1222        summ: &mut ContextSummarizationView<'_>,
1223        status: &(impl StatusSink + ?Sized),
1224    ) {
1225        status.send_status("soft compacting context...").await;
1226
1227        // Step 0: refresh task goal / subgoal for scored pruning.
1228        match &summ.context_manager.compression.pruning_strategy {
1229            zeph_config::PruningStrategy::Subgoal | zeph_config::PruningStrategy::SubgoalMig => {
1230                crate::summarization::scheduling::maybe_refresh_subgoal(summ);
1231            }
1232            _ => crate::summarization::scheduling::maybe_refresh_task_goal(summ),
1233        }
1234
1235        // Step 1: apply deferred summaries (free tokens without LLM).
1236        let applied = crate::summarization::deferred::apply_deferred_summaries(summ);
1237
1238        // Step 1b: rebuild subgoal index if deferred summaries were applied (S5 fix).
1239        if applied > 0
1240            && summ
1241                .context_manager
1242                .compression
1243                .pruning_strategy
1244                .is_subgoal()
1245        {
1246            summ.subgoal_registry
1247                .rebuild_after_compaction(summ.messages, 0);
1248        }
1249
1250        // Step 2: prune tool outputs down to soft threshold.
1251        let budget = summ
1252            .context_manager
1253            .budget
1254            .as_ref()
1255            .map_or(0, ContextBudget::max_tokens);
1256        let soft_threshold =
1257            (budget as f32 * summ.context_manager.soft_compaction_threshold) as usize;
1258        let cached = usize::try_from(*summ.cached_prompt_tokens).unwrap_or(usize::MAX);
1259        let min_to_free = cached.saturating_sub(soft_threshold);
1260        if min_to_free > 0 {
1261            crate::summarization::pruning::prune_tool_outputs(summ, min_to_free);
1262        }
1263
1264        status.send_status("").await;
1265        tracing::info!(
1266            cached_tokens = *summ.cached_prompt_tokens,
1267            soft_threshold,
1268            "soft compaction complete"
1269        );
1270    }
1271
1272    /// Execute the Hard compaction tier: soft pass first, then LLM summarization if needed.
1273    #[allow(
1274        clippy::cast_precision_loss,
1275        clippy::cast_possible_truncation,
1276        clippy::cast_sign_loss
1277    )]
1278    async fn do_hard_compaction(
1279        &self,
1280        summ: &mut ContextSummarizationView<'_>,
1281        status: &(impl StatusSink + ?Sized),
1282        in_cooldown: bool,
1283    ) -> Result<(), ContextError> {
1284        use zeph_context::manager::CompactionState;
1285
1286        // Track hard compaction event for pressure metrics.
1287        let turns_since_last = summ
1288            .context_manager
1289            .turns_since_last_hard_compaction()
1290            .map(|t| u32::try_from(t).unwrap_or(u32::MAX));
1291        summ.context_manager
1292            .set_turns_since_last_hard_compaction(Some(0));
1293        if let Some(metrics) = summ.metrics {
1294            metrics.record_hard_compaction(turns_since_last);
1295        }
1296
1297        if in_cooldown {
1298            tracing::debug!(
1299                turns_remaining = summ.context_manager.compaction_state().cooldown_remaining(),
1300                "hard compaction skipped: cooldown active"
1301            );
1302            return Ok(());
1303        }
1304
1305        let budget = summ
1306            .context_manager
1307            .budget
1308            .as_ref()
1309            .map_or(0, ContextBudget::max_tokens);
1310        let hard_threshold =
1311            (budget as f32 * summ.context_manager.hard_compaction_threshold) as usize;
1312        let cached = usize::try_from(*summ.cached_prompt_tokens).unwrap_or(usize::MAX);
1313        let min_to_free = cached.saturating_sub(hard_threshold);
1314
1315        status.send_status("compacting context...").await;
1316
1317        // Step 1: apply deferred summaries.
1318        crate::summarization::deferred::apply_deferred_summaries(summ);
1319
1320        // Step 2: attempt pruning-only.
1321        let freed = crate::summarization::pruning::prune_tool_outputs(summ, min_to_free);
1322        if freed >= min_to_free {
1323            tracing::info!(freed, "hard compaction: pruning sufficient");
1324            summ.context_manager
1325                .set_compaction_state(CompactionState::CompactedThisTurn {
1326                    cooldown: summ.context_manager.compaction_cooldown_turns(),
1327                });
1328            if let Err(e) = crate::summarization::deferred::flush_deferred_summaries(summ).await {
1329                tracing::warn!(%e, "flush_deferred_summaries failed after hard compaction");
1330            }
1331            status.send_status("").await;
1332            return Ok(());
1333        }
1334
1335        // Step 3: Guard — too few messages to compact.
1336        let preserve_tail = summ.context_manager.compaction_preserve_tail;
1337        let compactable = summ.messages.len().saturating_sub(preserve_tail + 1);
1338        if compactable <= 1 {
1339            tracing::warn!(
1340                compactable,
1341                "hard compaction: too few messages, marking exhausted"
1342            );
1343            summ.context_manager
1344                .set_compaction_state(CompactionState::Exhausted { warned: false });
1345            status.send_status("").await;
1346            return Ok(());
1347        }
1348
1349        // Step 4: LLM summarization.
1350        tracing::info!(
1351            min_to_free,
1352            "hard compaction: falling back to LLM summarization"
1353        );
1354        let tokens_before = *summ.cached_prompt_tokens;
1355        let outcome = crate::summarization::compaction::compact_context(summ, None).await?;
1356
1357        let freed_tokens = tokens_before.saturating_sub(*summ.cached_prompt_tokens);
1358
1359        if !outcome.is_compacted() || freed_tokens == 0 {
1360            tracing::warn!("hard compaction: no net reduction, marking exhausted");
1361            summ.context_manager
1362                .set_compaction_state(CompactionState::Exhausted { warned: false });
1363            status.send_status("").await;
1364            return Ok(());
1365        }
1366
1367        if matches!(
1368            summ.context_manager
1369                .compaction_tier(*summ.cached_prompt_tokens),
1370            zeph_context::manager::CompactionTier::Hard
1371        ) {
1372            tracing::warn!(
1373                freed_tokens,
1374                "hard compaction: still above hard threshold after compaction, marking exhausted"
1375            );
1376            summ.context_manager
1377                .set_compaction_state(CompactionState::Exhausted { warned: false });
1378            status.send_status("").await;
1379            return Ok(());
1380        }
1381
1382        summ.context_manager
1383            .set_compaction_state(CompactionState::CompactedThisTurn {
1384                cooldown: summ.context_manager.compaction_cooldown_turns(),
1385            });
1386
1387        if tokens_before > *summ.cached_prompt_tokens {
1388            tracing::info!(
1389                tokens_before,
1390                tokens_after = *summ.cached_prompt_tokens,
1391                saved = freed_tokens,
1392                "context compaction complete"
1393            );
1394        }
1395
1396        status.send_status("").await;
1397        Ok(())
1398    }
1399
1400    /// Summarize the most recent tool-use/result pair if it exceeds the cutoff.
1401    ///
1402    /// Drains the backlog of unsummarized tool-use/result pairs in a single pass,
1403    /// storing results as `deferred_summary` on message metadata. Applied lazily
1404    /// by [`Self::maybe_apply_deferred_summaries`] when context pressure rises.
1405    #[tracing::instrument(name = "agent_context.service.maybe_summarize_tool_pair", skip_all)]
1406    pub async fn maybe_summarize_tool_pair(
1407        &self,
1408        summ: &mut ContextSummarizationView<'_>,
1409        providers: &ProviderHandles,
1410    ) {
1411        crate::summarization::deferred::maybe_summarize_tool_pair(
1412            summ,
1413            providers,
1414            &TxStatusSink(summ.status_tx.clone()),
1415        )
1416        .await;
1417    }
1418
1419    /// Apply any deferred tool-pair summaries to the message window.
1420    ///
1421    /// Processes all pending deferred summaries in reverse order so insertions do not
1422    /// invalidate lower indices. Returns the number of summaries applied.
1423    #[must_use]
1424    pub fn apply_deferred_summaries(&self, summ: &mut ContextSummarizationView<'_>) -> usize {
1425        crate::summarization::deferred::apply_deferred_summaries(summ)
1426    }
1427
1428    /// Flush all deferred summary IDs to the database.
1429    ///
1430    /// Calls `apply_tool_pair_summaries` to soft-delete the original tool pairs and
1431    /// persist the summaries. Always clears both deferred queues regardless of outcome.
1432    #[tracing::instrument(name = "agent_context.service.flush_deferred_summaries", skip_all)]
1433    pub async fn flush_deferred_summaries(&self, summ: &mut ContextSummarizationView<'_>) {
1434        if let Err(e) = crate::summarization::deferred::flush_deferred_summaries(summ).await {
1435            tracing::warn!(%e, "flush_deferred_summaries failed");
1436        }
1437    }
1438
1439    /// Apply deferred summaries if context usage exceeds the soft compaction threshold.
1440    ///
1441    /// Two triggers: token pressure (above the soft threshold) and count pressure (pending
1442    /// summaries >= `tool_call_cutoff`). This is Tier 0 — no LLM call. Does NOT set
1443    /// `compacted_this_turn` so proactive/reactive compaction may still fire.
1444    pub fn maybe_apply_deferred_summaries(&self, summ: &mut ContextSummarizationView<'_>) {
1445        crate::summarization::deferred::maybe_apply_deferred_summaries(summ);
1446    }
1447
1448    /// Run unconditional LLM-based context compaction with an optional token budget.
1449    ///
1450    /// Bypasses tier and cooldown checks — always drains the oldest messages and inserts
1451    /// a compact summary. Use this in tests or when the caller has already determined that
1452    /// compaction is warranted. Production code should prefer [`Self::maybe_compact`].
1453    ///
1454    /// Invokes the optional callbacks wired into `summ` in this order:
1455    /// archive → LLM summarization → probe → finalize → persistence.
1456    ///
1457    /// Returns [`crate::state::CompactionOutcome::NoChange`] when there is nothing to compact.
1458    ///
1459    /// # Errors
1460    ///
1461    /// Returns [`ContextError`] if summarization fails (LLM error or timeout).
1462    #[tracing::instrument(name = "agent_context.service.compact_context", skip_all, err)]
1463    pub async fn compact_context(
1464        &self,
1465        summ: &mut ContextSummarizationView<'_>,
1466        max_summary_tokens: Option<usize>,
1467    ) -> Result<crate::state::CompactionOutcome, crate::error::ContextError> {
1468        crate::summarization::compaction::compact_context(summ, max_summary_tokens).await
1469    }
1470
1471    /// Apply a soft compaction pass mid-iteration if required.
1472    ///
1473    /// Applies deferred summaries and prunes tool outputs down to the soft threshold.
1474    /// Never triggers a Hard tier LLM call. Returns immediately if `compacted_this_turn`
1475    /// is set or context is below the soft threshold.
1476    pub fn maybe_soft_compact_mid_iteration(&self, summ: &mut ContextSummarizationView<'_>) {
1477        crate::summarization::scheduling::maybe_soft_compact_mid_iteration(summ);
1478    }
1479
1480    /// Run proactive compression if token usage crosses the configured threshold.
1481    ///
1482    /// Uses the `compact_context_with_budget` path (LLM summarization with an optional
1483    /// token cap). Skips when server compaction is active unless context exceeds 95% of
1484    /// the budget. Does not impose a post-compaction cooldown.
1485    #[tracing::instrument(name = "agent_context.service.maybe_proactive_compress", skip_all)]
1486    pub async fn maybe_proactive_compress(
1487        &self,
1488        summ: &mut ContextSummarizationView<'_>,
1489        status: &(impl StatusSink + ?Sized),
1490    ) {
1491        let Some((_threshold, max_summary_tokens)) = summ
1492            .context_manager
1493            .should_proactively_compress(*summ.cached_prompt_tokens)
1494        else {
1495            return;
1496        };
1497
1498        if summ.server_compaction_active {
1499            let budget = summ
1500                .context_manager
1501                .budget
1502                .as_ref()
1503                .map_or(0, ContextBudget::max_tokens);
1504            if budget > 0 {
1505                let fallback = (budget * 95 / 100) as u64;
1506                if *summ.cached_prompt_tokens <= fallback {
1507                    return;
1508                }
1509                tracing::warn!(
1510                    cached_prompt_tokens = *summ.cached_prompt_tokens,
1511                    fallback_threshold = fallback,
1512                    "server compaction active but context at 95%+ — falling back to proactive"
1513                );
1514            } else {
1515                return;
1516            }
1517        }
1518
1519        status.send_status("compressing context...").await;
1520        tracing::info!(
1521            max_summary_tokens,
1522            cached_tokens = *summ.cached_prompt_tokens,
1523            "proactive compression triggered"
1524        );
1525
1526        match crate::summarization::compaction::compact_context(summ, Some(max_summary_tokens))
1527            .await
1528        {
1529            Ok(outcome) if outcome.is_compacted() => {
1530                summ.context_manager.set_compaction_state(
1531                    zeph_context::manager::CompactionState::CompactedThisTurn { cooldown: 0 },
1532                );
1533                tracing::info!("proactive compression complete");
1534            }
1535            Ok(_) => {}
1536            Err(e) => tracing::warn!(%e, "proactive compression failed"),
1537        }
1538
1539        status.send_status("").await;
1540    }
1541
1542    /// Refresh the task goal when the last user message has changed.
1543    ///
1544    /// Two-phase non-blocking: applies any completed background result from the previous
1545    /// turn, then schedules a new extraction if the user message hash has changed.
1546    /// Only active for `TaskAware` and `Mig` pruning strategies.
1547    pub fn maybe_refresh_task_goal(&self, summ: &mut ContextSummarizationView<'_>) {
1548        crate::summarization::scheduling::maybe_refresh_task_goal(summ);
1549    }
1550
1551    /// Refresh the subgoal registry when the last user message has changed.
1552    ///
1553    /// Mirrors the two-phase `maybe_refresh_task_goal` pattern.
1554    /// Only active for `Subgoal` and `SubgoalMig` pruning strategies.
1555    pub fn maybe_refresh_subgoal(&self, summ: &mut ContextSummarizationView<'_>) {
1556        crate::summarization::scheduling::maybe_refresh_subgoal(summ);
1557    }
1558}
1559
1560// ── StatusSink adapters ───────────────────────────────────────────────────────
1561
1562/// `StatusSink` adapter over an optional `UnboundedSender<String>`.
1563///
1564/// Sends status strings when the sender is present; silently drops them otherwise.
1565struct TxStatusSink(Option<tokio::sync::mpsc::UnboundedSender<String>>);
1566
1567impl StatusSink for TxStatusSink {
1568    fn send_status(&self, msg: &str) -> impl std::future::Future<Output = ()> + Send + '_ {
1569        if let Some(ref tx) = self.0 {
1570            let _ = tx.send(msg.to_owned());
1571        }
1572        std::future::ready(())
1573    }
1574}
1575
1576// ── Free functions (helpers shared across service methods) ────────────────────
1577
1578/// Recompute `cached_prompt_tokens` from the current message list.
1579///
1580/// Called after every mutation that changes the message count or content, so the
1581/// provider call path always sees an accurate token count.
1582pub(crate) fn recompute_prompt_tokens(window: &mut MessageWindowView<'_>) {
1583    *window.cached_prompt_tokens = window
1584        .messages
1585        .iter()
1586        .map(|m| window.token_counter.count_message_tokens(m) as u64)
1587        .sum();
1588}
1589
1590/// Persist fidelity tags for all scored messages to `SQLite`.
1591///
1592/// Collects `(db_id, tag as u8)` pairs for messages that have both a `db_id` and a
1593/// non-None `fidelity_tag`, then calls [`SqliteStore::update_fidelity_tags`] inline.
1594/// The await is cheap — `SQLite` UPDATE is a sub-millisecond local I/O operation.
1595///
1596/// A warn-level log is emitted on failure; the next turn will recompute from scratch,
1597/// which is safe (the floor invariant simply won't apply until persistence succeeds).
1598async fn persist_fidelity_tags(
1599    messages: &[zeph_llm::provider::Message],
1600    memory: Option<&zeph_memory::semantic::SemanticMemory>,
1601) {
1602    let Some(mem) = memory else { return };
1603    let updates: Vec<(zeph_memory::MessageId, u8)> = messages
1604        .iter()
1605        .filter_map(|m| {
1606            let db_id = m.metadata.db_id?;
1607            let tag = m.metadata.fidelity_tag?;
1608            Some((zeph_memory::MessageId(db_id), tag as u8))
1609        })
1610        .collect();
1611    if updates.is_empty() {
1612        return;
1613    }
1614    if let Err(e) = mem.sqlite().update_fidelity_tags(&updates).await {
1615        tracing::warn!(
1616            count = updates.len(),
1617            error = %e,
1618            "failed to persist fidelity tags; floor invariant will not apply next turn"
1619        );
1620    }
1621}
1622
1623/// Recompute `cached_prompt_tokens` for a [`ContextSummarizationView`].
1624///
1625/// Used after the `AgeMem` proactive regrade modifies the message window in `maybe_compact`.
1626fn recompute_prompt_tokens_summ(summ: &mut crate::state::ContextSummarizationView<'_>) {
1627    *summ.cached_prompt_tokens = summ
1628        .messages
1629        .iter()
1630        .map(|m| summ.token_counter.count_message_tokens(m) as u64)
1631        .sum();
1632}
1633
1634/// Remove all system/user messages whose `content` starts with `prefix` and whose
1635/// role matches `role`.
1636///
1637/// Operates on the raw `messages` slice to allow callers that don't hold a full
1638/// `MessageWindowView` to use this helper (e.g., from `zeph-core` shims).
1639pub(crate) fn remove_by_prefix(
1640    messages: &mut Vec<zeph_llm::provider::Message>,
1641    role: Role,
1642    prefix: &str,
1643) {
1644    messages.retain(|m| m.role != role || !m.content.starts_with(prefix));
1645}
1646
1647/// Remove messages that match either a typed `MessagePart` or a content prefix.
1648///
1649/// For `Role::System` messages: typed-part matching takes priority — a message is removed
1650/// if its **first** part satisfies `part_matches`. As a fallback, messages that start with
1651/// `prefix` are also removed.
1652/// For `Role::User` messages: removed if their content starts with `prefix` (tiered-recall
1653/// cleanup).
1654/// All other roles are always retained.
1655pub(crate) fn remove_by_part_or_prefix(
1656    messages: &mut Vec<zeph_llm::provider::Message>,
1657    prefix: &str,
1658    part_matches: impl Fn(&MessagePart) -> bool,
1659) {
1660    messages.retain(|m| {
1661        // Role::User recall messages are produced by the tiered-retrieval path in
1662        // inject_semantic_recall. They must be cleaned up the same way as Role::System ones.
1663        if m.role == Role::User {
1664            return !m.content.starts_with(prefix);
1665        }
1666        if m.role != Role::System {
1667            return true;
1668        }
1669        if m.parts.first().is_some_and(&part_matches) {
1670            return false;
1671        }
1672        !m.content.starts_with(prefix)
1673    });
1674}
1675
1676#[cfg(test)]
1677mod tests {
1678    use std::collections::HashSet;
1679    use std::sync::Arc;
1680
1681    use zeph_llm::provider::{Message, MessagePart, Role};
1682    use zeph_memory::TokenCounter;
1683
1684    use super::*;
1685    use crate::helpers::{GRAPH_FACTS_PREFIX, RECALL_PREFIX, SUMMARY_PREFIX};
1686    use crate::state::MessageWindowView;
1687
1688    fn make_counter() -> Arc<TokenCounter> {
1689        Arc::new(TokenCounter::default())
1690    }
1691
1692    fn make_window<'a>(
1693        messages: &'a mut Vec<Message>,
1694        cached: &'a mut u64,
1695        completed: &'a mut HashSet<String>,
1696    ) -> MessageWindowView<'a> {
1697        let last = Box::leak(Box::new(None::<i64>));
1698        let deferred_hide = Box::leak(Box::new(Vec::<i64>::new()));
1699        let deferred_summ = Box::leak(Box::new(Vec::<String>::new()));
1700        MessageWindowView {
1701            messages,
1702            last_persisted_message_id: last,
1703            deferred_db_hide_ids: deferred_hide,
1704            deferred_db_summaries: deferred_summ,
1705            cached_prompt_tokens: cached,
1706            token_counter: make_counter(),
1707            completed_tool_ids: completed,
1708        }
1709    }
1710
1711    fn sys(text: &str) -> Message {
1712        Message::from_legacy(Role::System, text)
1713    }
1714
1715    fn user(text: &str) -> Message {
1716        Message::from_legacy(Role::User, text)
1717    }
1718
1719    fn assistant(text: &str) -> Message {
1720        Message::from_legacy(Role::Assistant, text)
1721    }
1722
1723    #[test]
1724    fn clear_history_keeps_system_prompt() {
1725        let mut msgs = vec![sys("system"), user("hello"), assistant("hi")];
1726        let mut cached = 0u64;
1727        let mut completed = HashSet::new();
1728        completed.insert("tool_1".to_owned());
1729        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1730
1731        ContextService::new().clear_history(&mut window);
1732
1733        assert_eq!(window.messages.len(), 1);
1734        assert_eq!(window.messages[0].content, "system");
1735        assert!(
1736            window.completed_tool_ids.is_empty(),
1737            "completed_tool_ids must be cleared"
1738        );
1739    }
1740
1741    #[test]
1742    fn clear_history_empty_messages_is_noop() {
1743        let mut msgs: Vec<Message> = vec![];
1744        let mut cached = 0u64;
1745        let mut completed = HashSet::new();
1746        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1747
1748        ContextService::new().clear_history(&mut window);
1749
1750        assert!(window.messages.is_empty());
1751    }
1752
1753    #[test]
1754    fn remove_recall_messages_removes_by_prefix() {
1755        let mut msgs = vec![
1756            sys("system"),
1757            sys(&format!("{RECALL_PREFIX}some recalled text")),
1758            user("hello"),
1759        ];
1760        let mut cached = 0u64;
1761        let mut completed = HashSet::new();
1762        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1763
1764        ContextService::new().remove_recall_messages(&mut window);
1765
1766        assert_eq!(window.messages.len(), 2);
1767        assert!(
1768            window
1769                .messages
1770                .iter()
1771                .all(|m| !m.content.starts_with(RECALL_PREFIX))
1772        );
1773    }
1774
1775    // Regression test for #4019: Role::User recall messages must be removed by
1776    // remove_recall_messages, not just Role::System ones.
1777    #[test]
1778    fn remove_recall_messages_removes_user_role_recall() {
1779        let mut msgs = vec![
1780            sys("system"),
1781            user(&format!("{RECALL_PREFIX}recalled via tiered path")),
1782            user("real user message"),
1783        ];
1784        let mut cached = 0u64;
1785        let mut completed = HashSet::new();
1786        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1787
1788        ContextService::new().remove_recall_messages(&mut window);
1789
1790        assert_eq!(
1791            window.messages.len(),
1792            2,
1793            "Role::User recall message must be removed"
1794        );
1795        assert!(
1796            window
1797                .messages
1798                .iter()
1799                .all(|m| !m.content.starts_with(RECALL_PREFIX)),
1800            "no message with RECALL_PREFIX must remain"
1801        );
1802        assert!(
1803            window
1804                .messages
1805                .iter()
1806                .any(|m| m.content == "real user message"),
1807            "non-recall user message must survive"
1808        );
1809    }
1810
1811    #[test]
1812    fn remove_graph_facts_messages_removes_matching() {
1813        let mut msgs = vec![
1814            sys("system"),
1815            sys(&format!("{GRAPH_FACTS_PREFIX}fact1")),
1816            user("hello"),
1817        ];
1818        let mut cached = 0u64;
1819        let mut completed = HashSet::new();
1820        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1821
1822        ContextService::new().remove_graph_facts_messages(&mut window);
1823
1824        assert_eq!(window.messages.len(), 2);
1825    }
1826
1827    #[test]
1828    fn remove_summary_messages_removes_by_part() {
1829        let mut msgs = vec![
1830            sys("system"),
1831            Message::from_parts(
1832                Role::System,
1833                vec![MessagePart::Summary {
1834                    text: format!("{SUMMARY_PREFIX}old summary"),
1835                }],
1836            ),
1837            user("hello"),
1838        ];
1839        let mut cached = 0u64;
1840        let mut completed = HashSet::new();
1841        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1842
1843        ContextService::new().remove_summary_messages(&mut window);
1844
1845        assert_eq!(window.messages.len(), 2);
1846    }
1847
1848    #[test]
1849    fn trim_messages_to_budget_zero_is_noop() {
1850        let mut msgs = vec![sys("system"), user("a"), assistant("b"), user("c")];
1851        let original_len = msgs.len();
1852        let mut cached = 0u64;
1853        let mut completed = HashSet::new();
1854        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1855
1856        ContextService::new().trim_messages_to_budget(&mut window, 0);
1857
1858        assert_eq!(window.messages.len(), original_len);
1859    }
1860
1861    #[test]
1862    fn trim_messages_to_budget_keeps_recent() {
1863        // With a very small budget only the most recent messages survive.
1864        let mut msgs = vec![
1865            sys("system"),
1866            user("message 1"),
1867            assistant("reply 1"),
1868            user("message 2"),
1869        ];
1870        let mut cached = 0u64;
1871        let mut completed = HashSet::new();
1872        let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1873
1874        // 1-token budget keeps the last user message only.
1875        ContextService::new().trim_messages_to_budget(&mut window, 1);
1876
1877        // System prompt is always kept; at least one recent message should be present.
1878        assert!(
1879            window.messages.len() < 4,
1880            "trim should remove some messages"
1881        );
1882        assert_eq!(
1883            window.messages[0].role,
1884            Role::System,
1885            "system prompt must survive trim"
1886        );
1887    }
1888
1889    // AC-12: inserted_count must equal the number of non-None memory fields injected.
1890    // Tests that every Some(msg) field in PreparedContext increments inserted_count by 1.
1891    mod inserted_count_tests {
1892        use parking_lot::RwLock;
1893        use std::borrow::Cow;
1894        use std::collections::HashSet;
1895        use std::sync::Arc;
1896
1897        use zeph_common::SecurityEventCategory;
1898        use zeph_config::memory::TieredRetrievalConfig;
1899        use zeph_config::{
1900            ContextFormat, ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig,
1901            ReasoningConfig, TrajectoryConfig, TreeConfig,
1902        };
1903        use zeph_context::assembler::PreparedContext;
1904        use zeph_context::manager::ContextManager;
1905        use zeph_llm::provider::{Message, MessageMetadata, Role};
1906        use zeph_memory::TokenCounter;
1907        use zeph_sanitizer::ContentIsolationConfig;
1908        use zeph_sanitizer::ContentSanitizer;
1909        use zeph_skills::registry::SkillRegistry;
1910
1911        use super::super::*;
1912        use crate::state::{
1913            ContextAssemblyView, MessageWindowView, MetricsCounters, SecurityEventSink,
1914        };
1915
1916        fn make_task_supervisor() -> Arc<zeph_common::TaskSupervisor> {
1917            Arc::new(zeph_common::TaskSupervisor::new(
1918                tokio_util::sync::CancellationToken::new(),
1919            ))
1920        }
1921
1922        struct NoopSink;
1923        impl SecurityEventSink for NoopSink {
1924            fn push(&mut self, _: SecurityEventCategory, _: &'static str, _: String) {}
1925        }
1926
1927        fn make_counter() -> Arc<TokenCounter> {
1928            Arc::new(TokenCounter::default())
1929        }
1930
1931        fn make_window<'a>(
1932            messages: &'a mut Vec<Message>,
1933            cached: &'a mut u64,
1934            completed: &'a mut HashSet<String>,
1935        ) -> MessageWindowView<'a> {
1936            let last = Box::leak(Box::new(None::<i64>));
1937            let deferred_hide = Box::leak(Box::new(Vec::<i64>::new()));
1938            let deferred_summ = Box::leak(Box::new(Vec::<String>::new()));
1939            MessageWindowView {
1940                messages,
1941                last_persisted_message_id: last,
1942                deferred_db_hide_ids: deferred_hide,
1943                deferred_db_summaries: deferred_summ,
1944                cached_prompt_tokens: cached,
1945                token_counter: make_counter(),
1946                completed_tool_ids: completed,
1947            }
1948        }
1949
1950        fn mem_msg(content: &str) -> Message {
1951            Message {
1952                role: Role::User,
1953                content: content.to_string(),
1954                parts: vec![],
1955                metadata: MessageMetadata::default(),
1956            }
1957        }
1958
1959        fn scrub_noop(s: &str) -> Cow<'_, str> {
1960            Cow::Borrowed(s)
1961        }
1962
1963        #[tokio::test]
1964        async fn inserted_count_incremented_for_all_paths() {
1965            // AC-12: each non-None field in PreparedContext increments inserted_count by 1.
1966            // 10 memory fields are tested here (session_digest is controlled by digest_enabled).
1967            let mut msgs = vec![
1968                Message::from_legacy(Role::System, "system"),
1969                Message::from_legacy(Role::User, "user turn"),
1970            ];
1971            let mut cached = 0u64;
1972            let mut completed = HashSet::new();
1973            let mut window = make_window(&mut msgs, &mut cached, &mut completed);
1974
1975            let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default());
1976            let mut ctx_mgr = ContextManager::new();
1977            let mut sink = NoopSink;
1978            let mut last_confidence = None::<f32>;
1979            let mut last_skills_prompt = String::new();
1980            let mut active_skill_names = Vec::new();
1981            let registry = Arc::new(RwLock::new(SkillRegistry::default()));
1982
1983            let mut view = ContextAssemblyView {
1984                memory: None,
1985                conversation_id: None,
1986                recall_limit: 10,
1987                cross_session_score_threshold: 0.5,
1988                context_format: ContextFormat::default(),
1989                last_recall_confidence: &mut last_confidence,
1990                context_strategy: ContextStrategy::default(),
1991                crossover_turn_threshold: 0,
1992                cached_session_digest: None,
1993                digest_enabled: false, // no session digest injection in this test
1994                graph_config: GraphConfig::default(),
1995                document_config: DocumentConfig::default(),
1996                persona_config: PersonaConfig::default(),
1997                trajectory_config: TrajectoryConfig::default(),
1998                reasoning_config: ReasoningConfig::default(),
1999                memcot_config: zeph_config::MemCotConfig::default(),
2000                memcot_state: None,
2001                tree_config: TreeConfig::default(),
2002                last_skills_prompt: &mut last_skills_prompt,
2003                active_skill_names: &mut active_skill_names,
2004                skill_registry: registry,
2005                skill_paths: &[],
2006                correction_config: None,
2007                sidequest_turn_counter: 0,
2008                proactive_explorer: None,
2009                sanitizer: &sanitizer,
2010                quarantine_summarizer: None,
2011                context_manager: &mut ctx_mgr,
2012                token_counter: make_counter(),
2013                metrics: MetricsCounters::default(),
2014                security_events: &mut sink,
2015                cached_prompt_tokens: 0,
2016                redact_credentials: false,
2017                channel_skills: &[],
2018                scrub: scrub_noop,
2019                tiered_retrieval_config: TieredRetrievalConfig {
2020                    enabled: false,
2021                    ..TieredRetrievalConfig::default()
2022                },
2023                tiered_retrieval_classifier: None,
2024                tiered_retrieval_validator: None,
2025                fidelity_config: None,
2026                fidelity_semantic_provider: None,
2027                fidelity_compress_provider: None,
2028                planned_next_tools: &[],
2029                status_tx: None,
2030                task_supervisor: make_task_supervisor(),
2031            };
2032
2033            // Populate all 10 message-carrying fields.
2034            let prepared = PreparedContext {
2035                graph_facts: Some(mem_msg("graph_facts")),
2036                doc_rag: Some(mem_msg("doc_rag")),
2037                corrections: Some(mem_msg("corrections")),
2038                recall: Some(mem_msg("recall")),
2039                recall_confidence: Some(0.9),
2040                cross_session: Some(mem_msg("cross_session")),
2041                summaries: Some(mem_msg("summaries")),
2042                code_context: None, // code_context returns via ContextDelta, not inserted_count
2043                persona_facts: Some(mem_msg("persona_facts")),
2044                trajectory_hints: Some(mem_msg("trajectory_hints")),
2045                tree_memory: Some(mem_msg("tree_memory")),
2046                reasoning_hints: Some(mem_msg("reasoning_hints")),
2047                memory_first: false,
2048                recent_history_budget: 100_000,
2049                background_tasks: vec![],
2050            };
2051
2052            let (_delta, inserted_count) = ContextService::new()
2053                .apply_prepared_context(&mut window, &mut view, prepared)
2054                .await;
2055
2056            // 10 message fields were Some(msg): graph_facts, doc_rag, corrections, recall,
2057            // cross_session, summaries, persona_facts, trajectory_hints, tree_memory, reasoning_hints.
2058            assert_eq!(
2059                inserted_count, 10,
2060                "all 10 message-carrying PreparedContext fields must increment inserted_count"
2061            );
2062        }
2063    }
2064
2065    mod inject_semantic_recall_tests {
2066        use parking_lot::RwLock;
2067        use std::borrow::Cow;
2068        use std::collections::HashSet;
2069        use std::sync::Arc;
2070
2071        use zeph_config::memory::TieredRetrievalConfig;
2072        use zeph_config::{
2073            ContextFormat, ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig,
2074            ReasoningConfig, TrajectoryConfig, TreeConfig,
2075        };
2076        use zeph_context::manager::ContextManager;
2077        use zeph_llm::provider::Message;
2078        use zeph_memory::TokenCounter;
2079        use zeph_sanitizer::ContentIsolationConfig;
2080        use zeph_sanitizer::ContentSanitizer;
2081        use zeph_skills::registry::SkillRegistry;
2082
2083        use zeph_common::SecurityEventCategory;
2084
2085        use super::super::*;
2086        use crate::helpers::RECALL_PREFIX;
2087        use crate::state::{
2088            ContextAssemblyView, MessageWindowView, MetricsCounters, SecurityEventSink,
2089        };
2090
2091        fn make_task_supervisor() -> Arc<zeph_common::TaskSupervisor> {
2092            Arc::new(zeph_common::TaskSupervisor::new(
2093                tokio_util::sync::CancellationToken::new(),
2094            ))
2095        }
2096
2097        struct NoopSink;
2098        impl SecurityEventSink for NoopSink {
2099            fn push(&mut self, _: SecurityEventCategory, _: &'static str, _: String) {}
2100        }
2101
2102        fn make_counter() -> Arc<TokenCounter> {
2103            Arc::new(TokenCounter::default())
2104        }
2105
2106        fn make_window<'a>(
2107            messages: &'a mut Vec<Message>,
2108            cached: &'a mut u64,
2109            completed: &'a mut HashSet<String>,
2110        ) -> MessageWindowView<'a> {
2111            let last = Box::leak(Box::new(None::<i64>));
2112            let deferred_hide = Box::leak(Box::new(Vec::<i64>::new()));
2113            let deferred_summ = Box::leak(Box::new(Vec::<String>::new()));
2114            MessageWindowView {
2115                messages,
2116                last_persisted_message_id: last,
2117                deferred_db_hide_ids: deferred_hide,
2118                deferred_db_summaries: deferred_summ,
2119                cached_prompt_tokens: cached,
2120                token_counter: make_counter(),
2121                completed_tool_ids: completed,
2122            }
2123        }
2124
2125        fn scrub_noop(s: &str) -> Cow<'_, str> {
2126            Cow::Borrowed(s)
2127        }
2128
2129        #[tokio::test]
2130        async fn tiered_recall_disabled_uses_flat_path() {
2131            // With tiered_retrieval disabled and no memory, inject_semantic_recall must
2132            // return Ok(()) without inserting any recall message (flat path returns empty).
2133            let mut msgs: Vec<Message> = vec![];
2134            let mut cached = 0u64;
2135            let mut completed = HashSet::new();
2136            let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2137
2138            let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default());
2139            let mut ctx_mgr = ContextManager::new();
2140            let mut sink = NoopSink;
2141            let mut last_confidence = None::<f32>;
2142            let mut last_skills_prompt = String::new();
2143            let mut active_skill_names = Vec::new();
2144            let registry = Arc::new(RwLock::new(SkillRegistry::default()));
2145
2146            let view = ContextAssemblyView {
2147                memory: None,
2148                conversation_id: None,
2149                recall_limit: 10,
2150                cross_session_score_threshold: 0.5,
2151                context_format: ContextFormat::default(),
2152                last_recall_confidence: &mut last_confidence,
2153                context_strategy: ContextStrategy::default(),
2154                crossover_turn_threshold: 0,
2155                cached_session_digest: None,
2156                digest_enabled: false,
2157                graph_config: GraphConfig::default(),
2158                document_config: DocumentConfig::default(),
2159                persona_config: PersonaConfig::default(),
2160                trajectory_config: TrajectoryConfig::default(),
2161                reasoning_config: ReasoningConfig::default(),
2162                memcot_config: zeph_config::MemCotConfig::default(),
2163                memcot_state: None,
2164                tree_config: TreeConfig::default(),
2165                last_skills_prompt: &mut last_skills_prompt,
2166                active_skill_names: &mut active_skill_names,
2167                skill_registry: registry,
2168                skill_paths: &[],
2169                correction_config: None,
2170                sidequest_turn_counter: 0,
2171                proactive_explorer: None,
2172                sanitizer: &sanitizer,
2173                quarantine_summarizer: None,
2174                context_manager: &mut ctx_mgr,
2175                token_counter: make_counter(),
2176                metrics: MetricsCounters::default(),
2177                security_events: &mut sink,
2178                cached_prompt_tokens: 0,
2179                redact_credentials: false,
2180                channel_skills: &[],
2181                scrub: scrub_noop,
2182                tiered_retrieval_config: TieredRetrievalConfig {
2183                    enabled: false,
2184                    ..TieredRetrievalConfig::default()
2185                },
2186                tiered_retrieval_classifier: None,
2187                tiered_retrieval_validator: None,
2188                fidelity_config: None,
2189                fidelity_semantic_provider: None,
2190                fidelity_compress_provider: None,
2191                planned_next_tools: &[],
2192                status_tx: None,
2193                task_supervisor: make_task_supervisor(),
2194            };
2195
2196            let result = ContextService::new()
2197                .inject_semantic_recall("test query", 1000, &mut window, &view)
2198                .await;
2199
2200            assert!(result.is_ok(), "disabled tiered recall must return Ok(())");
2201            assert!(
2202                window
2203                    .messages
2204                    .iter()
2205                    .all(|m| !m.content.starts_with(RECALL_PREFIX)),
2206                "no recall message must be injected when memory is None"
2207            );
2208        }
2209
2210        #[tokio::test]
2211        async fn tiered_recall_enabled_no_memory_returns_ok() {
2212            // With tiered_retrieval enabled but memory = None, inject_semantic_recall must
2213            // return Ok(()) via the early-return guard without inserting any recall message.
2214            let mut msgs: Vec<Message> = vec![];
2215            let mut cached = 0u64;
2216            let mut completed = HashSet::new();
2217            let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2218
2219            let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default());
2220            let mut ctx_mgr = ContextManager::new();
2221            let mut sink = NoopSink;
2222            let mut last_confidence = None::<f32>;
2223            let mut last_skills_prompt = String::new();
2224            let mut active_skill_names = Vec::new();
2225            let registry = Arc::new(RwLock::new(SkillRegistry::default()));
2226
2227            let view = ContextAssemblyView {
2228                memory: None,
2229                conversation_id: None,
2230                recall_limit: 10,
2231                cross_session_score_threshold: 0.5,
2232                context_format: ContextFormat::default(),
2233                last_recall_confidence: &mut last_confidence,
2234                context_strategy: ContextStrategy::default(),
2235                crossover_turn_threshold: 0,
2236                cached_session_digest: None,
2237                digest_enabled: false,
2238                graph_config: GraphConfig::default(),
2239                document_config: DocumentConfig::default(),
2240                persona_config: PersonaConfig::default(),
2241                trajectory_config: TrajectoryConfig::default(),
2242                reasoning_config: ReasoningConfig::default(),
2243                memcot_config: zeph_config::MemCotConfig::default(),
2244                memcot_state: None,
2245                tree_config: TreeConfig::default(),
2246                last_skills_prompt: &mut last_skills_prompt,
2247                active_skill_names: &mut active_skill_names,
2248                skill_registry: registry,
2249                skill_paths: &[],
2250                correction_config: None,
2251                sidequest_turn_counter: 0,
2252                proactive_explorer: None,
2253                sanitizer: &sanitizer,
2254                quarantine_summarizer: None,
2255                context_manager: &mut ctx_mgr,
2256                token_counter: make_counter(),
2257                metrics: MetricsCounters::default(),
2258                security_events: &mut sink,
2259                cached_prompt_tokens: 0,
2260                redact_credentials: false,
2261                channel_skills: &[],
2262                scrub: scrub_noop,
2263                tiered_retrieval_config: TieredRetrievalConfig {
2264                    enabled: true,
2265                    ..TieredRetrievalConfig::default()
2266                },
2267                tiered_retrieval_classifier: None,
2268                tiered_retrieval_validator: None,
2269                fidelity_config: None,
2270                fidelity_semantic_provider: None,
2271                fidelity_compress_provider: None,
2272                planned_next_tools: &[],
2273                status_tx: None,
2274                task_supervisor: make_task_supervisor(),
2275            };
2276
2277            let result = ContextService::new()
2278                .inject_semantic_recall("test query", 1000, &mut window, &view)
2279                .await;
2280
2281            assert!(
2282                result.is_ok(),
2283                "enabled tiered recall with no memory must return Ok(())"
2284            );
2285            assert!(
2286                window.messages.is_empty(),
2287                "no recall message must be injected when memory is None"
2288            );
2289        }
2290
2291        // Regression test for #3996: prepare_context must call inject_semantic_recall when
2292        // tiered_retrieval.enabled = true. When context_manager.budget is None the function
2293        // returns early with Ok(ContextDelta::default()); this test verifies that early-return
2294        // path compiles and does not panic with the new conditional blocks in place.
2295        #[tokio::test]
2296        async fn prepare_context_tiered_enabled_no_budget_returns_default() {
2297            let mut msgs: Vec<zeph_llm::provider::Message> = vec![];
2298            let mut cached = 0u64;
2299            let mut completed = HashSet::new();
2300            let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2301
2302            let sanitizer = zeph_sanitizer::ContentSanitizer::new(
2303                &zeph_sanitizer::ContentIsolationConfig::default(),
2304            );
2305            let mut ctx_mgr = zeph_context::manager::ContextManager::new();
2306            // budget = None → prepare_context returns Ok(ContextDelta::default()) immediately.
2307            assert!(ctx_mgr.budget.is_none());
2308
2309            let mut sink = NoopSink;
2310            let mut last_confidence = None::<f32>;
2311            let mut last_skills_prompt = String::new();
2312            let mut active_skill_names = Vec::new();
2313            let registry = Arc::new(RwLock::new(zeph_skills::registry::SkillRegistry::default()));
2314
2315            let mut view = ContextAssemblyView {
2316                memory: None,
2317                conversation_id: None,
2318                recall_limit: 10,
2319                cross_session_score_threshold: 0.5,
2320                context_format: ContextFormat::default(),
2321                last_recall_confidence: &mut last_confidence,
2322                context_strategy: ContextStrategy::default(),
2323                crossover_turn_threshold: 0,
2324                cached_session_digest: None,
2325                digest_enabled: false,
2326                graph_config: GraphConfig::default(),
2327                document_config: DocumentConfig::default(),
2328                persona_config: PersonaConfig::default(),
2329                trajectory_config: TrajectoryConfig::default(),
2330                reasoning_config: ReasoningConfig::default(),
2331                memcot_config: zeph_config::MemCotConfig::default(),
2332                memcot_state: None,
2333                tree_config: TreeConfig::default(),
2334                last_skills_prompt: &mut last_skills_prompt,
2335                active_skill_names: &mut active_skill_names,
2336                skill_registry: registry,
2337                skill_paths: &[],
2338                correction_config: None,
2339                sidequest_turn_counter: 0,
2340                proactive_explorer: None,
2341                sanitizer: &sanitizer,
2342                quarantine_summarizer: None,
2343                context_manager: &mut ctx_mgr,
2344                token_counter: make_counter(),
2345                metrics: MetricsCounters::default(),
2346                security_events: &mut sink,
2347                cached_prompt_tokens: 0,
2348                redact_credentials: false,
2349                channel_skills: &[],
2350                scrub: scrub_noop,
2351                tiered_retrieval_config: TieredRetrievalConfig {
2352                    enabled: true,
2353                    ..TieredRetrievalConfig::default()
2354                },
2355                tiered_retrieval_classifier: None,
2356                tiered_retrieval_validator: None,
2357                fidelity_config: None,
2358                fidelity_semantic_provider: None,
2359                fidelity_compress_provider: None,
2360                planned_next_tools: &[],
2361                status_tx: None,
2362                task_supervisor: make_task_supervisor(),
2363            };
2364
2365            let result = ContextService::new()
2366                .prepare_context("test query", &mut window, &mut view)
2367                .await;
2368
2369            assert!(
2370                result.is_ok(),
2371                "prepare_context with tiered enabled and no budget must return Ok"
2372            );
2373        }
2374
2375        // Regression test for #4022: inject_semantic_recall_bare must be callable without a
2376        // full ContextAssemblyView and must return Ok(()) when memory is None.
2377        #[tokio::test]
2378        async fn inject_semantic_recall_bare_no_memory_returns_ok() {
2379            use zeph_config::memory::TieredRetrievalConfig;
2380
2381            let mut msgs: Vec<Message> = vec![];
2382            let mut cached = 0u64;
2383            let mut completed = HashSet::new();
2384            let mut window = make_window(&mut msgs, &mut cached, &mut completed);
2385
2386            let tiered_config = TieredRetrievalConfig {
2387                enabled: true,
2388                ..TieredRetrievalConfig::default()
2389            };
2390            let params = SemanticRecallParams {
2391                query: "test query",
2392                token_budget: 1000,
2393                recall_limit: 10,
2394                context_format: zeph_config::ContextFormat::default(),
2395                conversation_id: None,
2396                tiered_classifier: None,
2397                tiered_validator: None,
2398                tiered_config: &tiered_config,
2399            };
2400            let result = ContextService::new()
2401                .inject_semantic_recall_bare(params, &mut window, None)
2402                .await;
2403
2404            assert!(
2405                result.is_ok(),
2406                "inject_semantic_recall_bare with memory=None must return Ok(())"
2407            );
2408            assert!(
2409                window.messages.is_empty(),
2410                "no recall message must be injected when memory is None"
2411            );
2412        }
2413    }
2414}
zeph_agent_context/service.rs

zeph_agent_context/
service.rs