Skip to main content

zeph_context/
assembler.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Stateless context assembler.
5//!
6//! [`ContextAssembler`] gathers all memory-sourced context for a single agent turn by running
7//! all async fetch operations concurrently. It takes only borrowed references via
8//! [`ContextAssemblyInput`] and returns a [`PreparedContext`] ready for injection.
9//!
10//! Invariants:
11//! - No `Agent` field mutations inside `gather()`.
12//! - No channel communication inside `gather()`.
13//! - All `send_status` calls remain in `Agent::prepare_context`.
14//! - `session_digest` is cached (not async) and stays in `Agent::apply_prepared_context`.
15
16use std::future::Future;
17use std::pin::Pin;
18
19use futures::StreamExt as _;
20use futures::stream::FuturesUnordered;
21
22use zeph_common::memory::{AsyncMemoryRouter, CompressionLevel, GraphRecallParams, TokenCounting};
23use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
24
25use crate::error::AssemblerError;
26use crate::input::ContextAssemblyInput;
27use crate::slot::ContextSlot;
28
29/// Map a slice of active compression levels to per-tier boolean flags.
30///
31/// Returns `(episodic_active, procedural_active, declarative_active)`.
32///
33/// An empty slice means "no tier filtering": all three flags are `true`. This is the defensive
34/// default — passing an empty slice preserves legacy behaviour instead of silently suppressing
35/// all memory recall.
36pub(crate) fn levels_to_flags(levels: &[CompressionLevel]) -> (bool, bool, bool) {
37    if levels.is_empty() {
38        return (true, true, true);
39    }
40    let episodic = levels.contains(&CompressionLevel::Episodic);
41    let procedural = levels.contains(&CompressionLevel::Procedural);
42    let declarative = levels.contains(&CompressionLevel::Declarative);
43    (episodic, procedural, declarative)
44}
45
/// Prefix for past-session summary injections.
///
/// `fetch_summaries` starts its message text with this header.
pub const SUMMARY_PREFIX: &str = "[conversation summaries]\n";
/// Prefix for cross-session context injections.
pub const CROSS_SESSION_PREFIX: &str = "[cross-session context]\n";
/// Prefix for semantic recall injections.
///
/// `fetch_semantic_recall` starts its text with this header and only emits a message when
/// the token count ends up above the header's own count.
pub const RECALL_PREFIX: &str = "[semantic recall]\n";
/// Prefix for past-correction injections.
///
/// `fetch_corrections` starts its message text with this header.
pub const CORRECTIONS_PREFIX: &str = "[past corrections]\n";
/// Prefix for document RAG injections.
///
/// `fetch_document_rag` starts its message text with this header.
pub const DOCUMENT_RAG_PREFIX: &str = "## Relevant documents\n";
/// Prefix for knowledge graph fact injections.
///
/// `fetch_graph_facts` starts its message body with this header and returns `None` when
/// nothing was appended after it.
pub const GRAPH_FACTS_PREFIX: &str = "[known facts]\n";
58
/// Result of one context-assembly pass.
///
/// All source fields are `Option` — `None` means disabled, empty, or budget-exhausted.
/// `session_digest` is excluded: it is a cached value injected by `Agent::apply_prepared_context`.
///
/// The derived `Default` is the "empty" context: every `Option` is `None`, `memory_first` is
/// `false`, `recent_history_budget` is `0`, and `background_tasks` is empty.
#[derive(Default)]
pub struct PreparedContext {
    /// Knowledge graph fact recall.
    pub graph_facts: Option<Message>,
    /// Document RAG context.
    pub doc_rag: Option<Message>,
    /// Past user corrections.
    pub corrections: Option<Message>,
    /// Semantic recall results.
    pub recall: Option<Message>,
    /// Top-1 similarity score from semantic recall.
    ///
    /// Written together with `recall` from the same semantic-recall slot.
    pub recall_confidence: Option<f32>,
    /// Cross-session memory context.
    pub cross_session: Option<Message>,
    /// Past-conversation summaries.
    pub summaries: Option<Message>,
    /// Code-index RAG context (repo map or file context).
    ///
    /// Unlike the other sources this is plain text, not a [`Message`].
    pub code_context: Option<String>,
    /// Persona memory facts.
    pub persona_facts: Option<Message>,
    /// Trajectory hints.
    pub trajectory_hints: Option<Message>,
    /// `TiMem` tree memory summary.
    pub tree_memory: Option<Message>,
    /// Distilled reasoning strategies from the `ReasoningBank` (#3343).
    pub reasoning_hints: Option<Message>,
    /// Whether the memory-first context strategy is active for this turn.
    pub memory_first: bool,
    /// Token budget for recent conversation history (passed to trim step in apply).
    pub recent_history_budget: usize,
    /// Background tasks spawned during context assembly that must be tracked to completion.
    ///
    /// Callers are responsible for awaiting or aborting these handles at an appropriate boundary
    /// (e.g., turn end). See async discipline rule: fire-and-forget tasks MUST be tracked.
    ///
    /// In this file the only producer is the reasoning-strategies fetcher, which spawns a task
    /// to mark the injected strategies as used.
    pub background_tasks: Vec<tokio::task::JoinHandle<()>>,
}
99
/// Stateless coordinator for parallel context fetching.
///
/// All logic is in [`ContextAssembler::gather`]. No state is stored on this type: it is a
/// unit struct and `gather` is an associated function that takes no `self`.
pub struct ContextAssembler;
104
/// Boxed, pinned, `Send` future that resolves to one filled [`ContextSlot`] or an
/// [`AssemblerError`]; the element type of the `FuturesUnordered` set of fetchers.
type CtxFuture<'a> = Pin<Box<dyn Future<Output = Result<ContextSlot, AssemblerError>> + Send + 'a>>;
106
107fn empty_prepared_context() -> PreparedContext {
108    PreparedContext::default()
109}
110
111fn resolve_effective_strategy(
112    memory: &crate::input::ContextMemoryView,
113    sidequest_turn_counter: u64,
114) -> zeph_config::ContextStrategy {
115    match memory.context_strategy {
116        zeph_config::ContextStrategy::MemoryFirst => zeph_config::ContextStrategy::MemoryFirst,
117        zeph_config::ContextStrategy::Adaptive => {
118            if sidequest_turn_counter >= u64::from(memory.crossover_turn_threshold) {
119                zeph_config::ContextStrategy::MemoryFirst
120            } else {
121                zeph_config::ContextStrategy::FullHistory
122            }
123        }
124        _ => zeph_config::ContextStrategy::FullHistory,
125    }
126}
127
128fn correction_params(cfg: Option<&crate::input::CorrectionConfig>) -> (usize, f32) {
129    cfg.filter(|c| c.correction_detection)
130        .map_or((3, 0.75), |c| {
131            (
132                c.correction_recall_limit as usize,
133                c.correction_min_similarity,
134            )
135        })
136}
137
/// Schedules all enabled context fetchers and returns them as a set of concurrent futures.
///
/// Every borrowed input that a future captures (`memory`, `tc`, `query`, `index`,
/// `router_ref`) shares the single lifetime `'r`, which is also the lifetime of the returned
/// boxed futures. All `usize` budget values are passed by copy so the returned futures do not
/// borrow from the caller's allocation. `active_levels` is only read synchronously (through
/// `levels_to_flags`) and is not captured by any future.
///
/// Gating, as implemented below:
/// - summaries, cross-session, semantic recall and document RAG need the episodic tier and a
///   non-zero budget (document RAG shares `semantic_recall_budget`);
/// - corrections are always scheduled;
/// - code RAG needs a non-zero `code_context_budget` and an `index`, independent of tiers;
/// - graph facts, persona facts and tree memory need the declarative tier;
/// - trajectory hints and reasoning strategies need the procedural tier.
///
/// Nothing is polled here; the futures are only boxed and pushed into the returned set.
#[allow(clippy::too_many_arguments)]
fn schedule_context_fetchers<'r>(
    memory: &'r crate::input::ContextMemoryView,
    tc: &'r dyn TokenCounting,
    query: &'r str,
    scrub: fn(&str) -> std::borrow::Cow<'_, str>,
    index: Option<&'r dyn crate::input::IndexAccess>,
    router_ref: &'r dyn AsyncMemoryRouter,
    summaries_budget: usize,
    cross_session_budget: usize,
    semantic_recall_budget: usize,
    code_context_budget: usize,
    graph_facts_budget: usize,
    recall_limit: usize,
    min_sim: f32,
    active_levels: &[CompressionLevel],
) -> FuturesUnordered<CtxFuture<'r>> {
    // TODO(critic): episodic_active currently gates summaries + cross-session + recall + doc_rag
    // together. If future RetrievalPolicy variants ever drop Episodic, the cheap summary fetchers
    // will be silently disabled — split into raw vs compressed sub-tiers. (#3455 follow-up)
    let (episodic_active, procedural_active, declarative_active) = levels_to_flags(active_levels);

    let fetchers: FuturesUnordered<CtxFuture<'r>> = FuturesUnordered::new();

    if episodic_active && summaries_budget > 0 {
        fetchers.push(Box::pin(async move {
            fetch_summaries(memory, summaries_budget, tc)
                .await
                .map(ContextSlot::Summaries)
        }));
    }
    if episodic_active && cross_session_budget > 0 {
        fetchers.push(Box::pin(async move {
            fetch_cross_session(memory, query, cross_session_budget, tc)
                .await
                .map(ContextSlot::CrossSession)
        }));
    }
    // Semantic recall and document RAG are scheduled as two separate futures under the same
    // condition and with the same budget value.
    if episodic_active && semantic_recall_budget > 0 {
        fetchers.push(Box::pin(async move {
            fetch_semantic_recall(memory, query, semantic_recall_budget, tc, Some(router_ref))
                .await
                .map(|(msg, score)| ContextSlot::SemanticRecall(msg, score))
        }));
        fetchers.push(Box::pin(async move {
            fetch_document_rag(memory, query, semantic_recall_budget, tc)
                .await
                .map(ContextSlot::DocumentRag)
        }));
    }
    // Corrections are safety-critical and never budget-gated or tier-gated.
    fetchers.push(Box::pin(async move {
        fetch_corrections(memory, query, recall_limit, min_sim, scrub)
            .await
            .map(ContextSlot::Corrections)
    }));
    // Code RAG is request-driven, not memory-tier; exempt from tier filtering.
    if code_context_budget > 0
        && let Some(idx) = index
    {
        fetchers.push(Box::pin(async move {
            let result: Result<Option<String>, AssemblerError> =
                idx.fetch_code_rag(query, code_context_budget).await;
            result.map(ContextSlot::CodeContext)
        }));
    }
    if declarative_active && graph_facts_budget > 0 {
        fetchers.push(Box::pin(async move {
            fetch_graph_facts(memory, query, graph_facts_budget, tc)
                .await
                .map(ContextSlot::GraphFacts)
        }));
    }
    // The remaining fetchers take their budgets from per-feature config on `memory` rather
    // than from a parameter. Persona, trajectory and tree fetchers re-check their own
    // `enabled` flag internally; only the reasoning fetcher is gated on `enabled` here.
    if declarative_active && memory.persona_config.context_budget_tokens > 0 {
        fetchers.push(Box::pin(async move {
            let persona_budget = memory.persona_config.context_budget_tokens;
            fetch_persona_facts(memory, persona_budget, tc)
                .await
                .map(ContextSlot::PersonaFacts)
        }));
    }
    if procedural_active && memory.trajectory_config.context_budget_tokens > 0 {
        fetchers.push(Box::pin(async move {
            let tbudget = memory.trajectory_config.context_budget_tokens;
            fetch_trajectory_hints(memory, tbudget, tc)
                .await
                .map(ContextSlot::TrajectoryHints)
        }));
    }
    if declarative_active && memory.tree_config.context_budget_tokens > 0 {
        fetchers.push(Box::pin(async move {
            let tbudget = memory.tree_config.context_budget_tokens;
            fetch_tree_memory(memory, tbudget, tc)
                .await
                .map(ContextSlot::TreeMemory)
        }));
    }
    if procedural_active
        && memory.reasoning_config.enabled
        && memory.reasoning_config.context_budget_tokens > 0
    {
        fetchers.push(Box::pin(async move {
            let rbudget = memory.reasoning_config.context_budget_tokens;
            let top_k = memory.reasoning_config.top_k;
            fetch_reasoning_strategies(memory, query, rbudget, top_k, tc)
                .await
                .map(|(msg, handle)| ContextSlot::ReasoningStrategies(msg, handle))
        }));
    }

    fetchers
}
256
257async fn drive_fetchers(
258    mut fetchers: FuturesUnordered<CtxFuture<'_>>,
259    prepared: &mut PreparedContext,
260) -> Result<(), AssemblerError> {
261    while let Some(result) = fetchers.next().await {
262        match result {
263            Ok(slot) => match slot {
264                ContextSlot::Summaries(msg) => prepared.summaries = msg,
265                ContextSlot::CrossSession(msg) => prepared.cross_session = msg,
266                ContextSlot::SemanticRecall(msg, score) => {
267                    prepared.recall = msg;
268                    prepared.recall_confidence = score;
269                }
270                ContextSlot::DocumentRag(msg) => prepared.doc_rag = msg,
271                ContextSlot::Corrections(msg) => prepared.corrections = msg,
272                ContextSlot::CodeContext(text) => prepared.code_context = text,
273                ContextSlot::GraphFacts(msg) => prepared.graph_facts = msg,
274                ContextSlot::PersonaFacts(msg) => prepared.persona_facts = msg,
275                ContextSlot::TrajectoryHints(msg) => prepared.trajectory_hints = msg,
276                ContextSlot::TreeMemory(msg) => prepared.tree_memory = msg,
277                ContextSlot::ReasoningStrategies(msg, handle) => {
278                    prepared.reasoning_hints = msg;
279                    if let Some(h) = handle {
280                        prepared.background_tasks.push(h);
281                    }
282                }
283            },
284            Err(e) => return Err(e),
285        }
286    }
287    Ok(())
288}
289
290impl ContextAssembler {
291    /// Gather all context sources concurrently and return a [`PreparedContext`].
292    ///
293    /// Returns an empty `PreparedContext` immediately when `context_manager.budget` is `None`.
294    ///
295    /// # Errors
296    ///
297    /// Propagates errors from any async fetch operation.
298    pub async fn gather(
299        input: &ContextAssemblyInput<'_>,
300    ) -> Result<PreparedContext, AssemblerError> {
301        let Some(ref budget) = input.context_manager.budget else {
302            return Ok(empty_prepared_context());
303        };
304
305        let memory = input.memory;
306        let tc = input.token_counter;
307
308        let effective_strategy = resolve_effective_strategy(memory, input.sidequest_turn_counter);
309        let memory_first = effective_strategy == zeph_config::ContextStrategy::MemoryFirst;
310
311        let system_prompt = input
312            .messages
313            .first()
314            .filter(|m| m.role == Role::System)
315            .map_or("", |m| m.content.as_str());
316
317        let digest_tokens = memory
318            .cached_session_digest
319            .as_ref()
320            .map_or(0, |(_, tokens)| *tokens);
321
322        let alloc = budget.allocate_with_opts(
323            system_prompt,
324            input.skills_prompt,
325            tc,
326            memory.graph_config.enabled,
327            digest_tokens,
328            memory_first,
329        );
330
331        let (recall_limit, min_sim) = correction_params(input.correction_config.as_ref());
332
333        let router_ref: &dyn AsyncMemoryRouter = input.router.as_ref();
334
335        tracing::debug!(
336            active_sources = alloc.active_sources(),
337            active_levels = ?input.active_levels,
338            "context budget allocated"
339        );
340
341        let fetchers = schedule_context_fetchers(
342            memory,
343            tc,
344            input.query,
345            input.scrub,
346            input.index,
347            router_ref,
348            alloc.summaries,
349            alloc.cross_session,
350            alloc.semantic_recall,
351            alloc.code_context,
352            alloc.graph_facts,
353            recall_limit,
354            min_sim,
355            input.active_levels,
356        );
357
358        let mut prepared = empty_prepared_context();
359        prepared.memory_first = memory_first;
360        prepared.recent_history_budget = alloc.recent_history;
361
362        drive_fetchers(fetchers, &mut prepared).await?;
363        Ok(prepared)
364    }
365}
366
367/// Clamp recall timeout to a safe minimum.
368///
369/// A configured value of 0 would disable spreading activation recall entirely;
370/// clamping to 100ms preserves the user's intent while preventing a silent no-op.
371pub fn effective_recall_timeout_ms(configured: u64) -> u64 {
372    if configured == 0 {
373        tracing::warn!(
374            "recall_timeout_ms is 0, which would disable spreading activation recall; \
375             clamping to 100ms"
376        );
377        100
378    } else {
379        configured
380    }
381}
382
383use crate::input::ContextMemoryView;
384
/// Recalls knowledge-graph facts for `query` and renders them into a system message capped at
/// `budget_tokens`.
///
/// Returns `Ok(None)` when the budget is zero, graph recall is disabled, no memory backend is
/// present, recall yields nothing, or not even the first fact fits the budget. Recall failures
/// and timeouts are logged and treated as an empty result, so the code paths visible here
/// never produce an `Err`.
#[tracing::instrument(name = "context.graph_facts", skip_all)]
#[allow(clippy::too_many_lines)] // single-pass view-aware enrichment pipeline
pub(crate) async fn fetch_graph_facts(
    memory: &ContextMemoryView,
    query: &str,
    budget_tokens: usize,
    tc: &dyn TokenCounting,
) -> Result<Option<Message>, AssemblerError> {
    use zeph_common::memory::{RecallView, SpreadingActivationParams, classify_graph_subgraph};

    if budget_tokens == 0 || !memory.graph_config.enabled {
        return Ok(None);
    }
    let Some(ref mem) = memory.memory else {
        return Ok(None);
    };
    let recall_limit = memory.graph_config.recall_limit;
    let temporal_decay_rate = memory.graph_config.temporal_decay_rate;
    let sa_config = &memory.graph_config.spreading_activation;

    // Fuse MemCoT semantic state into the recall query (spec §A8: state ≤ 2 × query.len()).
    // `fused_query` is declared outside the `if` so the borrowed `effective_query` can outlive
    // the branch that builds it.
    let fused_query;
    let effective_query = if let Some(ref state) = memory.memcot_state {
        // `len()` counts bytes, so this cap is a byte count despite the name;
        // `floor_char_boundary` moves the cut back onto a UTF-8 character boundary.
        let max_state_chars = 2 * query.len();
        let state_slice = if state.len() > max_state_chars {
            let boundary = state.floor_char_boundary(max_state_chars);
            &state[..boundary]
        } else {
            state.as_str()
        };
        fused_query = format!("[state] {state_slice}\n{query}");
        &fused_query as &str
    } else {
        query
    };

    // Classification uses the fused query, so MemCoT state (when present) also influences it.
    let edge_types = classify_graph_subgraph(effective_query);

    // Any recall-view setting other than ZoomIn/ZoomOut maps to the Head view.
    let view = match memory.memcot_config.recall_view {
        zeph_config::RecallViewConfig::ZoomIn => RecallView::ZoomIn,
        zeph_config::RecallViewConfig::ZoomOut => RecallView::ZoomOut,
        _ => RecallView::Head,
    };

    let sa_params = if sa_config.enabled {
        Some(SpreadingActivationParams {
            decay_lambda: sa_config.decay_lambda,
            max_hops: sa_config.max_hops,
            activation_threshold: sa_config.activation_threshold,
            inhibition_threshold: sa_config.inhibition_threshold,
            max_activated_nodes: sa_config.max_activated_nodes,
            temporal_decay_rate,
            seed_structural_weight: sa_config.seed_structural_weight,
            seed_community_cap: sa_config.seed_community_cap,
        })
    } else {
        None
    };

    // The timeout is taken from the spreading-activation config and wraps the whole recall
    // call, whether or not spreading activation itself is enabled.
    let timeout_ms = effective_recall_timeout_ms(sa_config.recall_timeout_ms);
    let recall_fut = mem.recall_graph_facts(
        effective_query,
        GraphRecallParams {
            limit: recall_limit,
            view,
            zoom_out_neighbor_cap: memory.memcot_config.zoom_out_neighbor_cap,
            max_hops: memory.graph_config.max_hops,
            temporal_decay_rate,
            edge_types: &edge_types,
            spreading_activation: sa_params,
        },
    );
    // Outer `Err` is the timeout elapsing; inner `Err` is the recall itself failing. Both
    // degrade to "no facts" instead of failing context assembly.
    let recalled = match tokio::time::timeout(
        std::time::Duration::from_millis(timeout_ms),
        recall_fut,
    )
    .await
    {
        Ok(Ok(facts)) => facts,
        Ok(Err(e)) => {
            tracing::warn!("graph recall failed: {e:#}");
            Vec::new()
        }
        Err(_) => {
            tracing::warn!("graph recall timed out ({timeout_ms}ms)");
            Vec::new()
        }
    };

    if recalled.is_empty() {
        return Ok(None);
    }

    // The prefix itself counts against the budget.
    let mut body = String::from(GRAPH_FACTS_PREFIX);
    let mut tokens_so_far = tc.count_tokens(&body);

    for rf in &recalled {
        // Newlines and angle brackets in stored text are replaced by spaces so each fact
        // stays on a single rendered line.
        let fact_text = rf.fact.replace(['\n', '\r', '<', '>'], " ");
        let line = if let Some(score) = rf.activation_score {
            format!(
                "- {} (confidence: {:.2}, activation: {:.2})\n",
                fact_text, rf.confidence, score
            )
        } else {
            format!("- {} (confidence: {:.2})\n", fact_text, rf.confidence)
        };
        let line_tokens = tc.count_tokens(&line);
        // A head fact that does not fit ends rendering entirely; later facts are not tried.
        if tokens_so_far + line_tokens > budget_tokens {
            break;
        }
        body.push_str(&line);
        tokens_so_far += line_tokens;

        // Append ZoomOut neighbors after the head fact.
        for nb in &rf.neighbors {
            let nb_text = nb.fact.replace(['\n', '\r', '<', '>'], " ");
            let nb_line = format!("  ~ {} (confidence: {:.2})\n", nb_text, nb.confidence);
            let nb_tokens = tc.count_tokens(&nb_line);
            // This only stops the neighbor list; the outer loop continues with the next fact.
            if tokens_so_far + nb_tokens > budget_tokens {
                break;
            }
            body.push_str(&nb_line);
            tokens_so_far += nb_tokens;
        }

        // Append ZoomIn provenance snippet if present.
        if let Some(ref snippet) = rf.provenance_snippet {
            let snip_line = format!(
                "  [source: {}]\n",
                snippet.replace(['\n', '\r', '<', '>'], " ")
            );
            let snip_tokens = tc.count_tokens(&snip_line);
            // A snippet that does not fit is skipped without stopping the outer loop.
            if tokens_so_far + snip_tokens <= budget_tokens {
                body.push_str(&snip_line);
                tokens_so_far += snip_tokens;
            }
        }
    }

    // Nothing was appended after the prefix: emit no message at all.
    if body == GRAPH_FACTS_PREFIX {
        return Ok(None);
    }

    Ok(Some(Message::from_legacy(Role::System, body)))
}
530
531#[tracing::instrument(name = "context.persona_facts", skip_all)]
532pub(crate) async fn fetch_persona_facts(
533    memory: &ContextMemoryView,
534    budget_tokens: usize,
535    tc: &dyn TokenCounting,
536) -> Result<Option<Message>, AssemblerError> {
537    if budget_tokens == 0 || !memory.persona_config.enabled {
538        return Ok(None);
539    }
540    let Some(ref mem) = memory.memory else {
541        return Ok(None);
542    };
543
544    let min_confidence = memory.persona_config.min_confidence;
545    let facts = mem
546        .load_persona_facts(min_confidence)
547        .await
548        .map_err(AssemblerError::Memory)?;
549
550    if facts.is_empty() {
551        return Ok(None);
552    }
553
554    let mut body = String::from(crate::slot::PERSONA_PREFIX);
555    let mut tokens_so_far = tc.count_tokens(&body);
556
557    for fact in &facts {
558        let line = format!("[{}] {}\n", fact.category, fact.content);
559        let line_tokens = tc.count_tokens(&line);
560        if tokens_so_far + line_tokens > budget_tokens {
561            break;
562        }
563        body.push_str(&line);
564        tokens_so_far += line_tokens;
565    }
566
567    if body == crate::slot::PERSONA_PREFIX {
568        return Ok(None);
569    }
570
571    Ok(Some(Message::from_legacy(Role::System, body)))
572}
573
574#[tracing::instrument(name = "context.trajectory_hints", skip_all)]
575pub(crate) async fn fetch_trajectory_hints(
576    memory: &ContextMemoryView,
577    budget_tokens: usize,
578    tc: &dyn TokenCounting,
579) -> Result<Option<Message>, AssemblerError> {
580    if budget_tokens == 0 || !memory.trajectory_config.enabled {
581        return Ok(None);
582    }
583    let Some(ref mem) = memory.memory else {
584        return Ok(None);
585    };
586
587    let top_k = memory.trajectory_config.recall_top_k;
588    let min_conf = memory.trajectory_config.min_confidence;
589    // Load procedural trajectory entries via the backend abstraction.
590    // The "procedural" filter maps to the same tier used by the original
591    // sqlite().load_trajectory_entries(Some("procedural"), top_k) call.
592    let entries = mem
593        .load_trajectory_entries(Some("procedural"), top_k)
594        .await
595        .map_err(AssemblerError::Memory)?;
596
597    if entries.is_empty() {
598        return Ok(None);
599    }
600
601    let mut body = String::from(crate::slot::TRAJECTORY_PREFIX);
602    let mut tokens_so_far = tc.count_tokens(&body);
603
604    for entry in entries
605        .iter()
606        .filter(|e| e.confidence >= min_conf)
607        .take(top_k)
608    {
609        let line = format!("- {}: {}\n", entry.intent, entry.outcome);
610        let line_tokens = tc.count_tokens(&line);
611        if tokens_so_far + line_tokens > budget_tokens {
612            break;
613        }
614        body.push_str(&line);
615        tokens_so_far += line_tokens;
616    }
617
618    if body == crate::slot::TRAJECTORY_PREFIX {
619        return Ok(None);
620    }
621
622    Ok(Some(Message::from_legacy(Role::System, body)))
623}
624
625#[tracing::instrument(name = "context.tree_memory", skip_all)]
626pub(crate) async fn fetch_tree_memory(
627    memory: &ContextMemoryView,
628    budget_tokens: usize,
629    tc: &dyn TokenCounting,
630) -> Result<Option<Message>, AssemblerError> {
631    if budget_tokens == 0 || !memory.tree_config.enabled {
632        return Ok(None);
633    }
634    let Some(ref mem) = memory.memory else {
635        return Ok(None);
636    };
637
638    let top_k = memory.tree_config.recall_top_k;
639    let nodes = mem
640        .load_tree_nodes(1, top_k)
641        .await
642        .map_err(AssemblerError::Memory)?;
643
644    if nodes.is_empty() {
645        return Ok(None);
646    }
647
648    let mut body = String::from(crate::slot::TREE_MEMORY_PREFIX);
649    let mut tokens_so_far = tc.count_tokens(&body);
650
651    for node in nodes.iter().take(top_k) {
652        let line = format!("- {}\n", node.content);
653        let line_tokens = tc.count_tokens(&line);
654        if tokens_so_far + line_tokens > budget_tokens {
655            break;
656        }
657        body.push_str(&line);
658        tokens_so_far += line_tokens;
659    }
660
661    if body == crate::slot::TREE_MEMORY_PREFIX {
662        return Ok(None);
663    }
664
665    Ok(Some(Message::from_legacy(Role::System, body)))
666}
667
668#[tracing::instrument(name = "context.reasoning_strategies", skip_all)]
669pub(crate) async fn fetch_reasoning_strategies(
670    memory: &ContextMemoryView,
671    query: &str,
672    budget_tokens: usize,
673    top_k: usize,
674    tc: &dyn TokenCounting,
675) -> Result<(Option<Message>, Option<tokio::task::JoinHandle<()>>), AssemblerError> {
676    // S1: enforce the ≤500-token spec cap documented in ReasoningConfig.
677    let budget_tokens = budget_tokens.min(500);
678    if budget_tokens == 0 {
679        return Ok((None, None));
680    }
681    let Some(ref mem) = memory.memory else {
682        return Ok((None, None));
683    };
684
685    let strategies = mem
686        .retrieve_reasoning_strategies(query, top_k)
687        .await
688        .map_err(AssemblerError::Memory)?;
689
690    if strategies.is_empty() {
691        return Ok((None, None));
692    }
693
694    let mut body = String::from(crate::slot::REASONING_PREFIX);
695    let mut tokens_so_far = tc.count_tokens(&body);
696    let mut injected_ids: Vec<String> = Vec::new();
697
698    for s in strategies.iter().take(top_k) {
699        // S-Med1: sanitize distilled summaries to prevent stored injection payloads
700        // from reaching the system prompt (mirrors fetch_graph_facts scrub pattern).
701        let safe_summary = s.summary.replace(['\n', '\r', '<', '>'], " ");
702        let line = format!("- [{}] {}\n", s.outcome, safe_summary);
703        let line_tokens = tc.count_tokens(&line);
704        if tokens_so_far + line_tokens > budget_tokens {
705            break;
706        }
707        body.push_str(&line);
708        tokens_so_far += line_tokens;
709        injected_ids.push(s.id.clone());
710    }
711
712    if body == crate::slot::REASONING_PREFIX {
713        return Ok((None, None));
714    }
715
716    // C4 split: mark_used only for strategies that made it past budget truncation.
717    // Spawn the task and return the handle so the caller can track it (async discipline rule:
718    // fire-and-forget tasks MUST be tracked; handle stored in PreparedContext::background_tasks).
719    let handle = if injected_ids.is_empty() {
720        None
721    } else {
722        let mem_clone = mem.clone();
723        Some(tokio::spawn(async move {
724            if let Err(e) = mem_clone.mark_reasoning_used(&injected_ids).await {
725                tracing::warn!(error = %e, "reasoning: mark_used failed");
726            }
727        }))
728    };
729
730    Ok((Some(Message::from_legacy(Role::System, body)), handle))
731}
732
733#[tracing::instrument(name = "context.corrections", skip_all)]
734pub(crate) async fn fetch_corrections(
735    memory: &ContextMemoryView,
736    query: &str,
737    limit: usize,
738    min_score: f32,
739    scrub: fn(&str) -> std::borrow::Cow<'_, str>,
740) -> Result<Option<Message>, AssemblerError> {
741    let Some(ref mem) = memory.memory else {
742        return Ok(None);
743    };
744    let corrections = mem
745        .retrieve_corrections(query, limit, min_score)
746        .await
747        .map_err(AssemblerError::Memory)?;
748    if corrections.is_empty() {
749        return Ok(None);
750    }
751    let mut text = String::from(CORRECTIONS_PREFIX);
752    for c in &corrections {
753        text.push_str("- Past user correction: \"");
754        text.push_str(&scrub(&c.correction_text));
755        text.push_str("\"\n");
756    }
757    Ok(Some(Message::from_legacy(Role::System, text)))
758}
759
760#[tracing::instrument(name = "context.semantic_recall", skip_all)]
761pub(crate) async fn fetch_semantic_recall(
762    memory: &ContextMemoryView,
763    query: &str,
764    token_budget: usize,
765    tc: &dyn TokenCounting,
766    router: Option<&dyn AsyncMemoryRouter>,
767) -> Result<(Option<Message>, Option<f32>), AssemblerError> {
768    let Some(ref mem) = memory.memory else {
769        return Ok((None, None));
770    };
771    if memory.recall_limit == 0 || token_budget == 0 {
772        return Ok((None, None));
773    }
774
775    let recalled = mem
776        .recall(query, memory.recall_limit, router)
777        .await
778        .map_err(AssemblerError::Memory)?;
779    if recalled.is_empty() {
780        return Ok((None, None));
781    }
782
783    let top_score = recalled.first().map(|r| r.score);
784
785    let mut recall_text = String::with_capacity(token_budget * 3);
786    recall_text.push_str(RECALL_PREFIX);
787    let mut tokens_used = tc.count_tokens(&recall_text);
788
789    for item in &recalled {
790        if item.content.starts_with("[skipped]") || item.content.starts_with("[stopped]") {
791            continue;
792        }
793        let entry = format!("- [{}] {}\n", item.role, item.content);
794        let entry_tokens = tc.count_tokens(&entry);
795        if tokens_used + entry_tokens > token_budget {
796            break;
797        }
798        recall_text.push_str(&entry);
799        tokens_used += entry_tokens;
800    }
801
802    if tokens_used > tc.count_tokens(RECALL_PREFIX) {
803        Ok((
804            Some(Message::from_parts(
805                Role::System,
806                vec![MessagePart::Recall { text: recall_text }],
807            )),
808            top_score,
809        ))
810    } else {
811        Ok((None, None))
812    }
813}
814
815#[tracing::instrument(name = "context.document_rag", skip_all)]
816pub(crate) async fn fetch_document_rag(
817    memory: &ContextMemoryView,
818    query: &str,
819    token_budget: usize,
820    tc: &dyn TokenCounting,
821) -> Result<Option<Message>, AssemblerError> {
822    if !memory.document_config.rag_enabled || token_budget == 0 {
823        return Ok(None);
824    }
825    let Some(ref mem) = memory.memory else {
826        return Ok(None);
827    };
828
829    let collection = &memory.document_config.collection;
830    let top_k = memory.document_config.top_k;
831    let chunks = mem
832        .search_document_collection(collection, query, top_k)
833        .await
834        .map_err(AssemblerError::Memory)?;
835    if chunks.is_empty() {
836        return Ok(None);
837    }
838
839    let mut text = String::from(DOCUMENT_RAG_PREFIX);
840    let mut tokens_used = tc.count_tokens(&text);
841
842    for chunk in &chunks {
843        if chunk.text.is_empty() {
844            continue;
845        }
846        let entry = format!("{}\n", chunk.text);
847        let cost = tc.count_tokens(&entry);
848        if tokens_used + cost > token_budget {
849            break;
850        }
851        text.push_str(&entry);
852        tokens_used += cost;
853    }
854
855    if tokens_used > tc.count_tokens(DOCUMENT_RAG_PREFIX) {
856        Ok(Some(Message {
857            role: Role::System,
858            content: text,
859            parts: vec![],
860            metadata: MessageMetadata::default(),
861        }))
862    } else {
863        Ok(None)
864    }
865}
866
867#[tracing::instrument(name = "context.summaries", skip_all)]
868pub(crate) async fn fetch_summaries(
869    memory: &ContextMemoryView,
870    token_budget: usize,
871    tc: &dyn TokenCounting,
872) -> Result<Option<Message>, AssemblerError> {
873    let (Some(mem), Some(cid)) = (&memory.memory, memory.conversation_id) else {
874        return Ok(None);
875    };
876    if token_budget == 0 {
877        return Ok(None);
878    }
879
880    let summaries = mem
881        .load_summaries(cid)
882        .await
883        .map_err(AssemblerError::Memory)?;
884    if summaries.is_empty() {
885        return Ok(None);
886    }
887
888    let mut summary_text = String::from(SUMMARY_PREFIX);
889    let mut tokens_used = tc.count_tokens(&summary_text);
890
891    for summary in summaries.iter().rev() {
892        let first = summary.first_message_id.unwrap_or(0);
893        let last = summary.last_message_id.unwrap_or(0);
894        let entry = format!("- Messages {first}-{last}: {}\n", summary.content);
895        let cost = tc.count_tokens(&entry);
896        if tokens_used + cost > token_budget {
897            break;
898        }
899        summary_text.push_str(&entry);
900        tokens_used += cost;
901    }
902
903    if tokens_used > tc.count_tokens(SUMMARY_PREFIX) {
904        Ok(Some(Message::from_parts(
905            Role::System,
906            vec![MessagePart::Summary { text: summary_text }],
907        )))
908    } else {
909        Ok(None)
910    }
911}
912
913#[tracing::instrument(name = "context.cross_session", skip_all)]
914pub(crate) async fn fetch_cross_session(
915    memory: &ContextMemoryView,
916    query: &str,
917    token_budget: usize,
918    tc: &dyn TokenCounting,
919) -> Result<Option<Message>, AssemblerError> {
920    let (Some(mem), Some(cid)) = (&memory.memory, memory.conversation_id) else {
921        return Ok(None);
922    };
923    if token_budget == 0 {
924        return Ok(None);
925    }
926
927    let threshold = memory.cross_session_score_threshold;
928    let results: Vec<_> = mem
929        .search_session_summaries(query, 5, Some(cid))
930        .await
931        .map_err(AssemblerError::Memory)?
932        .into_iter()
933        .filter(|r| r.score >= threshold)
934        .collect();
935    if results.is_empty() {
936        return Ok(None);
937    }
938
939    let mut text = String::from(CROSS_SESSION_PREFIX);
940    let mut tokens_used = tc.count_tokens(&text);
941
942    for item in &results {
943        let entry = format!("- {}\n", item.summary_text);
944        let cost = tc.count_tokens(&entry);
945        if tokens_used + cost > token_budget {
946            break;
947        }
948        text.push_str(&entry);
949        tokens_used += cost;
950    }
951
952    if tokens_used > tc.count_tokens(CROSS_SESSION_PREFIX) {
953        Ok(Some(Message::from_parts(
954            Role::System,
955            vec![MessagePart::CrossSession { text }],
956        )))
957    } else {
958        Ok(None)
959    }
960}
961
962/// Maximum number of messages scanned backward by [`memory_first_keep_tail`] before
963/// stopping at the next non-`ToolResult` boundary, to avoid O(N) scans on long sessions.
964pub const MAX_KEEP_TAIL_SCAN: usize = 50;
965
966/// Compute how many tail messages to keep when the `MemoryFirst` strategy is active.
967///
968/// Always keeps at least 2 messages. Extends the tail as long as the boundary message is
969/// a `ToolResult` (user message with a `ToolResult` part) to avoid splitting a tool-call
970/// round-trip. Capped at `MAX_KEEP_TAIL_SCAN` to prevent O(N) scans on long sessions.
971///
972/// `history_start` is the index of the first non-system message (typically 1).
973#[must_use]
974pub fn memory_first_keep_tail(messages: &[Message], history_start: usize) -> usize {
975    use zeph_llm::provider::MessagePart;
976
977    let mut keep_tail = 2usize;
978    let len = messages.len();
979    let max = len.saturating_sub(history_start);
980
981    while keep_tail < max {
982        let first_retained = &messages[len - keep_tail];
983        let is_tool_result = first_retained.role == Role::User
984            && first_retained
985                .parts
986                .iter()
987                .any(|p| matches!(p, MessagePart::ToolResult { .. }));
988
989        if is_tool_result {
990            keep_tail += 1;
991        } else {
992            break;
993        }
994
995        if keep_tail >= MAX_KEEP_TAIL_SCAN {
996            let preceding_idx = len.saturating_sub(keep_tail + 1);
997            if preceding_idx >= history_start {
998                let preceding = &messages[preceding_idx];
999                let is_tool_use = preceding.role == Role::Assistant
1000                    && preceding
1001                        .parts
1002                        .iter()
1003                        .any(|p| matches!(p, MessagePart::ToolUse { .. }));
1004                if is_tool_use {
1005                    keep_tail += 1;
1006                }
1007            }
1008            break;
1009        }
1010    }
1011
1012    keep_tail
1013}
1014
1015#[cfg(test)]
1016mod tests {
1017    use super::*;
1018    use crate::input::ContextMemoryView;
1019    use zeph_common::memory::CompressionLevel;
1020    use zeph_config::{
1021        ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig, ReasoningConfig,
1022        TrajectoryConfig, TreeConfig,
1023    };
1024
1025    struct NaiveTokenCounter;
1026    impl zeph_common::memory::TokenCounting for NaiveTokenCounter {
1027        fn count_tokens(&self, text: &str) -> usize {
1028            text.split_whitespace().count()
1029        }
1030        fn count_tool_schema_tokens(&self, schema: &serde_json::Value) -> usize {
1031            schema.to_string().split_whitespace().count()
1032        }
1033    }
1034
1035    fn empty_view() -> ContextMemoryView {
1036        ContextMemoryView {
1037            memory: None,
1038            conversation_id: None,
1039            recall_limit: 10,
1040            cross_session_score_threshold: 0.5,
1041            context_strategy: ContextStrategy::default(),
1042            crossover_turn_threshold: 5,
1043            cached_session_digest: None,
1044            graph_config: GraphConfig::default(),
1045            document_config: DocumentConfig::default(),
1046            persona_config: PersonaConfig::default(),
1047            trajectory_config: TrajectoryConfig::default(),
1048            reasoning_config: ReasoningConfig::default(),
1049            memcot_config: zeph_config::MemCotConfig::default(),
1050            memcot_state: None,
1051            tree_config: TreeConfig::default(),
1052        }
1053    }
1054
1055    // ── fetch_graph_facts ─────────────────────────────────────────────────────
1056
1057    #[tokio::test]
1058    async fn fetch_graph_facts_returns_none_when_memory_is_none() {
1059        let view = empty_view();
1060        let tc = NaiveTokenCounter;
1061        let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1062        assert!(result.is_none());
1063    }
1064
1065    #[tokio::test]
1066    async fn fetch_graph_facts_returns_none_when_budget_zero() {
1067        let mut view = empty_view();
1068        view.graph_config.enabled = true;
1069        let tc = NaiveTokenCounter;
1070        let result = fetch_graph_facts(&view, "test", 0, &tc).await.unwrap();
1071        assert!(result.is_none());
1072    }
1073
1074    #[tokio::test]
1075    async fn fetch_graph_facts_returns_none_when_graph_disabled() {
1076        let mut view = empty_view();
1077        view.graph_config.enabled = false;
1078        let tc = NaiveTokenCounter;
1079        let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1080        assert!(result.is_none());
1081    }
1082
1083    // ── fetch_persona_facts ───────────────────────────────────────────────────
1084
1085    #[tokio::test]
1086    async fn fetch_persona_facts_returns_none_when_memory_is_none() {
1087        let view = empty_view();
1088        let tc = NaiveTokenCounter;
1089        let result = fetch_persona_facts(&view, 1000, &tc).await.unwrap();
1090        assert!(result.is_none());
1091    }
1092
1093    #[tokio::test]
1094    async fn fetch_persona_facts_returns_none_when_budget_zero() {
1095        let mut view = empty_view();
1096        view.persona_config.enabled = true;
1097        let tc = NaiveTokenCounter;
1098        let result = fetch_persona_facts(&view, 0, &tc).await.unwrap();
1099        assert!(result.is_none());
1100    }
1101
1102    // ── fetch_trajectory_hints ────────────────────────────────────────────────
1103
1104    #[tokio::test]
1105    async fn fetch_trajectory_hints_returns_none_when_memory_is_none() {
1106        let view = empty_view();
1107        let tc = NaiveTokenCounter;
1108        let result = fetch_trajectory_hints(&view, 1000, &tc).await.unwrap();
1109        assert!(result.is_none());
1110    }
1111
1112    #[tokio::test]
1113    async fn fetch_trajectory_hints_returns_none_when_budget_zero() {
1114        let mut view = empty_view();
1115        view.trajectory_config.enabled = true;
1116        let tc = NaiveTokenCounter;
1117        let result = fetch_trajectory_hints(&view, 0, &tc).await.unwrap();
1118        assert!(result.is_none());
1119    }
1120
1121    // ── fetch_tree_memory ─────────────────────────────────────────────────────
1122
1123    #[tokio::test]
1124    async fn fetch_tree_memory_returns_none_when_memory_is_none() {
1125        let view = empty_view();
1126        let tc = NaiveTokenCounter;
1127        let result = fetch_tree_memory(&view, 1000, &tc).await.unwrap();
1128        assert!(result.is_none());
1129    }
1130
1131    #[tokio::test]
1132    async fn fetch_tree_memory_returns_none_when_budget_zero() {
1133        let mut view = empty_view();
1134        view.tree_config.enabled = true;
1135        let tc = NaiveTokenCounter;
1136        let result = fetch_tree_memory(&view, 0, &tc).await.unwrap();
1137        assert!(result.is_none());
1138    }
1139
1140    // ── fetch_corrections ─────────────────────────────────────────────────────
1141
1142    #[tokio::test]
1143    async fn fetch_corrections_returns_none_when_memory_is_none() {
1144        let view = empty_view();
1145        let result = fetch_corrections(&view, "test", 10, 0.5, |s| s.into())
1146            .await
1147            .unwrap();
1148        assert!(result.is_none());
1149    }
1150
1151    // ── fetch_semantic_recall ─────────────────────────────────────────────────
1152
1153    #[tokio::test]
1154    async fn fetch_semantic_recall_returns_none_when_memory_is_none() {
1155        let view = empty_view();
1156        let tc = NaiveTokenCounter;
1157        let result = fetch_semantic_recall(&view, "test", 1000, &tc, None)
1158            .await
1159            .unwrap();
1160        assert!(result.0.is_none() && result.1.is_none());
1161    }
1162
1163    #[tokio::test]
1164    async fn fetch_semantic_recall_returns_none_when_budget_zero() {
1165        let view = empty_view();
1166        let tc = NaiveTokenCounter;
1167        let result = fetch_semantic_recall(&view, "test", 0, &tc, None)
1168            .await
1169            .unwrap();
1170        assert!(result.0.is_none() && result.1.is_none());
1171    }
1172
1173    // ── fetch_document_rag ────────────────────────────────────────────────────
1174
1175    #[tokio::test]
1176    async fn fetch_document_rag_returns_none_when_memory_is_none() {
1177        let mut view = empty_view();
1178        view.document_config.rag_enabled = true;
1179        let tc = NaiveTokenCounter;
1180        let result = fetch_document_rag(&view, "test", 1000, &tc).await.unwrap();
1181        assert!(result.is_none());
1182    }
1183
1184    #[tokio::test]
1185    async fn fetch_document_rag_returns_none_when_rag_disabled() {
1186        let view = empty_view();
1187        let tc = NaiveTokenCounter;
1188        let result = fetch_document_rag(&view, "test", 1000, &tc).await.unwrap();
1189        assert!(result.is_none());
1190    }
1191
1192    // ── fetch_summaries ───────────────────────────────────────────────────────
1193
1194    #[tokio::test]
1195    async fn fetch_summaries_returns_none_when_memory_is_none() {
1196        let view = empty_view();
1197        let tc = NaiveTokenCounter;
1198        let result = fetch_summaries(&view, 1000, &tc).await.unwrap();
1199        assert!(result.is_none());
1200    }
1201
1202    // ── fetch_cross_session ───────────────────────────────────────────────────
1203
1204    #[tokio::test]
1205    async fn fetch_cross_session_returns_none_when_memory_is_none() {
1206        let view = empty_view();
1207        let tc = NaiveTokenCounter;
1208        let result = fetch_cross_session(&view, "test", 1000, &tc).await.unwrap();
1209        assert!(result.is_none());
1210    }
1211
1212    // ── levels_to_flags ───────────────────────────────────────────────────────
1213
1214    #[test]
1215    fn levels_to_flags_empty_slice_enables_all_tiers() {
1216        let (e, p, d) = levels_to_flags(&[]);
1217        assert!(e, "episodic should be active for empty slice");
1218        assert!(p, "procedural should be active for empty slice");
1219        assert!(d, "declarative should be active for empty slice");
1220    }
1221
1222    #[test]
1223    fn levels_to_flags_full_set_enables_all_tiers() {
1224        let all = &[
1225            CompressionLevel::Episodic,
1226            CompressionLevel::Procedural,
1227            CompressionLevel::Declarative,
1228        ];
1229        let (e, p, d) = levels_to_flags(all);
1230        assert!(e);
1231        assert!(p);
1232        assert!(d);
1233    }
1234
1235    #[test]
1236    fn levels_to_flags_episodic_only() {
1237        let (e, p, d) = levels_to_flags(&[CompressionLevel::Episodic]);
1238        assert!(e);
1239        assert!(!p, "procedural should be inactive");
1240        assert!(!d, "declarative should be inactive");
1241    }
1242
1243    #[test]
1244    fn levels_to_flags_episodic_and_procedural() {
1245        let (e, p, d) =
1246            levels_to_flags(&[CompressionLevel::Episodic, CompressionLevel::Procedural]);
1247        assert!(e);
1248        assert!(p);
1249        assert!(!d, "declarative should be inactive");
1250    }
1251
1252    #[test]
1253    fn levels_to_flags_declarative_only() {
1254        let (e, p, d) = levels_to_flags(&[CompressionLevel::Declarative]);
1255        assert!(!e, "episodic should be inactive");
1256        assert!(!p, "procedural should be inactive");
1257        assert!(d);
1258    }
1259
1260    // ── fetch_reasoning_strategies ────────────────────────────────────────────
1261
1262    #[tokio::test]
1263    async fn fetch_reasoning_strategies_returns_none_when_memory_is_none() {
1264        let mut view = empty_view();
1265        view.reasoning_config.enabled = true;
1266        let tc = NaiveTokenCounter;
1267        let (result, handle) = fetch_reasoning_strategies(&view, "query", 1000, 3, &tc)
1268            .await
1269            .unwrap();
1270        assert!(result.is_none());
1271        assert!(handle.is_none());
1272    }
1273
1274    #[tokio::test]
1275    async fn fetch_reasoning_strategies_returns_none_when_budget_zero() {
1276        let mut view = empty_view();
1277        view.reasoning_config.enabled = true;
1278        let tc = NaiveTokenCounter;
1279        let (result, handle) = fetch_reasoning_strategies(&view, "query", 0, 3, &tc)
1280            .await
1281            .unwrap();
1282        assert!(result.is_none());
1283        assert!(handle.is_none());
1284    }
1285
1286    // ── MockMemoryBackend ─────────────────────────────────────────────────────
1287
1288    use std::sync::{Arc, Mutex};
1289    use zeph_common::memory::{
1290        ContextMemoryBackend, GraphRecallParams, MemCorrection, MemDocumentChunk, MemGraphFact,
1291        MemPersonaFact, MemReasoningStrategy, MemRecalledMessage, MemSessionSummary, MemSummary,
1292        MemTrajectoryEntry, MemTreeNode,
1293    };
1294
1295    /// Known method names accepted by [`MockMemoryBackend::fail_on`].
1296    const KNOWN_FAIL_ON: &[&str] = &[
1297        "load_persona_facts",
1298        "load_trajectory_entries",
1299        "load_tree_nodes",
1300        "load_summaries",
1301        "retrieve_reasoning_strategies",
1302        "mark_reasoning_used",
1303        "retrieve_corrections",
1304        "recall",
1305        "recall_graph_facts",
1306        "search_session_summaries",
1307        "search_document_collection",
1308    ];
1309
1310    #[derive(Default)]
1311    struct MockMemoryBackend {
1312        persona_facts: Vec<MemPersonaFact>,
1313        trajectory_entries: Vec<MemTrajectoryEntry>,
1314        tree_nodes: Vec<MemTreeNode>,
1315        summaries: Vec<MemSummary>,
1316        reasoning_strategies: Vec<MemReasoningStrategy>,
1317        corrections: Vec<MemCorrection>,
1318        recalled: Vec<MemRecalledMessage>,
1319        graph_facts: Vec<MemGraphFact>,
1320        session_summaries: Vec<MemSessionSummary>,
1321        document_chunks: Vec<MemDocumentChunk>,
1322        /// When `Some("method_name")`, that method returns `Err(...)`.
1323        fail_on: Option<&'static str>,
1324        /// Tracks IDs passed to `mark_reasoning_used`.
1325        marked_ids: Mutex<Vec<String>>,
1326    }
1327
1328    impl MockMemoryBackend {
1329        fn with_fail_on(method: &'static str) -> Self {
1330            debug_assert!(
1331                KNOWN_FAIL_ON.contains(&method),
1332                "unknown fail_on method name: {method}"
1333            );
1334            Self {
1335                fail_on: Some(method),
1336                ..Default::default()
1337            }
1338        }
1339
1340        fn fail_err(method: &str) -> Box<dyn std::error::Error + Send + Sync> {
1341            format!("mock error in {method}").into()
1342        }
1343    }
1344
1345    impl ContextMemoryBackend for MockMemoryBackend {
1346        fn load_persona_facts<'a>(
1347            &'a self,
1348            _min_confidence: f64,
1349        ) -> std::pin::Pin<
1350            Box<
1351                dyn std::future::Future<
1352                        Output = Result<
1353                            Vec<MemPersonaFact>,
1354                            Box<dyn std::error::Error + Send + Sync>,
1355                        >,
1356                    > + Send
1357                    + 'a,
1358            >,
1359        > {
1360            let result = if self.fail_on == Some("load_persona_facts") {
1361                Err(Self::fail_err("load_persona_facts"))
1362            } else {
1363                Ok(self.persona_facts.clone())
1364            };
1365            Box::pin(async move { result })
1366        }
1367
1368        fn load_trajectory_entries<'a>(
1369            &'a self,
1370            _tier: Option<&'a str>,
1371            _top_k: usize,
1372        ) -> std::pin::Pin<
1373            Box<
1374                dyn std::future::Future<
1375                        Output = Result<
1376                            Vec<MemTrajectoryEntry>,
1377                            Box<dyn std::error::Error + Send + Sync>,
1378                        >,
1379                    > + Send
1380                    + 'a,
1381            >,
1382        > {
1383            let result = if self.fail_on == Some("load_trajectory_entries") {
1384                Err(Self::fail_err("load_trajectory_entries"))
1385            } else {
1386                Ok(self.trajectory_entries.clone())
1387            };
1388            Box::pin(async move { result })
1389        }
1390
1391        fn load_tree_nodes<'a>(
1392            &'a self,
1393            _level: u32,
1394            _top_k: usize,
1395        ) -> std::pin::Pin<
1396            Box<
1397                dyn std::future::Future<
1398                        Output = Result<Vec<MemTreeNode>, Box<dyn std::error::Error + Send + Sync>>,
1399                    > + Send
1400                    + 'a,
1401            >,
1402        > {
1403            let result = if self.fail_on == Some("load_tree_nodes") {
1404                Err(Self::fail_err("load_tree_nodes"))
1405            } else {
1406                Ok(self.tree_nodes.clone())
1407            };
1408            Box::pin(async move { result })
1409        }
1410
1411        fn load_summaries<'a>(
1412            &'a self,
1413            _conversation_id: i64,
1414        ) -> std::pin::Pin<
1415            Box<
1416                dyn std::future::Future<
1417                        Output = Result<Vec<MemSummary>, Box<dyn std::error::Error + Send + Sync>>,
1418                    > + Send
1419                    + 'a,
1420            >,
1421        > {
1422            let result = if self.fail_on == Some("load_summaries") {
1423                Err(Self::fail_err("load_summaries"))
1424            } else {
1425                Ok(self.summaries.clone())
1426            };
1427            Box::pin(async move { result })
1428        }
1429
1430        fn retrieve_reasoning_strategies<'a>(
1431            &'a self,
1432            _query: &'a str,
1433            _top_k: usize,
1434        ) -> std::pin::Pin<
1435            Box<
1436                dyn std::future::Future<
1437                        Output = Result<
1438                            Vec<MemReasoningStrategy>,
1439                            Box<dyn std::error::Error + Send + Sync>,
1440                        >,
1441                    > + Send
1442                    + 'a,
1443            >,
1444        > {
1445            let result = if self.fail_on == Some("retrieve_reasoning_strategies") {
1446                Err(Self::fail_err("retrieve_reasoning_strategies"))
1447            } else {
1448                Ok(self.reasoning_strategies.clone())
1449            };
1450            Box::pin(async move { result })
1451        }
1452
1453        fn mark_reasoning_used<'a>(
1454            &'a self,
1455            ids: &'a [String],
1456        ) -> std::pin::Pin<
1457            Box<
1458                dyn std::future::Future<
1459                        Output = Result<(), Box<dyn std::error::Error + Send + Sync>>,
1460                    > + Send
1461                    + 'a,
1462            >,
1463        > {
1464            if self.fail_on == Some("mark_reasoning_used") {
1465                return Box::pin(async move { Err(Self::fail_err("mark_reasoning_used")) });
1466            }
1467            let mut guard = self.marked_ids.lock().expect("marked_ids poisoned");
1468            guard.extend_from_slice(ids);
1469            Box::pin(async move { Ok(()) })
1470        }
1471
1472        fn retrieve_corrections<'a>(
1473            &'a self,
1474            _query: &'a str,
1475            _limit: usize,
1476            _min_score: f32,
1477        ) -> std::pin::Pin<
1478            Box<
1479                dyn std::future::Future<
1480                        Output = Result<
1481                            Vec<MemCorrection>,
1482                            Box<dyn std::error::Error + Send + Sync>,
1483                        >,
1484                    > + Send
1485                    + 'a,
1486            >,
1487        > {
1488            let result = if self.fail_on == Some("retrieve_corrections") {
1489                Err(Self::fail_err("retrieve_corrections"))
1490            } else {
1491                Ok(self.corrections.clone())
1492            };
1493            Box::pin(async move { result })
1494        }
1495
1496        fn recall<'a>(
1497            &'a self,
1498            _query: &'a str,
1499            _limit: usize,
1500            _router: Option<&'a dyn zeph_common::memory::AsyncMemoryRouter>,
1501        ) -> std::pin::Pin<
1502            Box<
1503                dyn std::future::Future<
1504                        Output = Result<
1505                            Vec<MemRecalledMessage>,
1506                            Box<dyn std::error::Error + Send + Sync>,
1507                        >,
1508                    > + Send
1509                    + 'a,
1510            >,
1511        > {
1512            let result = if self.fail_on == Some("recall") {
1513                Err(Self::fail_err("recall"))
1514            } else {
1515                Ok(self.recalled.clone())
1516            };
1517            Box::pin(async move { result })
1518        }
1519
1520        fn recall_graph_facts<'a>(
1521            &'a self,
1522            _query: &'a str,
1523            _params: GraphRecallParams<'a>,
1524        ) -> std::pin::Pin<
1525            Box<
1526                dyn std::future::Future<
1527                        Output = Result<
1528                            Vec<MemGraphFact>,
1529                            Box<dyn std::error::Error + Send + Sync>,
1530                        >,
1531                    > + Send
1532                    + 'a,
1533            >,
1534        > {
1535            let result = if self.fail_on == Some("recall_graph_facts") {
1536                Err(Self::fail_err("recall_graph_facts"))
1537            } else {
1538                Ok(self.graph_facts.clone())
1539            };
1540            Box::pin(async move { result })
1541        }
1542
1543        fn search_session_summaries<'a>(
1544            &'a self,
1545            _query: &'a str,
1546            _limit: usize,
1547            _current_conversation_id: Option<i64>,
1548        ) -> std::pin::Pin<
1549            Box<
1550                dyn std::future::Future<
1551                        Output = Result<
1552                            Vec<MemSessionSummary>,
1553                            Box<dyn std::error::Error + Send + Sync>,
1554                        >,
1555                    > + Send
1556                    + 'a,
1557            >,
1558        > {
1559            let result = if self.fail_on == Some("search_session_summaries") {
1560                Err(Self::fail_err("search_session_summaries"))
1561            } else {
1562                Ok(self.session_summaries.clone())
1563            };
1564            Box::pin(async move { result })
1565        }
1566
1567        fn search_document_collection<'a>(
1568            &'a self,
1569            _collection: &'a str,
1570            _query: &'a str,
1571            _top_k: usize,
1572        ) -> std::pin::Pin<
1573            Box<
1574                dyn std::future::Future<
1575                        Output = Result<
1576                            Vec<MemDocumentChunk>,
1577                            Box<dyn std::error::Error + Send + Sync>,
1578                        >,
1579                    > + Send
1580                    + 'a,
1581            >,
1582        > {
1583            let result = if self.fail_on == Some("search_document_collection") {
1584                Err(Self::fail_err("search_document_collection"))
1585            } else {
1586                Ok(self.document_chunks.clone())
1587            };
1588            Box::pin(async move { result })
1589        }
1590    }
1591
1592    fn mock_view(mock: MockMemoryBackend) -> ContextMemoryView {
1593        let mut v = empty_view();
1594        v.memory = Some(Arc::new(mock));
1595        v
1596    }
1597
1598    // ── fetch_graph_facts (happy path) ────────────────────────────────────────
1599
1600    #[tokio::test]
1601    async fn fetch_graph_facts_returns_message_when_memory_present() {
1602        let mock = MockMemoryBackend {
1603            graph_facts: vec![zeph_common::memory::MemGraphFact {
1604                fact: "Rust is fast".to_string(),
1605                confidence: 0.9,
1606                activation_score: None,
1607                neighbors: vec![],
1608                provenance_snippet: None,
1609            }],
1610            ..Default::default()
1611        };
1612        let mut view = mock_view(mock);
1613        view.graph_config.enabled = true;
1614        // recall_timeout_ms must be non-zero or it gets clamped to 100ms
1615        view.graph_config.spreading_activation.recall_timeout_ms = 5000;
1616        let tc = NaiveTokenCounter;
1617        let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1618        assert!(result.is_some(), "expected Some message");
1619        let msg = result.unwrap();
1620        assert!(
1621            msg.content.contains("Rust is fast"),
1622            "expected fact text in output, got: {}",
1623            msg.content
1624        );
1625        assert!(
1626            msg.content.starts_with(GRAPH_FACTS_PREFIX),
1627            "expected GRAPH_FACTS_PREFIX"
1628        );
1629    }
1630
1631    #[tokio::test]
1632    async fn fetch_graph_facts_swallows_error_and_returns_none() {
1633        let mock = MockMemoryBackend::with_fail_on("recall_graph_facts");
1634        let mut view = mock_view(mock);
1635        view.graph_config.enabled = true;
1636        view.graph_config.spreading_activation.recall_timeout_ms = 5000;
1637        let tc = NaiveTokenCounter;
1638        // B1: fetch_graph_facts swallows errors via tracing::warn! and returns Ok(None)
1639        let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1640        assert!(
1641            result.is_none(),
1642            "expected None when recall_graph_facts errors"
1643        );
1644    }
1645
1646    #[tokio::test]
1647    async fn fetch_graph_facts_returns_none_when_facts_empty() {
1648        let mock = MockMemoryBackend::default(); // empty graph_facts
1649        let mut view = mock_view(mock);
1650        view.graph_config.enabled = true;
1651        view.graph_config.spreading_activation.recall_timeout_ms = 5000;
1652        let tc = NaiveTokenCounter;
1653        let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
1654        assert!(result.is_none());
1655    }
1656
1657    // ── fetch_persona_facts ───────────────────────────────────────────────────
1658
1659    #[tokio::test]
1660    async fn fetch_persona_facts_returns_message_when_persona_enabled() {
1661        let mock = MockMemoryBackend {
1662            persona_facts: vec![MemPersonaFact {
1663                category: "preference".to_string(),
1664                content: "prefers concise answers".to_string(),
1665            }],
1666            ..Default::default()
1667        };
1668        let mut view = mock_view(mock);
1669        view.persona_config.enabled = true;
1670        view.persona_config.context_budget_tokens = 1000;
1671        let tc = NaiveTokenCounter;
1672        let result = fetch_persona_facts(&view, 1000, &tc).await.unwrap();
1673        assert!(result.is_some());
1674        let msg = result.unwrap();
1675        assert!(msg.content.contains("preference"));
1676        assert!(msg.content.contains("prefers concise answers"));
1677        assert!(msg.content.starts_with(crate::slot::PERSONA_PREFIX));
1678    }
1679
1680    #[tokio::test]
1681    async fn fetch_persona_facts_propagates_error() {
1682        let mock = MockMemoryBackend::with_fail_on("load_persona_facts");
1683        let mut view = mock_view(mock);
1684        view.persona_config.enabled = true;
1685        let tc = NaiveTokenCounter;
1686        let result = fetch_persona_facts(&view, 1000, &tc).await;
1687        assert!(
1688            result.is_err(),
1689            "expected Err from load_persona_facts failure"
1690        );
1691    }
1692
1693    // ── fetch_trajectory_hints ────────────────────────────────────────────────
1694
1695    #[tokio::test]
1696    async fn fetch_trajectory_hints_returns_message_when_trajectory_enabled() {
1697        let mock = MockMemoryBackend {
1698            trajectory_entries: vec![MemTrajectoryEntry {
1699                intent: "summarize code".to_string(),
1700                outcome: "produced concise summary".to_string(),
1701                confidence: 0.9,
1702            }],
1703            ..Default::default()
1704        };
1705        let mut view = mock_view(mock);
1706        view.trajectory_config.enabled = true;
1707        view.trajectory_config.context_budget_tokens = 1000;
1708        view.trajectory_config.min_confidence = 0.5;
1709        let tc = NaiveTokenCounter;
1710        let result = fetch_trajectory_hints(&view, 1000, &tc).await.unwrap();
1711        assert!(result.is_some());
1712        let msg = result.unwrap();
1713        assert!(msg.content.contains("summarize code"));
1714        assert!(msg.content.starts_with(crate::slot::TRAJECTORY_PREFIX));
1715    }
1716
1717    #[tokio::test]
1718    async fn fetch_trajectory_hints_passes_tier_filter() {
1719        // I1: confidence filtering — entry below min_confidence must be excluded,
1720        // entry above must be present. Verifies the .filter(|e| e.confidence >= min_conf) branch.
1721        let mock = MockMemoryBackend {
1722            trajectory_entries: vec![
1723                MemTrajectoryEntry {
1724                    intent: "debug async code".to_string(),
1725                    outcome: "fixed deadlock".to_string(),
1726                    confidence: 0.85,
1727                },
1728                MemTrajectoryEntry {
1729                    intent: "low confidence task".to_string(),
1730                    outcome: "irrelevant".to_string(),
1731                    confidence: 0.3,
1732                },
1733            ],
1734            ..Default::default()
1735        };
1736        let mut view = mock_view(mock);
1737        view.trajectory_config.enabled = true;
1738        view.trajectory_config.context_budget_tokens = 1000;
1739        view.trajectory_config.min_confidence = 0.5;
1740        let tc = NaiveTokenCounter;
1741        let result = fetch_trajectory_hints(&view, 1000, &tc).await.unwrap();
1742        assert!(result.is_some(), "expected Some message");
1743        let msg = result.unwrap();
1744        assert!(
1745            msg.content.contains("debug async code"),
1746            "high-confidence entry must be included"
1747        );
1748        assert!(
1749            !msg.content.contains("low confidence task"),
1750            "entry below min_confidence must be filtered out"
1751        );
1752    }
1753
1754    #[tokio::test]
1755    async fn fetch_trajectory_hints_propagates_error() {
1756        let mock = MockMemoryBackend::with_fail_on("load_trajectory_entries");
1757        let mut view = mock_view(mock);
1758        view.trajectory_config.enabled = true;
1759        let tc = NaiveTokenCounter;
1760        let result = fetch_trajectory_hints(&view, 1000, &tc).await;
1761        assert!(result.is_err());
1762    }
1763
1764    // ── fetch_tree_memory ─────────────────────────────────────────────────────
1765
1766    #[tokio::test]
1767    async fn fetch_tree_memory_returns_message_when_tree_enabled() {
1768        let mock = MockMemoryBackend {
1769            tree_nodes: vec![MemTreeNode {
1770                content: "Topic: async Rust patterns".to_string(),
1771            }],
1772            ..Default::default()
1773        };
1774        let mut view = mock_view(mock);
1775        view.tree_config.enabled = true;
1776        view.tree_config.context_budget_tokens = 1000;
1777        let tc = NaiveTokenCounter;
1778        let result = fetch_tree_memory(&view, 1000, &tc).await.unwrap();
1779        assert!(result.is_some());
1780        let msg = result.unwrap();
1781        assert!(msg.content.contains("async Rust patterns"));
1782        assert!(msg.content.starts_with(crate::slot::TREE_MEMORY_PREFIX));
1783    }
1784
1785    #[tokio::test]
1786    async fn fetch_tree_memory_propagates_error() {
1787        let mock = MockMemoryBackend::with_fail_on("load_tree_nodes");
1788        let mut view = mock_view(mock);
1789        view.tree_config.enabled = true;
1790        let tc = NaiveTokenCounter;
1791        let result = fetch_tree_memory(&view, 1000, &tc).await;
1792        assert!(result.is_err());
1793    }
1794
1795    // ── fetch_corrections ─────────────────────────────────────────────────────
1796
1797    #[tokio::test]
1798    async fn fetch_corrections_returns_message_when_corrections_present() {
1799        let mock = MockMemoryBackend {
1800            corrections: vec![MemCorrection {
1801                correction_text: "use snake_case not camelCase".to_string(),
1802            }],
1803            ..Default::default()
1804        };
1805        let view = mock_view(mock);
1806        let result = fetch_corrections(&view, "query", 10, 0.5, |s| s.into())
1807            .await
1808            .unwrap();
1809        assert!(result.is_some());
1810        let msg = result.unwrap();
1811        assert!(msg.content.contains("snake_case"));
1812        assert!(msg.content.starts_with(CORRECTIONS_PREFIX));
1813    }
1814
1815    #[tokio::test]
1816    async fn fetch_corrections_propagates_error() {
1817        // fetch_corrections uses map_err(AssemblerError::Memory)? so retrieve_corrections
1818        // errors are propagated instead of silently discarded.
1819        let mock = MockMemoryBackend::with_fail_on("retrieve_corrections");
1820        let view = mock_view(mock);
1821        let result = fetch_corrections(&view, "query", 10, 0.5, |s| s.into()).await;
1822        assert!(result.is_err(), "expected Err, got {result:?}");
1823    }
1824
1825    // ── fetch_semantic_recall ─────────────────────────────────────────────────
1826
1827    #[tokio::test]
1828    async fn fetch_semantic_recall_returns_message_with_content() {
1829        let mock = MockMemoryBackend {
1830            recalled: vec![
1831                MemRecalledMessage {
1832                    role: "user".to_string(),
1833                    content: "how does tokio work".to_string(),
1834                    score: 0.95,
1835                },
1836                MemRecalledMessage {
1837                    role: "assistant".to_string(),
1838                    content: "tokio is an async runtime".to_string(),
1839                    score: 0.88,
1840                },
1841            ],
1842            ..Default::default()
1843        };
1844        let mut view = mock_view(mock);
1845        view.recall_limit = 10;
1846        let tc = NaiveTokenCounter;
1847        let (msg, score) = fetch_semantic_recall(&view, "tokio", 1000, &tc, None)
1848            .await
1849            .unwrap();
1850        assert!(msg.is_some(), "expected Some message");
1851        // I4: verify score equals first message's score
1852        assert!(score.is_some_and(|s| (s - 0.95_f32).abs() < f32::EPSILON));
1853        let msg = msg.unwrap();
1854        // content is in parts.Recall so check parts
1855        let has_recall_part = msg.parts.iter().any(|p| {
1856            if let zeph_llm::provider::MessagePart::Recall { text } = p {
1857                text.contains("how does tokio work")
1858            } else {
1859                false
1860            }
1861        });
1862        assert!(has_recall_part, "expected recalled content in Recall part");
1863    }
1864
1865    #[tokio::test]
1866    async fn fetch_semantic_recall_returns_none_when_recalled_empty() {
1867        let mock = MockMemoryBackend::default();
1868        let mut view = mock_view(mock);
1869        view.recall_limit = 10;
1870        let tc = NaiveTokenCounter;
1871        let (msg, score) = fetch_semantic_recall(&view, "query", 1000, &tc, None)
1872            .await
1873            .unwrap();
1874        assert!(msg.is_none());
1875        assert!(score.is_none());
1876    }
1877
1878    #[tokio::test]
1879    async fn fetch_semantic_recall_propagates_error() {
1880        let mock = MockMemoryBackend::with_fail_on("recall");
1881        let mut view = mock_view(mock);
1882        view.recall_limit = 10;
1883        let tc = NaiveTokenCounter;
1884        let result = fetch_semantic_recall(&view, "query", 1000, &tc, None).await;
1885        assert!(result.is_err());
1886    }
1887
1888    // ── fetch_document_rag ────────────────────────────────────────────────────
1889
1890    #[tokio::test]
1891    async fn fetch_document_rag_returns_message_when_rag_enabled() {
1892        let mock = MockMemoryBackend {
1893            document_chunks: vec![MemDocumentChunk {
1894                text: "Rust ownership rules prevent data races".to_string(),
1895            }],
1896            ..Default::default()
1897        };
1898        let mut view = mock_view(mock);
1899        view.document_config.rag_enabled = true;
1900        let tc = NaiveTokenCounter;
1901        let result = fetch_document_rag(&view, "ownership", 1000, &tc)
1902            .await
1903            .unwrap();
1904        assert!(result.is_some());
1905        let msg = result.unwrap();
1906        assert!(msg.content.contains("ownership rules"));
1907        assert!(msg.content.starts_with(DOCUMENT_RAG_PREFIX));
1908    }
1909
1910    #[tokio::test]
1911    async fn fetch_document_rag_propagates_error() {
1912        let mock = MockMemoryBackend::with_fail_on("search_document_collection");
1913        let mut view = mock_view(mock);
1914        view.document_config.rag_enabled = true;
1915        let tc = NaiveTokenCounter;
1916        let result = fetch_document_rag(&view, "query", 1000, &tc).await;
1917        assert!(result.is_err());
1918    }
1919
1920    // ── fetch_summaries ───────────────────────────────────────────────────────
1921
1922    #[tokio::test]
1923    async fn fetch_summaries_returns_message_when_summaries_present() {
1924        let mock = MockMemoryBackend {
1925            summaries: vec![MemSummary {
1926                first_message_id: Some(1),
1927                last_message_id: Some(5),
1928                content: "User asked about async Rust".to_string(),
1929            }],
1930            ..Default::default()
1931        };
1932        let mut view = mock_view(mock);
1933        view.conversation_id = Some(42);
1934        let tc = NaiveTokenCounter;
1935        let result = fetch_summaries(&view, 1000, &tc).await.unwrap();
1936        assert!(result.is_some());
1937        let msg = result.unwrap();
1938        let has_summary_part = msg.parts.iter().any(|p| {
1939            if let zeph_llm::provider::MessagePart::Summary { text } = p {
1940                text.contains("Messages 1-5") && text.contains("async Rust")
1941            } else {
1942                false
1943            }
1944        });
1945        assert!(
1946            has_summary_part,
1947            "expected Summary part with messages range"
1948        );
1949    }
1950
1951    #[tokio::test]
1952    async fn fetch_summaries_returns_none_without_conversation_id() {
1953        let mock = MockMemoryBackend {
1954            summaries: vec![MemSummary {
1955                first_message_id: Some(1),
1956                last_message_id: Some(5),
1957                content: "some content".to_string(),
1958            }],
1959            ..Default::default()
1960        };
1961        let mut view = mock_view(mock);
1962        view.conversation_id = None; // no conversation_id → must return None
1963        let tc = NaiveTokenCounter;
1964        let result = fetch_summaries(&view, 1000, &tc).await.unwrap();
1965        assert!(result.is_none());
1966    }
1967
1968    #[tokio::test]
1969    async fn fetch_summaries_propagates_error() {
1970        let mock = MockMemoryBackend::with_fail_on("load_summaries");
1971        let mut view = mock_view(mock);
1972        view.conversation_id = Some(42);
1973        let tc = NaiveTokenCounter;
1974        let result = fetch_summaries(&view, 1000, &tc).await;
1975        assert!(result.is_err());
1976    }
1977
1978    // ── fetch_cross_session ───────────────────────────────────────────────────
1979
1980    #[tokio::test]
1981    async fn fetch_cross_session_returns_message_when_results_present() {
1982        let mock = MockMemoryBackend {
1983            session_summaries: vec![MemSessionSummary {
1984                summary_text: "Previous session: debugging tokio deadlock".to_string(),
1985                score: 0.9,
1986            }],
1987            ..Default::default()
1988        };
1989        let mut view = mock_view(mock);
1990        view.conversation_id = Some(1);
1991        view.cross_session_score_threshold = 0.5;
1992        let tc = NaiveTokenCounter;
1993        let result = fetch_cross_session(&view, "async", 1000, &tc)
1994            .await
1995            .unwrap();
1996        assert!(result.is_some());
1997        let msg = result.unwrap();
1998        let has_cross_session_part = msg.parts.iter().any(|p| {
1999            if let zeph_llm::provider::MessagePart::CrossSession { text } = p {
2000                text.contains("tokio deadlock")
2001            } else {
2002                false
2003            }
2004        });
2005        assert!(has_cross_session_part);
2006    }
2007
2008    #[tokio::test]
2009    async fn fetch_cross_session_propagates_error() {
2010        let mock = MockMemoryBackend::with_fail_on("search_session_summaries");
2011        let mut view = mock_view(mock);
2012        view.conversation_id = Some(1);
2013        let tc = NaiveTokenCounter;
2014        let result = fetch_cross_session(&view, "query", 1000, &tc).await;
2015        assert!(result.is_err());
2016    }
2017
2018    // ── fetch_reasoning_strategies (happy path + mark_used) ──────────────────
2019
2020    #[tokio::test]
2021    async fn fetch_reasoning_strategies_returns_message_and_marks_used() {
2022        let mock = Arc::new(MockMemoryBackend {
2023            reasoning_strategies: vec![
2024                MemReasoningStrategy {
2025                    id: "strat-1".to_string(),
2026                    outcome: "success".to_string(),
2027                    summary: "break the problem into small steps".to_string(),
2028                },
2029                MemReasoningStrategy {
2030                    id: "strat-2".to_string(),
2031                    outcome: "success".to_string(),
2032                    summary: "use tracing spans for debugging".to_string(),
2033                },
2034            ],
2035            ..Default::default()
2036        });
2037        let marked_ids = Arc::clone(&mock);
2038        let mut view = empty_view();
2039        view.memory = Some(mock);
2040        view.reasoning_config.enabled = true;
2041        view.reasoning_config.context_budget_tokens = 1000;
2042        let tc = NaiveTokenCounter;
2043        let (result, handle) = fetch_reasoning_strategies(&view, "debug", 1000, 5, &tc)
2044            .await
2045            .unwrap();
2046        assert!(result.is_some());
2047        let msg = result.unwrap();
2048        assert!(msg.content.starts_with(crate::slot::REASONING_PREFIX));
2049        assert!(msg.content.contains("break the problem"));
2050
2051        // Await the returned JoinHandle to ensure mark_reasoning_used completes before assertion.
2052        if let Some(h) = handle {
2053            h.await.expect("mark_reasoning_used task panicked");
2054        }
2055
2056        let ids = marked_ids.marked_ids.lock().expect("marked_ids poisoned");
2057        assert!(
2058            ids.contains(&"strat-1".to_string()),
2059            "expected strat-1 marked"
2060        );
2061        assert!(
2062            ids.contains(&"strat-2".to_string()),
2063            "expected strat-2 marked"
2064        );
2065    }
2066
2067    #[tokio::test]
2068    async fn fetch_reasoning_strategies_propagates_error() {
2069        let mock = MockMemoryBackend::with_fail_on("retrieve_reasoning_strategies");
2070        let mut view = mock_view(mock);
2071        view.reasoning_config.enabled = true;
2072        let tc = NaiveTokenCounter;
2073        let result = fetch_reasoning_strategies(&view, "query", 1000, 3, &tc).await;
2074        assert!(result.is_err());
2075    }
2076
2077    // ── edge cases ────────────────────────────────────────────────────────────
2078
2079    #[tokio::test]
2080    async fn fetch_semantic_recall_skips_skipped_and_stopped_messages() {
2081        let mock = MockMemoryBackend {
2082            recalled: vec![
2083                MemRecalledMessage {
2084                    role: "user".to_string(),
2085                    content: "[skipped] some content".to_string(),
2086                    score: 0.95,
2087                },
2088                MemRecalledMessage {
2089                    role: "user".to_string(),
2090                    content: "[stopped] other content".to_string(),
2091                    score: 0.90,
2092                },
2093                MemRecalledMessage {
2094                    role: "user".to_string(),
2095                    content: "valid content to recall".to_string(),
2096                    score: 0.85,
2097                },
2098            ],
2099            ..Default::default()
2100        };
2101        let mut view = mock_view(mock);
2102        view.recall_limit = 10;
2103        let tc = NaiveTokenCounter;
2104        let (msg, _) = fetch_semantic_recall(&view, "query", 1000, &tc, None)
2105            .await
2106            .unwrap();
2107        assert!(msg.is_some());
2108        let msg = msg.unwrap();
2109        let full_text = msg.parts.iter().find_map(|p| {
2110            if let zeph_llm::provider::MessagePart::Recall { text } = p {
2111                Some(text.clone())
2112            } else {
2113                None
2114            }
2115        });
2116        let text = full_text.unwrap_or_default();
2117        assert!(
2118            !text.contains("[skipped]"),
2119            "skipped messages must be excluded"
2120        );
2121        assert!(
2122            !text.contains("[stopped]"),
2123            "stopped messages must be excluded"
2124        );
2125        assert!(
2126            text.contains("valid content to recall"),
2127            "valid messages must be included"
2128        );
2129    }
2130
2131    #[tokio::test]
2132    async fn fetch_cross_session_filters_below_threshold() {
2133        let mock = MockMemoryBackend {
2134            session_summaries: vec![
2135                MemSessionSummary {
2136                    summary_text: "high relevance session".to_string(),
2137                    score: 0.9,
2138                },
2139                MemSessionSummary {
2140                    summary_text: "low relevance session".to_string(),
2141                    score: 0.2,
2142                },
2143            ],
2144            ..Default::default()
2145        };
2146        let mut view = mock_view(mock);
2147        view.conversation_id = Some(1);
2148        view.cross_session_score_threshold = 0.5;
2149        let tc = NaiveTokenCounter;
2150        let result = fetch_cross_session(&view, "query", 1000, &tc)
2151            .await
2152            .unwrap();
2153        assert!(result.is_some());
2154        let msg = result.unwrap();
2155        let text = msg
2156            .parts
2157            .iter()
2158            .find_map(|p| {
2159                if let zeph_llm::provider::MessagePart::CrossSession { text } = p {
2160                    Some(text.clone())
2161                } else {
2162                    None
2163                }
2164            })
2165            .unwrap_or_default();
2166        assert!(
2167            text.contains("high relevance"),
2168            "high score must be included"
2169        );
2170        assert!(
2171            !text.contains("low relevance"),
2172            "low score must be filtered out"
2173        );
2174    }
2175
2176    #[tokio::test]
2177    async fn fetch_document_rag_skips_empty_chunks() {
2178        let mock = MockMemoryBackend {
2179            document_chunks: vec![
2180                MemDocumentChunk {
2181                    text: String::new(),
2182                }, // empty — must be skipped
2183                MemDocumentChunk {
2184                    text: "real content here".to_string(),
2185                },
2186            ],
2187            ..Default::default()
2188        };
2189        let mut view = mock_view(mock);
2190        view.document_config.rag_enabled = true;
2191        let tc = NaiveTokenCounter;
2192        let result = fetch_document_rag(&view, "query", 1000, &tc).await.unwrap();
2193        assert!(result.is_some());
2194        let msg = result.unwrap();
2195        assert!(msg.content.contains("real content here"));
2196        // empty chunk text should not produce an empty line before prefix
2197        assert!(!msg.content.contains("\n\n\n"));
2198    }
2199
2200    #[tokio::test]
2201    async fn fetch_graph_facts_sanitizes_injection_payloads() {
2202        // I3: newlines and angle brackets are replaced with spaces
2203        let mock = MockMemoryBackend {
2204            graph_facts: vec![zeph_common::memory::MemGraphFact {
2205                fact: "fact with <script>alert(1)</script> and\nnewline".to_string(),
2206                confidence: 0.8,
2207                activation_score: None,
2208                neighbors: vec![],
2209                provenance_snippet: None,
2210            }],
2211            ..Default::default()
2212        };
2213        let mut view = mock_view(mock);
2214        view.graph_config.enabled = true;
2215        view.graph_config.spreading_activation.recall_timeout_ms = 5000;
2216        let tc = NaiveTokenCounter;
2217        let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
2218        assert!(result.is_some());
2219        let msg = result.unwrap();
2220        assert!(
2221            !msg.content.contains('<'),
2222            "angle brackets must be sanitized"
2223        );
2224        // The formatter adds trailing \n to each line, but embedded \n in fact text is replaced
2225        // with spaces. Verify no double-newline sequences exist (would indicate unsanitized \n).
2226        assert!(
2227            !msg.content.contains("\n\n"),
2228            "embedded newlines must be sanitized, no double-newline sequences expected"
2229        );
2230    }
2231
2232    #[tokio::test]
2233    async fn fetch_reasoning_strategies_sanitizes_injection_payloads() {
2234        // I3: newlines and angle brackets are replaced with spaces in strategy summaries
2235        let mock = MockMemoryBackend {
2236            reasoning_strategies: vec![MemReasoningStrategy {
2237                id: "s1".to_string(),
2238                outcome: "success".to_string(),
2239                summary: "strategy with <b>bold</b> and\nnewline".to_string(),
2240            }],
2241            ..Default::default()
2242        };
2243        let mut view = mock_view(mock);
2244        view.reasoning_config.enabled = true;
2245        let tc = NaiveTokenCounter;
2246        let (result, _handle) = fetch_reasoning_strategies(&view, "query", 1000, 3, &tc)
2247            .await
2248            .unwrap();
2249        assert!(result.is_some());
2250        let msg = result.unwrap();
2251        assert!(
2252            !msg.content.contains('<'),
2253            "angle brackets must be sanitized in strategy summaries"
2254        );
2255    }
2256
2257    // ── budget truncation (CR-1) ──────────────────────────────────────────────
2258
2259    #[tokio::test]
2260    async fn fetch_persona_facts_truncates_at_budget() {
2261        let tc = NaiveTokenCounter;
2262        // Tight budget: fits prefix + exactly 1 fact line, second must be omitted.
2263        let first_line = "[pref] brief\n";
2264        let budget = tc.count_tokens(crate::slot::PERSONA_PREFIX) + tc.count_tokens(first_line);
2265        let mock = MockMemoryBackend {
2266            persona_facts: vec![
2267                MemPersonaFact {
2268                    category: "pref".to_string(),
2269                    content: "brief".to_string(),
2270                },
2271                MemPersonaFact {
2272                    category: "lang".to_string(),
2273                    content: "english".to_string(),
2274                },
2275            ],
2276            ..Default::default()
2277        };
2278        let mut view = mock_view(mock);
2279        view.persona_config.enabled = true;
2280        let result = fetch_persona_facts(&view, budget, &tc).await.unwrap();
2281        let msg = result.unwrap();
2282        assert!(msg.content.contains("brief"), "first fact must be included");
2283        assert!(
2284            !msg.content.contains("english"),
2285            "second fact must be truncated by budget"
2286        );
2287    }
2288
2289    #[tokio::test]
2290    async fn fetch_semantic_recall_truncates_at_budget() {
2291        let tc = NaiveTokenCounter;
2292        // Tight budget: fits prefix + exactly 1 recall entry, second must be omitted.
2293        let first_entry = "- [user] first message\n";
2294        let budget = tc.count_tokens(RECALL_PREFIX) + tc.count_tokens(first_entry);
2295        let mock = MockMemoryBackend {
2296            recalled: vec![
2297                MemRecalledMessage {
2298                    role: "user".to_string(),
2299                    content: "first message".to_string(),
2300                    score: 0.95,
2301                },
2302                MemRecalledMessage {
2303                    role: "user".to_string(),
2304                    content: "second message that should be truncated".to_string(),
2305                    score: 0.80,
2306                },
2307            ],
2308            ..Default::default()
2309        };
2310        let mut view = mock_view(mock);
2311        view.recall_limit = 10;
2312        let (msg, _) = fetch_semantic_recall(&view, "query", budget, &tc, None)
2313            .await
2314            .unwrap();
2315        assert!(msg.is_some());
2316        let text = msg
2317            .unwrap()
2318            .parts
2319            .iter()
2320            .find_map(|p| {
2321                if let zeph_llm::provider::MessagePart::Recall { text } = p {
2322                    Some(text.clone())
2323                } else {
2324                    None
2325                }
2326            })
2327            .unwrap_or_default();
2328        assert!(
2329            text.contains("first message"),
2330            "first entry must be included"
2331        );
2332        assert!(
2333            !text.contains("second message"),
2334            "second entry must be truncated by budget"
2335        );
2336    }
2337
2338    // ── provenance_snippet sanitization (CR-2 test) ───────────────────────────
2339
2340    #[tokio::test]
2341    async fn fetch_graph_facts_sanitizes_provenance_snippet() {
2342        use zeph_common::memory::MemGraphNeighbor;
2343        let mock = MockMemoryBackend {
2344            graph_facts: vec![zeph_common::memory::MemGraphFact {
2345                fact: "safe fact".to_string(),
2346                confidence: 0.9,
2347                activation_score: None,
2348                neighbors: vec![MemGraphNeighbor {
2349                    fact: "neighbor".to_string(),
2350                    confidence: 0.7,
2351                }],
2352                provenance_snippet: Some("source with <injection>\nand newline".to_string()),
2353            }],
2354            ..Default::default()
2355        };
2356        let mut view = mock_view(mock);
2357        view.graph_config.enabled = true;
2358        view.graph_config.spreading_activation.recall_timeout_ms = 5000;
2359        let tc = NaiveTokenCounter;
2360        let result = fetch_graph_facts(&view, "test", 1000, &tc).await.unwrap();
2361        assert!(result.is_some());
2362        let msg = result.unwrap();
2363        assert!(
2364            !msg.content.contains('<'),
2365            "angle brackets in provenance_snippet must be sanitized"
2366        );
2367        assert!(
2368            !msg.content.contains("\n\n"),
2369            "newlines in provenance_snippet must be sanitized"
2370        );
2371        assert!(
2372            msg.content.contains("[source:"),
2373            "provenance snippet must be rendered"
2374        );
2375    }
2376}