Skip to main content

zeph_agent_context/
helpers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pure helper functions for context assembly.
5//!
6//! These functions are called by `assembly.rs` in `zeph-core` (via a module alias)
7//! and by the [`crate::service::ContextService`] stubs that will be filled in during
8//! subsequent migration steps.
9//!
10//! All functions operate on [`crate::state::ContextAssemblyView`] instead of the
11//! `zeph-core`-internal `MemoryState`, keeping this crate free of `zeph-core` types.
12
13use std::fmt::Write as _;
14use std::time::Instant;
15
16use zeph_config::ContextFormat;
17use zeph_llm::provider::{Message, MessagePart, Role};
18use zeph_memory::{RetrievalFailureRecord, RetrievalFailureType, TokenCounter};
19
20use crate::error::ContextError;
21use crate::state::ContextAssemblyView;
22
// Context-section prefixes.
//
// Each constant is the leading text of one kind of injected context message. The literal
// values are runtime strings: their casing is not uniform across sections (compare
// `[Persona context]` with `[known facts]`) and must be preserved exactly.
// NOTE(review): presumably other modules match on these prefixes to recognise previously
// injected sections — confirm before normalising any of them.

/// System message prefix for persona context injected into the system prompt.
pub const PERSONA_PREFIX: &str = "[Persona context]\n";
/// System message prefix for trajectory (past experience) context.
pub const TRAJECTORY_PREFIX: &str = "[Past experience]\n";
/// System message prefix for tree-based memory summaries.
pub const TREE_MEMORY_PREFIX: &str = "[Memory summary]\n";
/// System message prefix for reasoning strategy context.
pub const REASONING_PREFIX: &str = "[Reasoning Strategy]\n";

/// System message prefix for graph memory facts injected into context.
///
/// Used by `fetch_graph_facts_raw` as the first line of the message body.
pub const GRAPH_FACTS_PREFIX: &str = "[known facts]\n";
/// System message prefix for semantic recall entries.
pub const RECALL_PREFIX: &str = "[semantic recall]\n";
/// System message prefix for session summary entries.
pub const SUMMARY_PREFIX: &str = "[conversation summaries]\n";
/// System message prefix for cross-session context entries.
pub const CROSS_SESSION_PREFIX: &str = "[cross-session context]\n";

/// System message prefix for past user corrections injected into context.
pub const CORRECTIONS_PREFIX: &str = "[past corrections]\n";
/// System message prefix for code-context (repo-map / file context) injections.
pub const CODE_CONTEXT_PREFIX: &str = "[code context]\n";
/// User message prefix for session digest summaries from the previous interaction.
pub const SESSION_DIGEST_PREFIX: &str = "[Session digest from previous interaction]\n";
/// System message prefix for LSP context notes (diagnostics, hover data, etc.).
///
/// Unlike the other prefixes this one has no closing bracket and no trailing newline.
/// NOTE(review): presumably the caller appends the note kind and the closing `]` — confirm.
pub const LSP_NOTE_PREFIX: &str = "[lsp ";
/// System message prefix for document RAG results.
///
/// This is a Markdown heading rather than a bracketed tag.
pub const DOCUMENT_RAG_PREFIX: &str = "## Relevant documents\n";
51
52/// Truncate `s` to at most `max_chars` Unicode scalar values.
53///
54/// Delegates to `zeph_common::text::truncate_to_chars` which respects UTF-8 boundaries.
55#[must_use]
56pub fn truncate_chars(s: &str, max_chars: usize) -> String {
57    zeph_common::text::truncate_to_chars(s, max_chars)
58}
59
60/// Format a user correction as a single bullet point for injection into the system prompt.
61///
62/// The `correction_text` must already be scrubbed by the caller before being passed here.
63/// Truncated to 200 characters to avoid inflating the context with verbose correction notes.
64#[must_use]
65pub fn format_correction_note(correction_text: &str) -> String {
66    format!(
67        "- Past user correction: \"{}\"",
68        truncate_chars(correction_text, 200)
69    )
70}
71
72/// Return the effective spreading-activation recall timeout in milliseconds.
73///
74/// A configured value of `0` would silently disable recall; this function clamps it to
75/// `100ms` and emits a warning so operators notice the misconfiguration without a crash.
76pub fn effective_recall_timeout_ms(configured: u64) -> u64 {
77    if configured == 0 {
78        tracing::warn!(
79            "recall_timeout_ms is 0, which would disable spreading activation recall; \
80             clamping to 100ms"
81        );
82        100
83    } else {
84        configured
85    }
86}
87
88/// Fetch graph memory facts for the given query and inject them into the context budget.
89///
90/// Delegates to [`fetch_graph_facts_raw`] using fields from `view`.
91///
92/// Returns `None` when graph recall is disabled, the budget is zero, no memory is
93/// attached, or the recalled fact set is empty after budget enforcement.
94///
95/// # Errors
96///
97/// Returns [`ContextError::Memory`] when the graph recall backend returns an error.
98pub async fn fetch_graph_facts(
99    view: &ContextAssemblyView<'_>,
100    query: &str,
101    budget_tokens: usize,
102    tc: &TokenCounter,
103) -> Result<Option<Message>, ContextError> {
104    fetch_graph_facts_raw(
105        view.memory.as_deref(),
106        &view.graph_config,
107        query,
108        budget_tokens,
109        tc,
110    )
111    .await
112    .map_err(ContextError::Memory)
113}
114
115/// Fetch graph memory facts using individual field arguments.
116///
117/// This is the raw-args variant used by `zeph-core` test bridge methods and by
118/// [`fetch_graph_facts`] internally. It accepts only the fields that the graph recall
119/// logic actually accesses, avoiding the need to construct a full [`ContextAssemblyView`]
120/// in test harnesses.
121///
122/// # Errors
123///
124/// Returns [`zeph_memory::MemoryError`] when the graph recall backend returns an error.
125#[allow(clippy::too_many_lines, clippy::items_after_statements)]
126pub async fn fetch_graph_facts_raw(
127    memory: Option<&zeph_memory::semantic::SemanticMemory>,
128    graph_config: &zeph_config::GraphConfig,
129    query: &str,
130    budget_tokens: usize,
131    tc: &TokenCounter,
132) -> Result<Option<Message>, zeph_memory::MemoryError> {
133    if budget_tokens == 0 || !graph_config.enabled {
134        return Ok(None);
135    }
136    let Some(memory) = memory else {
137        return Ok(None);
138    };
139    let recall_limit = graph_config.recall_limit;
140    let temporal_decay_rate = graph_config.temporal_decay_rate;
141    let edge_types = zeph_memory::classify_graph_subgraph(query);
142    let sa_config = &graph_config.spreading_activation;
143
144    let mut body = String::from(GRAPH_FACTS_PREFIX);
145    let mut tokens_so_far = tc.count_tokens(&body);
146    let max_hops = graph_config.max_hops;
147
148    use zeph_config::memory::GraphRetrievalStrategy;
149    let effective_strategy = if sa_config.enabled {
150        GraphRetrievalStrategy::Synapse
151    } else {
152        graph_config.retrieval_strategy
153    };
154
155    let _span = tracing::info_span!("memory.graph.dispatch", ?effective_strategy).entered();
156    let strategy_str = format!("{effective_strategy:?}").to_lowercase();
157    let edge_types_json = serde_json::to_string(&edge_types).ok();
158
159    /// Append graph facts to `body` respecting the token budget; returns result count.
160    fn append_graph_facts(
161        facts: &[zeph_memory::graph::types::GraphFact],
162        body: &mut String,
163        tokens_so_far: &mut usize,
164        budget_tokens: usize,
165        tc: &TokenCounter,
166    ) -> usize {
167        let mut count = 0;
168        for f in facts {
169            let fact_text = f.fact.replace(['\n', '\r', '<', '>'], " ");
170            let line = format!("- {} (confidence: {:.2})\n", fact_text, f.confidence);
171            let line_tokens = tc.count_tokens(&line);
172            if *tokens_so_far + line_tokens > budget_tokens {
173                break;
174            }
175            body.push_str(&line);
176            *tokens_so_far += line_tokens;
177            count += 1;
178        }
179        count
180    }
181
182    match effective_strategy {
183        GraphRetrievalStrategy::Synapse => {
184            let sa_params = zeph_memory::graph::SpreadingActivationParams {
185                decay_lambda: sa_config.decay_lambda,
186                max_hops: sa_config.max_hops,
187                activation_threshold: sa_config.activation_threshold,
188                inhibition_threshold: sa_config.inhibition_threshold,
189                max_activated_nodes: sa_config.max_activated_nodes,
190                temporal_decay_rate,
191                seed_structural_weight: sa_config.seed_structural_weight,
192                seed_community_cap: sa_config.seed_community_cap,
193            };
194            let timeout_ms = effective_recall_timeout_ms(sa_config.recall_timeout_ms);
195            let t0 = Instant::now();
196            let activated_facts = match tokio::time::timeout(
197                std::time::Duration::from_millis(timeout_ms),
198                memory.recall_graph_activated(query, recall_limit, sa_params, &edge_types),
199            )
200            .await
201            {
202                Ok(Ok(facts)) => facts,
203                Ok(Err(e)) => {
204                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
205                    tracing::warn!("spreading activation recall failed: {e:#}");
206                    // TODO(#3576): conversation_id and turn_index not yet propagated into
207                    // context helpers; tracked for future enhancement when
208                    // ContextAssemblyView exposes them.
209                    memory.log_retrieval_failure(RetrievalFailureRecord {
210                        conversation_id: None,
211                        turn_index: 0,
212                        failure_type: RetrievalFailureType::Error,
213                        retrieval_strategy: strategy_str.clone(),
214                        query_text: query.to_owned(),
215                        query_len: query.len(),
216                        top_score: None,
217                        confidence_threshold: None,
218                        result_count: 0,
219                        latency_ms,
220                        edge_types: edge_types_json.clone(),
221                        error_context: Some(format!("{e:#}")),
222                    });
223                    Vec::new()
224                }
225                Err(_) => {
226                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
227                    tracing::warn!("spreading activation recall timed out ({timeout_ms}ms)");
228                    memory.log_retrieval_failure(RetrievalFailureRecord {
229                        conversation_id: None,
230                        turn_index: 0,
231                        failure_type: RetrievalFailureType::Timeout,
232                        retrieval_strategy: strategy_str.clone(),
233                        query_text: query.to_owned(),
234                        query_len: query.len(),
235                        top_score: None,
236                        confidence_threshold: None,
237                        result_count: 0,
238                        latency_ms,
239                        edge_types: edge_types_json.clone(),
240                        error_context: Some(format!("timeout after {timeout_ms}ms")),
241                    });
242                    Vec::new()
243                }
244            };
245            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
246            if activated_facts.is_empty() {
247                memory.log_retrieval_failure(RetrievalFailureRecord {
248                    conversation_id: None,
249                    turn_index: 0,
250                    failure_type: RetrievalFailureType::NoHit,
251                    retrieval_strategy: strategy_str,
252                    query_text: query.to_owned(),
253                    query_len: query.len(),
254                    top_score: None,
255                    confidence_threshold: None,
256                    result_count: 0,
257                    latency_ms,
258                    edge_types: edge_types_json,
259                    error_context: None,
260                });
261                return Ok(None);
262            }
263            for f in &activated_facts {
264                let fact_text = f.edge.fact.replace(['\n', '\r', '<', '>'], " ");
265                let line = format!(
266                    "- {} (confidence: {:.2}, activation: {:.2})\n",
267                    fact_text, f.edge.confidence, f.activation_score
268                );
269                let line_tokens = tc.count_tokens(&line);
270                if tokens_so_far + line_tokens > budget_tokens {
271                    break;
272                }
273                body.push_str(&line);
274                tokens_so_far += line_tokens;
275            }
276        }
277        GraphRetrievalStrategy::Bfs => {
278            let t0 = Instant::now();
279            let facts = match memory
280                .recall_graph(
281                    query,
282                    recall_limit,
283                    max_hops,
284                    None,
285                    temporal_decay_rate,
286                    &edge_types,
287                )
288                .await
289            {
290                Ok(f) => f,
291                Err(e) => {
292                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
293                    memory.log_retrieval_failure(RetrievalFailureRecord {
294                        conversation_id: None,
295                        turn_index: 0,
296                        failure_type: RetrievalFailureType::Error,
297                        retrieval_strategy: strategy_str,
298                        query_text: query.to_owned(),
299                        query_len: query.len(),
300                        top_score: None,
301                        confidence_threshold: None,
302                        result_count: 0,
303                        latency_ms,
304                        edge_types: edge_types_json,
305                        error_context: Some(format!("{e:#}")),
306                    });
307                    return Err(e);
308                }
309            };
310            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
311            if facts.is_empty() {
312                memory.log_retrieval_failure(RetrievalFailureRecord {
313                    conversation_id: None,
314                    turn_index: 0,
315                    failure_type: RetrievalFailureType::NoHit,
316                    retrieval_strategy: strategy_str,
317                    query_text: query.to_owned(),
318                    query_len: query.len(),
319                    top_score: None,
320                    confidence_threshold: None,
321                    result_count: 0,
322                    latency_ms,
323                    edge_types: edge_types_json,
324                    error_context: None,
325                });
326                return Ok(None);
327            }
328            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
329        }
330        GraphRetrievalStrategy::AStar => {
331            let t0 = Instant::now();
332            let facts = match memory
333                .recall_graph_astar(
334                    query,
335                    recall_limit,
336                    max_hops,
337                    temporal_decay_rate,
338                    &edge_types,
339                )
340                .await
341            {
342                Ok(f) => f,
343                Err(e) => {
344                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
345                    memory.log_retrieval_failure(RetrievalFailureRecord {
346                        conversation_id: None,
347                        turn_index: 0,
348                        failure_type: RetrievalFailureType::Error,
349                        retrieval_strategy: strategy_str,
350                        query_text: query.to_owned(),
351                        query_len: query.len(),
352                        top_score: None,
353                        confidence_threshold: None,
354                        result_count: 0,
355                        latency_ms,
356                        edge_types: edge_types_json,
357                        error_context: Some(format!("{e:#}")),
358                    });
359                    return Err(e);
360                }
361            };
362            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
363            if facts.is_empty() {
364                memory.log_retrieval_failure(RetrievalFailureRecord {
365                    conversation_id: None,
366                    turn_index: 0,
367                    failure_type: RetrievalFailureType::NoHit,
368                    retrieval_strategy: strategy_str,
369                    query_text: query.to_owned(),
370                    query_len: query.len(),
371                    top_score: None,
372                    confidence_threshold: None,
373                    result_count: 0,
374                    latency_ms,
375                    edge_types: edge_types_json,
376                    error_context: None,
377                });
378                return Ok(None);
379            }
380            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
381        }
382        GraphRetrievalStrategy::WaterCircles => {
383            let ring_limit = graph_config.watercircles.ring_limit;
384            let t0 = Instant::now();
385            let facts = match memory
386                .recall_graph_watercircles(
387                    query,
388                    recall_limit,
389                    max_hops,
390                    ring_limit,
391                    temporal_decay_rate,
392                    &edge_types,
393                )
394                .await
395            {
396                Ok(f) => f,
397                Err(e) => {
398                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
399                    memory.log_retrieval_failure(RetrievalFailureRecord {
400                        conversation_id: None,
401                        turn_index: 0,
402                        failure_type: RetrievalFailureType::Error,
403                        retrieval_strategy: strategy_str,
404                        query_text: query.to_owned(),
405                        query_len: query.len(),
406                        top_score: None,
407                        confidence_threshold: None,
408                        result_count: 0,
409                        latency_ms,
410                        edge_types: edge_types_json,
411                        error_context: Some(format!("{e:#}")),
412                    });
413                    return Err(e);
414                }
415            };
416            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
417            if facts.is_empty() {
418                memory.log_retrieval_failure(RetrievalFailureRecord {
419                    conversation_id: None,
420                    turn_index: 0,
421                    failure_type: RetrievalFailureType::NoHit,
422                    retrieval_strategy: strategy_str,
423                    query_text: query.to_owned(),
424                    query_len: query.len(),
425                    top_score: None,
426                    confidence_threshold: None,
427                    result_count: 0,
428                    latency_ms,
429                    edge_types: edge_types_json,
430                    error_context: None,
431                });
432                return Ok(None);
433            }
434            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
435        }
436        GraphRetrievalStrategy::BeamSearch => {
437            let beam_width = graph_config.beam_search.beam_width;
438            let t0 = Instant::now();
439            let facts = match memory
440                .recall_graph_beam(
441                    query,
442                    recall_limit,
443                    beam_width,
444                    max_hops,
445                    temporal_decay_rate,
446                    &edge_types,
447                )
448                .await
449            {
450                Ok(f) => f,
451                Err(e) => {
452                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
453                    memory.log_retrieval_failure(RetrievalFailureRecord {
454                        conversation_id: None,
455                        turn_index: 0,
456                        failure_type: RetrievalFailureType::Error,
457                        retrieval_strategy: strategy_str,
458                        query_text: query.to_owned(),
459                        query_len: query.len(),
460                        top_score: None,
461                        confidence_threshold: None,
462                        result_count: 0,
463                        latency_ms,
464                        edge_types: edge_types_json,
465                        error_context: Some(format!("{e:#}")),
466                    });
467                    return Err(e);
468                }
469            };
470            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
471            if facts.is_empty() {
472                memory.log_retrieval_failure(RetrievalFailureRecord {
473                    conversation_id: None,
474                    turn_index: 0,
475                    failure_type: RetrievalFailureType::NoHit,
476                    retrieval_strategy: strategy_str,
477                    query_text: query.to_owned(),
478                    query_len: query.len(),
479                    top_score: None,
480                    confidence_threshold: None,
481                    result_count: 0,
482                    latency_ms,
483                    edge_types: edge_types_json,
484                    error_context: None,
485                });
486                return Ok(None);
487            }
488            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
489        }
490        GraphRetrievalStrategy::Hybrid => {
491            const CLASSIFIER_TIMEOUT_MS: u64 = 2_000;
492            let classifier_t0 = Instant::now();
493            let classified = if let Ok(s) = tokio::time::timeout(
494                std::time::Duration::from_millis(CLASSIFIER_TIMEOUT_MS),
495                memory.classify_graph_strategy(query),
496            )
497            .await
498            {
499                s
500            } else {
501                let latency_ms = classifier_t0
502                    .elapsed()
503                    .as_millis()
504                    .try_into()
505                    .unwrap_or(u64::MAX);
506                tracing::warn!(
507                    "hybrid strategy classifier timed out after {CLASSIFIER_TIMEOUT_MS}ms, \
508                     falling back to synapse"
509                );
510                memory.log_retrieval_failure(RetrievalFailureRecord {
511                    conversation_id: None,
512                    turn_index: 0,
513                    failure_type: RetrievalFailureType::Timeout,
514                    retrieval_strategy: "hybrid_classifier".to_owned(),
515                    query_text: query.to_owned(),
516                    query_len: query.len(),
517                    top_score: None,
518                    confidence_threshold: None,
519                    result_count: 0,
520                    latency_ms,
521                    edge_types: edge_types_json.clone(),
522                    error_context: Some(format!(
523                        "classifier timeout after {CLASSIFIER_TIMEOUT_MS}ms"
524                    )),
525                });
526                "synapse".to_owned()
527            };
528            tracing::debug!(classified_strategy = %classified, "hybrid dispatch: classified");
529            let t0 = Instant::now();
530            let facts = match classified.as_str() {
531                "astar" => {
532                    match memory
533                        .recall_graph_astar(
534                            query,
535                            recall_limit,
536                            max_hops,
537                            temporal_decay_rate,
538                            &edge_types,
539                        )
540                        .await
541                    {
542                        Ok(f) => f,
543                        Err(e) => {
544                            let latency_ms =
545                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
546                            memory.log_retrieval_failure(RetrievalFailureRecord {
547                                conversation_id: None,
548                                turn_index: 0,
549                                failure_type: RetrievalFailureType::Error,
550                                retrieval_strategy: strategy_str,
551                                query_text: query.to_owned(),
552                                query_len: query.len(),
553                                top_score: None,
554                                confidence_threshold: None,
555                                result_count: 0,
556                                latency_ms,
557                                edge_types: edge_types_json,
558                                error_context: Some(format!("{e:#}")),
559                            });
560                            return Err(e);
561                        }
562                    }
563                }
564                "watercircles" => {
565                    let ring_limit = graph_config.watercircles.ring_limit;
566                    match memory
567                        .recall_graph_watercircles(
568                            query,
569                            recall_limit,
570                            max_hops,
571                            ring_limit,
572                            temporal_decay_rate,
573                            &edge_types,
574                        )
575                        .await
576                    {
577                        Ok(f) => f,
578                        Err(e) => {
579                            let latency_ms =
580                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
581                            memory.log_retrieval_failure(RetrievalFailureRecord {
582                                conversation_id: None,
583                                turn_index: 0,
584                                failure_type: RetrievalFailureType::Error,
585                                retrieval_strategy: strategy_str,
586                                query_text: query.to_owned(),
587                                query_len: query.len(),
588                                top_score: None,
589                                confidence_threshold: None,
590                                result_count: 0,
591                                latency_ms,
592                                edge_types: edge_types_json,
593                                error_context: Some(format!("{e:#}")),
594                            });
595                            return Err(e);
596                        }
597                    }
598                }
599                "beam_search" => {
600                    let beam_width = graph_config.beam_search.beam_width;
601                    match memory
602                        .recall_graph_beam(
603                            query,
604                            recall_limit,
605                            beam_width,
606                            max_hops,
607                            temporal_decay_rate,
608                            &edge_types,
609                        )
610                        .await
611                    {
612                        Ok(f) => f,
613                        Err(e) => {
614                            let latency_ms =
615                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
616                            memory.log_retrieval_failure(RetrievalFailureRecord {
617                                conversation_id: None,
618                                turn_index: 0,
619                                failure_type: RetrievalFailureType::Error,
620                                retrieval_strategy: strategy_str,
621                                query_text: query.to_owned(),
622                                query_len: query.len(),
623                                top_score: None,
624                                confidence_threshold: None,
625                                result_count: 0,
626                                latency_ms,
627                                edge_types: edge_types_json,
628                                error_context: Some(format!("{e:#}")),
629                            });
630                            return Err(e);
631                        }
632                    }
633                }
634                _ => {
635                    let sa_params = zeph_memory::graph::SpreadingActivationParams {
636                        decay_lambda: sa_config.decay_lambda,
637                        max_hops: sa_config.max_hops,
638                        activation_threshold: sa_config.activation_threshold,
639                        inhibition_threshold: sa_config.inhibition_threshold,
640                        max_activated_nodes: sa_config.max_activated_nodes,
641                        temporal_decay_rate,
642                        seed_structural_weight: sa_config.seed_structural_weight,
643                        seed_community_cap: sa_config.seed_community_cap,
644                    };
645                    match memory
646                        .recall_graph_activated(query, recall_limit, sa_params, &edge_types)
647                        .await
648                    {
649                        Ok(activated) => activated
650                            .into_iter()
651                            .map(|f| zeph_memory::graph::types::GraphFact {
652                                entity_name: f.edge.source_entity_id.to_string(),
653                                relation: f.edge.relation.clone(),
654                                target_name: f.edge.target_entity_id.to_string(),
655                                fact: f.edge.fact.clone(),
656                                entity_match_score: f.activation_score,
657                                hop_distance: 0,
658                                confidence: f.edge.confidence,
659                                valid_from: Some(f.edge.valid_from.clone()),
660                                edge_type: f.edge.edge_type,
661                                retrieval_count: f.edge.retrieval_count,
662                                edge_id: Some(f.edge.id),
663                            })
664                            .collect(),
665                        Err(e) => {
666                            let latency_ms =
667                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
668                            memory.log_retrieval_failure(RetrievalFailureRecord {
669                                conversation_id: None,
670                                turn_index: 0,
671                                failure_type: RetrievalFailureType::Error,
672                                retrieval_strategy: strategy_str,
673                                query_text: query.to_owned(),
674                                query_len: query.len(),
675                                top_score: None,
676                                confidence_threshold: None,
677                                result_count: 0,
678                                latency_ms,
679                                edge_types: edge_types_json,
680                                error_context: Some(format!("{e:#}")),
681                            });
682                            return Err(e);
683                        }
684                    }
685                }
686            };
687            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
688            if facts.is_empty() {
689                memory.log_retrieval_failure(RetrievalFailureRecord {
690                    conversation_id: None,
691                    turn_index: 0,
692                    failure_type: RetrievalFailureType::NoHit,
693                    retrieval_strategy: strategy_str,
694                    query_text: query.to_owned(),
695                    query_len: query.len(),
696                    top_score: None,
697                    confidence_threshold: None,
698                    result_count: 0,
699                    latency_ms,
700                    edge_types: edge_types_json,
701                    error_context: None,
702                });
703                return Ok(None);
704            }
705            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
706        }
707    }
708
709    if body == GRAPH_FACTS_PREFIX {
710        return Ok(None);
711    }
712
713    Ok(Some(Message::from_legacy(Role::System, body)))
714}
715
716/// Fetch semantically recalled messages using individual field arguments.
717///
718/// Raw-args variant used by `zeph-core` test bridge methods and by [`fetch_semantic_recall`].
719///
720/// When `low_confidence_threshold` is `Some(t)`, results with a top score below `t` are
721/// classified as low-confidence and logged via the memory's retrieval failure logger.
722///
723/// # Errors
724///
725/// Returns [`zeph_memory::MemoryError`] when the memory backend returns an error.
726#[allow(clippy::too_many_arguments)]
727pub async fn fetch_semantic_recall_raw(
728    memory: Option<&zeph_memory::semantic::SemanticMemory>,
729    recall_limit: usize,
730    context_format: ContextFormat,
731    query: &str,
732    token_budget: usize,
733    tc: &TokenCounter,
734    router: Option<&dyn zeph_memory::AsyncMemoryRouter>,
735    low_confidence_threshold: Option<f32>,
736) -> Result<(Option<Message>, Option<f32>), zeph_memory::MemoryError> {
737    let Some(memory) = memory else {
738        return Ok((None, None));
739    };
740    if recall_limit == 0 || token_budget == 0 {
741        return Ok((None, None));
742    }
743
744    let t0 = Instant::now();
745    let recalled = if let Some(r) = router {
746        memory
747            .recall_routed_async(query, recall_limit, None, r)
748            .await?
749    } else {
750        memory.recall(query, recall_limit, None).await?
751    };
752    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
753
754    if recalled.is_empty() {
755        memory.log_retrieval_failure(RetrievalFailureRecord {
756            conversation_id: None,
757            turn_index: 0,
758            failure_type: RetrievalFailureType::NoHit,
759            retrieval_strategy: "semantic".to_owned(),
760            query_text: query.to_owned(),
761            query_len: query.len(),
762            top_score: None,
763            confidence_threshold: low_confidence_threshold,
764            result_count: 0,
765            latency_ms,
766            edge_types: None,
767            error_context: None,
768        });
769        return Ok((None, None));
770    }
771
772    let top_score = recalled.first().map(|r| r.score);
773
774    if let (Some(score), Some(threshold)) = (top_score, low_confidence_threshold)
775        && score < threshold
776    {
777        memory.log_retrieval_failure(RetrievalFailureRecord {
778            conversation_id: None,
779            turn_index: 0,
780            failure_type: RetrievalFailureType::LowConfidence,
781            retrieval_strategy: "semantic".to_owned(),
782            query_text: query.to_owned(),
783            query_len: query.len(),
784            top_score: Some(score),
785            confidence_threshold: Some(threshold),
786            result_count: recalled.len(),
787            latency_ms,
788            edge_types: None,
789            error_context: None,
790        });
791    }
792    let initial_cap = (recall_limit * 512).min(token_budget * 3);
793    let mut recall_text = String::with_capacity(initial_cap);
794    recall_text.push_str(RECALL_PREFIX);
795    let mut tokens_used = tc.count_tokens(&recall_text);
796
797    for item in &recalled {
798        if item.message.content.starts_with("[skipped]")
799            || item.message.content.starts_with("[stopped]")
800        {
801            continue;
802        }
803        let entry = match context_format {
804            ContextFormat::Structured => format_structured_recall_entry(item),
805            ContextFormat::Plain => format_plain_recall_entry(item),
806        };
807        let entry_tokens = tc.count_tokens(&entry);
808        if tokens_used + entry_tokens > token_budget {
809            break;
810        }
811        recall_text.push_str(&entry);
812        tokens_used += entry_tokens;
813    }
814
815    if tokens_used > tc.count_tokens(RECALL_PREFIX) {
816        Ok((
817            Some(Message::from_parts(
818                Role::System,
819                vec![MessagePart::Recall { text: recall_text }],
820            )),
821            top_score,
822        ))
823    } else {
824        Ok((None, None))
825    }
826}
827
828/// Fetch session summaries using individual field arguments.
829///
830/// Raw-args variant used by `zeph-core` test bridge methods and by [`fetch_summaries`].
831///
832/// # Errors
833///
834/// Returns [`zeph_memory::MemoryError`] when the memory backend returns an error.
835pub async fn fetch_summaries_raw(
836    memory: Option<&zeph_memory::semantic::SemanticMemory>,
837    conversation_id: Option<zeph_memory::ConversationId>,
838    token_budget: usize,
839    tc: &TokenCounter,
840) -> Result<Option<Message>, zeph_memory::MemoryError> {
841    let (Some(memory), Some(cid)) = (memory, conversation_id) else {
842        return Ok(None);
843    };
844    if token_budget == 0 {
845        return Ok(None);
846    }
847
848    let summaries = memory.load_summaries(cid).await?;
849    if summaries.is_empty() {
850        return Ok(None);
851    }
852
853    let mut summary_text = String::from(SUMMARY_PREFIX);
854    let mut tokens_used = tc.count_tokens(&summary_text);
855
856    for summary in summaries.iter().rev() {
857        let first = summary.first_message_id.map_or(0, |m| m.0);
858        let last = summary.last_message_id.map_or(0, |m| m.0);
859        let entry = format!("- Messages {first}-{last}: {}\n", summary.content);
860        let cost = tc.count_tokens(&entry);
861        if tokens_used + cost > token_budget {
862            break;
863        }
864        summary_text.push_str(&entry);
865        tokens_used += cost;
866    }
867
868    if tokens_used > tc.count_tokens(SUMMARY_PREFIX) {
869        Ok(Some(Message::from_parts(
870            Role::System,
871            vec![MessagePart::Summary { text: summary_text }],
872        )))
873    } else {
874        Ok(None)
875    }
876}
877
878/// Fetch cross-session context summaries using individual field arguments.
879///
880/// Raw-args variant used by `zeph-core` test bridge methods and by [`fetch_cross_session`].
881///
882/// # Errors
883///
884/// Returns [`zeph_memory::MemoryError`] when the memory backend returns an error.
885pub async fn fetch_cross_session_raw(
886    memory: Option<&zeph_memory::semantic::SemanticMemory>,
887    conversation_id: Option<zeph_memory::ConversationId>,
888    cross_session_score_threshold: f32,
889    query: &str,
890    token_budget: usize,
891    tc: &TokenCounter,
892) -> Result<Option<Message>, zeph_memory::MemoryError> {
893    let (Some(memory), Some(cid)) = (memory, conversation_id) else {
894        return Ok(None);
895    };
896    if token_budget == 0 {
897        return Ok(None);
898    }
899
900    let results: Vec<_> = memory
901        .search_session_summaries(query, 5, Some(cid))
902        .await?
903        .into_iter()
904        .filter(|r| r.score >= cross_session_score_threshold)
905        .collect();
906    if results.is_empty() {
907        return Ok(None);
908    }
909
910    let mut text = String::from(CROSS_SESSION_PREFIX);
911    let mut tokens_used = tc.count_tokens(&text);
912
913    for item in &results {
914        let entry = format!("- {}\n", item.summary_text);
915        let cost = tc.count_tokens(&entry);
916        if tokens_used + cost > token_budget {
917            break;
918        }
919        text.push_str(&entry);
920        tokens_used += cost;
921    }
922
923    if tokens_used > tc.count_tokens(CROSS_SESSION_PREFIX) {
924        Ok(Some(Message::from_parts(
925            Role::System,
926            vec![MessagePart::CrossSession { text }],
927        )))
928    } else {
929        Ok(None)
930    }
931}
932
933/// Fetch semantically recalled messages for the given query and enforce the token budget.
934///
935/// Delegates to [`fetch_semantic_recall_raw`] using fields from `view`.
936///
937/// Returns `(None, None)` when memory is absent, recall is disabled, the budget is zero,
938/// or the recalled set is empty.
939///
940/// The second element of the tuple is the similarity score of the top recalled entry, used
941/// by the caller to track recall confidence for telemetry.
942///
943/// # Errors
944///
945/// Returns [`ContextError::Memory`] when the memory recall backend returns an error.
946pub async fn fetch_semantic_recall(
947    view: &ContextAssemblyView<'_>,
948    query: &str,
949    token_budget: usize,
950    tc: &TokenCounter,
951    router: Option<&dyn zeph_memory::AsyncMemoryRouter>,
952) -> Result<(Option<Message>, Option<f32>), ContextError> {
953    fetch_semantic_recall_raw(
954        view.memory.as_deref(),
955        view.recall_limit,
956        view.context_format,
957        query,
958        token_budget,
959        tc,
960        router,
961        None,
962    )
963    .await
964    .map_err(ContextError::Memory)
965}
966
967fn format_plain_recall_entry(item: &zeph_memory::RecalledMessage) -> String {
968    let role_label = match item.message.role {
969        Role::User => "user",
970        Role::Assistant => "assistant",
971        Role::System => "system",
972    };
973    format!("- [{}] {}\n", role_label, item.message.content)
974}
975
976#[allow(clippy::map_unwrap_or)]
977fn format_structured_recall_entry(item: &zeph_memory::RecalledMessage) -> String {
978    let source = match item.message.role {
979        Role::User => "user",
980        Role::Assistant => "assistant",
981        Role::System => "system",
982    };
983    // Use compacted_at as a proxy for message age when available; otherwise "unknown".
984    // A full timestamp lookup from SQLite would require an async DB call in the assembler
985    // and is deferred to a future enhancement (TODO: enhance when message timestamps are
986    // propagated into RecalledMessage).
987    let date = item
988        .message
989        .metadata
990        .compacted_at
991        .and_then(|ts| chrono::DateTime::from_timestamp(ts, 0))
992        .map(|dt| dt.format("%Y-%m-%d").to_string())
993        .unwrap_or_else(|| "unknown".to_owned());
994    format!(
995        "[Memory | {} | {} | relevance: {:.2}]\n{}\n",
996        source, date, item.score, item.message.content
997    )
998}
999
1000/// Fetch session summaries for the current conversation and enforce the token budget.
1001///
1002/// Delegates to [`fetch_summaries_raw`] using fields from `view`.
1003///
1004/// Returns `None` when memory or the conversation ID is absent, the budget is zero,
1005/// or no summaries exist yet.
1006///
1007/// # Errors
1008///
1009/// Returns [`ContextError::Memory`] when the memory backend returns an error.
1010pub async fn fetch_summaries(
1011    view: &ContextAssemblyView<'_>,
1012    token_budget: usize,
1013    tc: &TokenCounter,
1014) -> Result<Option<Message>, ContextError> {
1015    fetch_summaries_raw(
1016        view.memory.as_deref(),
1017        view.conversation_id,
1018        token_budget,
1019        tc,
1020    )
1021    .await
1022    .map_err(ContextError::Memory)
1023}
1024
1025/// Fetch cross-session context summaries for the given query and enforce the token budget.
1026///
1027/// Delegates to [`fetch_cross_session_raw`] using fields from `view`.
1028///
1029/// Results are filtered by `view.cross_session_score_threshold` before token counting,
1030/// and the current conversation is excluded from the search results.
1031///
1032/// Returns `None` when memory or the conversation ID is absent, the budget is zero,
1033/// no results exceed the threshold, or the result set is empty.
1034///
1035/// # Errors
1036///
1037/// Returns [`ContextError::Memory`] when the memory backend returns an error.
1038pub async fn fetch_cross_session(
1039    view: &ContextAssemblyView<'_>,
1040    query: &str,
1041    token_budget: usize,
1042    tc: &TokenCounter,
1043) -> Result<Option<Message>, ContextError> {
1044    fetch_cross_session_raw(
1045        view.memory.as_deref(),
1046        view.conversation_id,
1047        view.cross_session_score_threshold,
1048        query,
1049        token_budget,
1050        tc,
1051    )
1052    .await
1053    .map_err(ContextError::Memory)
1054}
1055
/// Budget state injected into the volatile system prompt section.
///
/// The two cost fields are optional and are left out of the rendered output when `None`.
/// The two tool-call fields are always present in the struct but are rendered only when
/// `max_tool_calls` is non-zero.
/// [`BudgetHint::format_xml`] returns `None` when there is neither a remaining-cost value
/// nor a tool-call limit.
///
/// Callers should construct this from cost-tracker and tool-orchestrator state, then call
/// `format_xml` and append the result to the system prompt when `Some`.
#[derive(Debug, Clone, PartialEq)]
pub struct BudgetHint {
    /// Remaining daily budget in US cents, if a daily limit is configured.
    pub remaining_cost_cents: Option<f64>,
    /// Total daily budget in US cents, if a daily limit is configured.
    pub total_budget_cents: Option<f64>,
    /// Remaining tool-call iterations this turn.
    pub remaining_tool_calls: usize,
    /// Maximum allowed tool-call iterations per turn (0 = no limit configured).
    pub max_tool_calls: usize,
}

impl BudgetHint {
    /// Render the budget hint as an XML fragment for injection into the system prompt.
    ///
    /// Returns `None` when no meaningful budget data is available — that is, when
    /// `remaining_cost_cents` is `None` and `max_tool_calls` is zero. Callers must skip
    /// injection rather than injecting an empty `<budget></budget>` block. A
    /// `total_budget_cents` value on its own does not produce output.
    ///
    /// Each present element is written on its own line between `<budget>` and
    /// `</budget>`; cost values are formatted with two decimal places.
    ///
    /// # Examples
    ///
    /// ```
    /// use zeph_agent_context::helpers::BudgetHint;
    ///
    /// let hint = BudgetHint {
    ///     remaining_cost_cents: Some(50.0),
    ///     total_budget_cents: Some(100.0),
    ///     remaining_tool_calls: 8,
    ///     max_tool_calls: 10,
    /// };
    /// let xml = hint.format_xml().unwrap();
    /// assert!(xml.contains("<remaining_cost_cents>50.00</remaining_cost_cents>"));
    /// assert!(xml.contains("<remaining_tool_calls>8</remaining_tool_calls>"));
    /// ```
    #[must_use]
    pub fn format_xml(&self) -> Option<String> {
        // The tool-call budget is rendered whenever a limit is configured
        // (`max_tool_calls > 0`); with no limit and no remaining cost there is nothing
        // to report.
        if self.remaining_cost_cents.is_none() && self.max_tool_calls == 0 {
            return None;
        }

        let mut xml = String::from("<budget>");
        // Writing into a `String` cannot fail, so the `fmt::Result`s below are discarded.
        if let Some(remaining) = self.remaining_cost_cents {
            let _ = write!(
                xml,
                "\n<remaining_cost_cents>{remaining:.2}</remaining_cost_cents>"
            );
        }
        if let Some(total) = self.total_budget_cents {
            let _ = write!(xml, "\n<total_budget_cents>{total:.2}</total_budget_cents>");
        }
        if self.max_tool_calls > 0 {
            let _ = write!(
                xml,
                "\n<remaining_tool_calls>{}</remaining_tool_calls>",
                self.remaining_tool_calls
            );
            let _ = write!(
                xml,
                "\n<max_tool_calls>{}</max_tool_calls>",
                self.max_tool_calls
            );
        }
        xml.push_str("\n</budget>");
        Some(xml)
    }
}
1128
#[cfg(test)]
mod budget_hint_tests {
    use super::*;

    /// Build a hint from an optional `(remaining, total)` cost pair plus the tool-call
    /// counters, so each test states only the values it cares about.
    fn hint(
        cost: Option<(f64, f64)>,
        remaining_tool_calls: usize,
        max_tool_calls: usize,
    ) -> BudgetHint {
        BudgetHint {
            remaining_cost_cents: cost.map(|(remaining, _)| remaining),
            total_budget_cents: cost.map(|(_, total)| total),
            remaining_tool_calls,
            max_tool_calls,
        }
    }

    /// No cost data and no tool-call limit: nothing to render.
    #[test]
    fn format_xml_none_when_no_data() {
        let rendered = hint(None, 0, 0).format_xml();
        assert!(rendered.is_none());
    }

    /// Cost fields alone are enough to produce output, formatted with two decimals.
    #[test]
    fn format_xml_with_cost_only() {
        let rendered = hint(Some((25.5, 100.0)), 0, 0).format_xml().unwrap();
        assert!(rendered.contains("<remaining_cost_cents>25.50</remaining_cost_cents>"));
        assert!(rendered.contains("<total_budget_cents>100.00</total_budget_cents>"));
    }

    /// A configured tool-call limit alone is enough to produce output.
    #[test]
    fn format_xml_with_tool_calls_only() {
        let rendered = hint(None, 3, 10).format_xml().unwrap();
        assert!(rendered.contains("<remaining_tool_calls>3</remaining_tool_calls>"));
        assert!(rendered.contains("<max_tool_calls>10</max_tool_calls>"));
    }

    /// With every field populated the fragment is wrapped in a `<budget>` element.
    #[test]
    fn format_xml_with_all_fields() {
        let rendered = hint(Some((50.0, 100.0)), 8, 10).format_xml().unwrap();
        assert!(rendered.starts_with("<budget>"));
        assert!(rendered.ends_with("</budget>"));
    }
}