// zeph_agent_context/helpers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pure helper functions for context assembly.
5//!
6//! These functions are called by `assembly.rs` in `zeph-core` (via a module alias)
7//! and by the [`crate::service::ContextService`] stubs that will be filled in during
8//! subsequent migration steps.
9//!
10//! All functions operate on [`crate::state::ContextAssemblyView`] instead of the
11//! `zeph-core`-internal `MemoryState`, keeping this crate free of `zeph-core` types.
12
13use std::fmt::Write as _;
14use std::time::Instant;
15
16use zeph_config::ContextFormat;
17use zeph_llm::provider::{Message, MessagePart, Role};
18use zeph_memory::{RetrievalFailureRecord, RetrievalFailureType, TokenCounter};
19
20use crate::error::ContextError;
21use crate::state::ContextAssemblyView;
22
/// Header line placed in front of persona context inside a system message.
pub const PERSONA_PREFIX: &str = "[Persona context]\n";
/// Header line placed in front of trajectory (past experience) context in a system message.
pub const TRAJECTORY_PREFIX: &str = "[Past experience]\n";
/// Header line placed in front of tree-based memory summaries in a system message.
pub const TREE_MEMORY_PREFIX: &str = "[Memory summary]\n";
/// Header line placed in front of reasoning strategy context in a system message.
pub const REASONING_PREFIX: &str = "[Reasoning Strategy]\n";

/// Header line placed in front of graph memory facts in a system message.
pub const GRAPH_FACTS_PREFIX: &str = "[known facts]\n";
/// Header line placed in front of semantic recall entries in a system message.
pub const RECALL_PREFIX: &str = "[semantic recall]\n";
/// Header line placed in front of session summary entries in a system message.
pub const SUMMARY_PREFIX: &str = "[conversation summaries]\n";
/// Header line placed in front of cross-session context entries in a system message.
pub const CROSS_SESSION_PREFIX: &str = "[cross-session context]\n";

/// Header line placed in front of past user corrections in a system message.
pub const CORRECTIONS_PREFIX: &str = "[past corrections]\n";
/// Header line placed in front of code-context (repo-map / file context) injections.
pub const CODE_CONTEXT_PREFIX: &str = "[code context]\n";
/// Header line of a user message carrying the session digest of the previous interaction.
pub const SESSION_DIGEST_PREFIX: &str = "[Session digest from previous interaction]\n";
/// Opening of the tag that starts an LSP context note (diagnostics, hover data, etc.).
///
/// Unlike the other prefixes this one is not a complete line: it ends after `"[lsp "`.
pub const LSP_NOTE_PREFIX: &str = "[lsp ";
/// Markdown heading placed in front of document RAG results in a system message.
pub const DOCUMENT_RAG_PREFIX: &str = "## Relevant documents\n";
51
52/// Truncate `s` to at most `max_chars` Unicode scalar values.
53///
54/// Delegates to `zeph_common::text::truncate_to_chars` which respects UTF-8 boundaries.
55#[must_use]
56pub fn truncate_chars(s: &str, max_chars: usize) -> String {
57    zeph_common::text::truncate_to_chars(s, max_chars)
58}
59
60/// Format a user correction as a single bullet point for injection into the system prompt.
61///
62/// The `correction_text` must already be scrubbed by the caller before being passed here.
63/// Truncated to 200 characters to avoid inflating the context with verbose correction notes.
64#[must_use]
65pub fn format_correction_note(correction_text: &str) -> String {
66    format!(
67        "- Past user correction: \"{}\"",
68        truncate_chars(correction_text, 200)
69    )
70}
71
72/// Return the effective spreading-activation recall timeout in milliseconds.
73///
74/// A configured value of `0` would silently disable recall; this function clamps it to
75/// `100ms` and emits a warning so operators notice the misconfiguration without a crash.
76pub fn effective_recall_timeout_ms(configured: u64) -> u64 {
77    if configured == 0 {
78        tracing::warn!(
79            "recall_timeout_ms is 0, which would disable spreading activation recall; \
80             clamping to 100ms"
81        );
82        100
83    } else {
84        configured
85    }
86}
87
88/// Fetch graph memory facts for the given query and inject them into the context budget.
89///
90/// Delegates to [`fetch_graph_facts_raw`] using fields from `view`.
91///
92/// Returns `None` when graph recall is disabled, the budget is zero, no memory is
93/// attached, or the recalled fact set is empty after budget enforcement.
94///
95/// # Errors
96///
97/// Returns [`ContextError::Memory`] when the graph recall backend returns an error.
98pub async fn fetch_graph_facts(
99    view: &ContextAssemblyView<'_>,
100    query: &str,
101    budget_tokens: usize,
102    tc: &TokenCounter,
103) -> Result<Option<Message>, ContextError> {
104    fetch_graph_facts_raw(
105        view.memory.as_deref(),
106        &view.graph_config,
107        query,
108        budget_tokens,
109        tc,
110    )
111    .await
112    .map_err(ContextError::Memory)
113}
114
115/// Fetch graph memory facts using individual field arguments.
116///
117/// This is the raw-args variant used by `zeph-core` test bridge methods and by
118/// [`fetch_graph_facts`] internally. It accepts only the fields that the graph recall
119/// logic actually accesses, avoiding the need to construct a full [`ContextAssemblyView`]
120/// in test harnesses.
121///
122/// # Errors
123///
124/// Returns [`zeph_memory::MemoryError`] when the graph recall backend returns an error.
125#[allow(clippy::too_many_lines, clippy::items_after_statements)]
126pub async fn fetch_graph_facts_raw(
127    memory: Option<&zeph_memory::semantic::SemanticMemory>,
128    graph_config: &zeph_config::GraphConfig,
129    query: &str,
130    budget_tokens: usize,
131    tc: &TokenCounter,
132) -> Result<Option<Message>, zeph_memory::MemoryError> {
133    if budget_tokens == 0 || !graph_config.enabled {
134        return Ok(None);
135    }
136    let Some(memory) = memory else {
137        return Ok(None);
138    };
139    let recall_limit = graph_config.recall_limit;
140    let temporal_decay_rate = graph_config.temporal_decay_rate;
141    let edge_types = zeph_memory::classify_graph_subgraph(query);
142    let sa_config = &graph_config.spreading_activation;
143
144    let mut body = String::from(GRAPH_FACTS_PREFIX);
145    let mut tokens_so_far = tc.count_tokens(&body);
146    let max_hops = graph_config.max_hops;
147
148    use zeph_config::memory::GraphRetrievalStrategy;
149    let effective_strategy = if sa_config.enabled {
150        GraphRetrievalStrategy::Synapse
151    } else {
152        graph_config.retrieval_strategy
153    };
154
155    let _span = tracing::info_span!("memory.graph.dispatch", ?effective_strategy).entered();
156    let strategy_str = format!("{effective_strategy:?}").to_lowercase();
157    let edge_types_json = serde_json::to_string(&edge_types).ok();
158
159    /// Append graph facts to `body` respecting the token budget; returns result count.
160    fn append_graph_facts(
161        facts: &[zeph_memory::graph::types::GraphFact],
162        body: &mut String,
163        tokens_so_far: &mut usize,
164        budget_tokens: usize,
165        tc: &TokenCounter,
166    ) -> usize {
167        let mut count = 0;
168        for f in facts {
169            let fact_text = f.fact.replace(['\n', '\r', '<', '>'], " ");
170            let line = format!("- {} (confidence: {:.2})\n", fact_text, f.confidence);
171            let line_tokens = tc.count_tokens(&line);
172            if *tokens_so_far + line_tokens > budget_tokens {
173                break;
174            }
175            body.push_str(&line);
176            *tokens_so_far += line_tokens;
177            count += 1;
178        }
179        count
180    }
181
182    match effective_strategy {
183        GraphRetrievalStrategy::Synapse => {
184            let sa_params = zeph_memory::graph::SpreadingActivationParams {
185                decay_lambda: sa_config.decay_lambda,
186                max_hops: sa_config.max_hops,
187                activation_threshold: sa_config.activation_threshold,
188                inhibition_threshold: sa_config.inhibition_threshold,
189                max_activated_nodes: sa_config.max_activated_nodes,
190                temporal_decay_rate,
191                seed_structural_weight: sa_config.seed_structural_weight,
192                seed_community_cap: sa_config.seed_community_cap,
193            };
194            let timeout_ms = effective_recall_timeout_ms(sa_config.recall_timeout_ms);
195            let t0 = Instant::now();
196            let activated_facts = match tokio::time::timeout(
197                std::time::Duration::from_millis(timeout_ms),
198                memory.recall_graph_activated(query, recall_limit, sa_params, &edge_types),
199            )
200            .await
201            {
202                Ok(Ok(facts)) => facts,
203                Ok(Err(e)) => {
204                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
205                    tracing::warn!("spreading activation recall failed: {e:#}");
206                    // TODO(#3576): conversation_id and turn_index not yet propagated into
207                    // context helpers; tracked for future enhancement when
208                    // ContextAssemblyView exposes them.
209                    memory.log_retrieval_failure(RetrievalFailureRecord {
210                        conversation_id: None,
211                        turn_index: 0,
212                        failure_type: RetrievalFailureType::Error,
213                        retrieval_strategy: strategy_str.clone(),
214                        query_text: query.to_owned(),
215                        query_len: query.len(),
216                        top_score: None,
217                        confidence_threshold: None,
218                        result_count: 0,
219                        latency_ms,
220                        edge_types: edge_types_json.clone(),
221                        error_context: Some(format!("{e:#}")),
222                    });
223                    Vec::new()
224                }
225                Err(_) => {
226                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
227                    tracing::warn!("spreading activation recall timed out ({timeout_ms}ms)");
228                    memory.log_retrieval_failure(RetrievalFailureRecord {
229                        conversation_id: None,
230                        turn_index: 0,
231                        failure_type: RetrievalFailureType::Timeout,
232                        retrieval_strategy: strategy_str.clone(),
233                        query_text: query.to_owned(),
234                        query_len: query.len(),
235                        top_score: None,
236                        confidence_threshold: None,
237                        result_count: 0,
238                        latency_ms,
239                        edge_types: edge_types_json.clone(),
240                        error_context: Some(format!("timeout after {timeout_ms}ms")),
241                    });
242                    Vec::new()
243                }
244            };
245            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
246            if activated_facts.is_empty() {
247                memory.log_retrieval_failure(RetrievalFailureRecord {
248                    conversation_id: None,
249                    turn_index: 0,
250                    failure_type: RetrievalFailureType::NoHit,
251                    retrieval_strategy: strategy_str,
252                    query_text: query.to_owned(),
253                    query_len: query.len(),
254                    top_score: None,
255                    confidence_threshold: None,
256                    result_count: 0,
257                    latency_ms,
258                    edge_types: edge_types_json,
259                    error_context: None,
260                });
261                return Ok(None);
262            }
263            for f in &activated_facts {
264                let fact_text = f.edge.fact.replace(['\n', '\r', '<', '>'], " ");
265                let line = format!(
266                    "- {} (confidence: {:.2}, activation: {:.2})\n",
267                    fact_text, f.edge.confidence, f.activation_score
268                );
269                let line_tokens = tc.count_tokens(&line);
270                if tokens_so_far + line_tokens > budget_tokens {
271                    break;
272                }
273                body.push_str(&line);
274                tokens_so_far += line_tokens;
275            }
276        }
277        GraphRetrievalStrategy::Bfs => {
278            let t0 = Instant::now();
279            let facts = match memory
280                .recall_graph(
281                    query,
282                    recall_limit,
283                    max_hops,
284                    None,
285                    temporal_decay_rate,
286                    &edge_types,
287                )
288                .await
289            {
290                Ok(f) => f,
291                Err(e) => {
292                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
293                    memory.log_retrieval_failure(RetrievalFailureRecord {
294                        conversation_id: None,
295                        turn_index: 0,
296                        failure_type: RetrievalFailureType::Error,
297                        retrieval_strategy: strategy_str,
298                        query_text: query.to_owned(),
299                        query_len: query.len(),
300                        top_score: None,
301                        confidence_threshold: None,
302                        result_count: 0,
303                        latency_ms,
304                        edge_types: edge_types_json,
305                        error_context: Some(format!("{e:#}")),
306                    });
307                    return Err(e);
308                }
309            };
310            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
311            if facts.is_empty() {
312                memory.log_retrieval_failure(RetrievalFailureRecord {
313                    conversation_id: None,
314                    turn_index: 0,
315                    failure_type: RetrievalFailureType::NoHit,
316                    retrieval_strategy: strategy_str,
317                    query_text: query.to_owned(),
318                    query_len: query.len(),
319                    top_score: None,
320                    confidence_threshold: None,
321                    result_count: 0,
322                    latency_ms,
323                    edge_types: edge_types_json,
324                    error_context: None,
325                });
326                return Ok(None);
327            }
328            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
329        }
330        GraphRetrievalStrategy::AStar => {
331            let t0 = Instant::now();
332            let facts = match memory
333                .recall_graph_astar(
334                    query,
335                    recall_limit,
336                    max_hops,
337                    temporal_decay_rate,
338                    &edge_types,
339                )
340                .await
341            {
342                Ok(f) => f,
343                Err(e) => {
344                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
345                    memory.log_retrieval_failure(RetrievalFailureRecord {
346                        conversation_id: None,
347                        turn_index: 0,
348                        failure_type: RetrievalFailureType::Error,
349                        retrieval_strategy: strategy_str,
350                        query_text: query.to_owned(),
351                        query_len: query.len(),
352                        top_score: None,
353                        confidence_threshold: None,
354                        result_count: 0,
355                        latency_ms,
356                        edge_types: edge_types_json,
357                        error_context: Some(format!("{e:#}")),
358                    });
359                    return Err(e);
360                }
361            };
362            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
363            if facts.is_empty() {
364                memory.log_retrieval_failure(RetrievalFailureRecord {
365                    conversation_id: None,
366                    turn_index: 0,
367                    failure_type: RetrievalFailureType::NoHit,
368                    retrieval_strategy: strategy_str,
369                    query_text: query.to_owned(),
370                    query_len: query.len(),
371                    top_score: None,
372                    confidence_threshold: None,
373                    result_count: 0,
374                    latency_ms,
375                    edge_types: edge_types_json,
376                    error_context: None,
377                });
378                return Ok(None);
379            }
380            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
381        }
382        GraphRetrievalStrategy::WaterCircles => {
383            let ring_limit = graph_config.watercircles.ring_limit;
384            let t0 = Instant::now();
385            let facts = match memory
386                .recall_graph_watercircles(
387                    query,
388                    recall_limit,
389                    max_hops,
390                    ring_limit,
391                    temporal_decay_rate,
392                    &edge_types,
393                )
394                .await
395            {
396                Ok(f) => f,
397                Err(e) => {
398                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
399                    memory.log_retrieval_failure(RetrievalFailureRecord {
400                        conversation_id: None,
401                        turn_index: 0,
402                        failure_type: RetrievalFailureType::Error,
403                        retrieval_strategy: strategy_str,
404                        query_text: query.to_owned(),
405                        query_len: query.len(),
406                        top_score: None,
407                        confidence_threshold: None,
408                        result_count: 0,
409                        latency_ms,
410                        edge_types: edge_types_json,
411                        error_context: Some(format!("{e:#}")),
412                    });
413                    return Err(e);
414                }
415            };
416            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
417            if facts.is_empty() {
418                memory.log_retrieval_failure(RetrievalFailureRecord {
419                    conversation_id: None,
420                    turn_index: 0,
421                    failure_type: RetrievalFailureType::NoHit,
422                    retrieval_strategy: strategy_str,
423                    query_text: query.to_owned(),
424                    query_len: query.len(),
425                    top_score: None,
426                    confidence_threshold: None,
427                    result_count: 0,
428                    latency_ms,
429                    edge_types: edge_types_json,
430                    error_context: None,
431                });
432                return Ok(None);
433            }
434            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
435        }
436        GraphRetrievalStrategy::BeamSearch => {
437            let beam_width = graph_config.beam_search.beam_width;
438            let t0 = Instant::now();
439            let facts = match memory
440                .recall_graph_beam(
441                    query,
442                    recall_limit,
443                    beam_width,
444                    max_hops,
445                    temporal_decay_rate,
446                    &edge_types,
447                )
448                .await
449            {
450                Ok(f) => f,
451                Err(e) => {
452                    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
453                    memory.log_retrieval_failure(RetrievalFailureRecord {
454                        conversation_id: None,
455                        turn_index: 0,
456                        failure_type: RetrievalFailureType::Error,
457                        retrieval_strategy: strategy_str,
458                        query_text: query.to_owned(),
459                        query_len: query.len(),
460                        top_score: None,
461                        confidence_threshold: None,
462                        result_count: 0,
463                        latency_ms,
464                        edge_types: edge_types_json,
465                        error_context: Some(format!("{e:#}")),
466                    });
467                    return Err(e);
468                }
469            };
470            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
471            if facts.is_empty() {
472                memory.log_retrieval_failure(RetrievalFailureRecord {
473                    conversation_id: None,
474                    turn_index: 0,
475                    failure_type: RetrievalFailureType::NoHit,
476                    retrieval_strategy: strategy_str,
477                    query_text: query.to_owned(),
478                    query_len: query.len(),
479                    top_score: None,
480                    confidence_threshold: None,
481                    result_count: 0,
482                    latency_ms,
483                    edge_types: edge_types_json,
484                    error_context: None,
485                });
486                return Ok(None);
487            }
488            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
489        }
490        GraphRetrievalStrategy::Hybrid => {
491            const CLASSIFIER_TIMEOUT_MS: u64 = 2_000;
492            let classifier_t0 = Instant::now();
493            let classified = if let Ok(s) = tokio::time::timeout(
494                std::time::Duration::from_millis(CLASSIFIER_TIMEOUT_MS),
495                memory.classify_graph_strategy(query),
496            )
497            .await
498            {
499                s
500            } else {
501                let latency_ms = classifier_t0
502                    .elapsed()
503                    .as_millis()
504                    .try_into()
505                    .unwrap_or(u64::MAX);
506                tracing::warn!(
507                    "hybrid strategy classifier timed out after {CLASSIFIER_TIMEOUT_MS}ms, \
508                     falling back to synapse"
509                );
510                memory.log_retrieval_failure(RetrievalFailureRecord {
511                    conversation_id: None,
512                    turn_index: 0,
513                    failure_type: RetrievalFailureType::Timeout,
514                    retrieval_strategy: "hybrid_classifier".to_owned(),
515                    query_text: query.to_owned(),
516                    query_len: query.len(),
517                    top_score: None,
518                    confidence_threshold: None,
519                    result_count: 0,
520                    latency_ms,
521                    edge_types: edge_types_json.clone(),
522                    error_context: Some(format!(
523                        "classifier timeout after {CLASSIFIER_TIMEOUT_MS}ms"
524                    )),
525                });
526                "synapse".to_owned()
527            };
528            tracing::debug!(classified_strategy = %classified, "hybrid dispatch: classified");
529            let t0 = Instant::now();
530            let facts = match classified.as_str() {
531                "astar" => {
532                    match memory
533                        .recall_graph_astar(
534                            query,
535                            recall_limit,
536                            max_hops,
537                            temporal_decay_rate,
538                            &edge_types,
539                        )
540                        .await
541                    {
542                        Ok(f) => f,
543                        Err(e) => {
544                            let latency_ms =
545                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
546                            memory.log_retrieval_failure(RetrievalFailureRecord {
547                                conversation_id: None,
548                                turn_index: 0,
549                                failure_type: RetrievalFailureType::Error,
550                                retrieval_strategy: strategy_str,
551                                query_text: query.to_owned(),
552                                query_len: query.len(),
553                                top_score: None,
554                                confidence_threshold: None,
555                                result_count: 0,
556                                latency_ms,
557                                edge_types: edge_types_json,
558                                error_context: Some(format!("{e:#}")),
559                            });
560                            return Err(e);
561                        }
562                    }
563                }
564                "watercircles" => {
565                    let ring_limit = graph_config.watercircles.ring_limit;
566                    match memory
567                        .recall_graph_watercircles(
568                            query,
569                            recall_limit,
570                            max_hops,
571                            ring_limit,
572                            temporal_decay_rate,
573                            &edge_types,
574                        )
575                        .await
576                    {
577                        Ok(f) => f,
578                        Err(e) => {
579                            let latency_ms =
580                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
581                            memory.log_retrieval_failure(RetrievalFailureRecord {
582                                conversation_id: None,
583                                turn_index: 0,
584                                failure_type: RetrievalFailureType::Error,
585                                retrieval_strategy: strategy_str,
586                                query_text: query.to_owned(),
587                                query_len: query.len(),
588                                top_score: None,
589                                confidence_threshold: None,
590                                result_count: 0,
591                                latency_ms,
592                                edge_types: edge_types_json,
593                                error_context: Some(format!("{e:#}")),
594                            });
595                            return Err(e);
596                        }
597                    }
598                }
599                "beam_search" => {
600                    let beam_width = graph_config.beam_search.beam_width;
601                    match memory
602                        .recall_graph_beam(
603                            query,
604                            recall_limit,
605                            beam_width,
606                            max_hops,
607                            temporal_decay_rate,
608                            &edge_types,
609                        )
610                        .await
611                    {
612                        Ok(f) => f,
613                        Err(e) => {
614                            let latency_ms =
615                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
616                            memory.log_retrieval_failure(RetrievalFailureRecord {
617                                conversation_id: None,
618                                turn_index: 0,
619                                failure_type: RetrievalFailureType::Error,
620                                retrieval_strategy: strategy_str,
621                                query_text: query.to_owned(),
622                                query_len: query.len(),
623                                top_score: None,
624                                confidence_threshold: None,
625                                result_count: 0,
626                                latency_ms,
627                                edge_types: edge_types_json,
628                                error_context: Some(format!("{e:#}")),
629                            });
630                            return Err(e);
631                        }
632                    }
633                }
634                _ => {
635                    let sa_params = zeph_memory::graph::SpreadingActivationParams {
636                        decay_lambda: sa_config.decay_lambda,
637                        max_hops: sa_config.max_hops,
638                        activation_threshold: sa_config.activation_threshold,
639                        inhibition_threshold: sa_config.inhibition_threshold,
640                        max_activated_nodes: sa_config.max_activated_nodes,
641                        temporal_decay_rate,
642                        seed_structural_weight: sa_config.seed_structural_weight,
643                        seed_community_cap: sa_config.seed_community_cap,
644                    };
645                    match memory
646                        .recall_graph_activated(query, recall_limit, sa_params, &edge_types)
647                        .await
648                    {
649                        Ok(activated) => activated
650                            .into_iter()
651                            .map(|f| zeph_memory::graph::types::GraphFact {
652                                entity_name: f.edge.source_entity_id.to_string(),
653                                relation: f.edge.relation.clone(),
654                                target_name: f.edge.target_entity_id.to_string(),
655                                fact: f.edge.fact.clone(),
656                                entity_match_score: f.activation_score,
657                                hop_distance: 0,
658                                confidence: f.edge.confidence,
659                                valid_from: Some(f.edge.valid_from.clone()),
660                                edge_type: f.edge.edge_type,
661                                retrieval_count: f.edge.retrieval_count,
662                                edge_id: Some(f.edge.id),
663                            })
664                            .collect(),
665                        Err(e) => {
666                            let latency_ms =
667                                t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
668                            memory.log_retrieval_failure(RetrievalFailureRecord {
669                                conversation_id: None,
670                                turn_index: 0,
671                                failure_type: RetrievalFailureType::Error,
672                                retrieval_strategy: strategy_str,
673                                query_text: query.to_owned(),
674                                query_len: query.len(),
675                                top_score: None,
676                                confidence_threshold: None,
677                                result_count: 0,
678                                latency_ms,
679                                edge_types: edge_types_json,
680                                error_context: Some(format!("{e:#}")),
681                            });
682                            return Err(e);
683                        }
684                    }
685                }
686            };
687            let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
688            if facts.is_empty() {
689                memory.log_retrieval_failure(RetrievalFailureRecord {
690                    conversation_id: None,
691                    turn_index: 0,
692                    failure_type: RetrievalFailureType::NoHit,
693                    retrieval_strategy: strategy_str,
694                    query_text: query.to_owned(),
695                    query_len: query.len(),
696                    top_score: None,
697                    confidence_threshold: None,
698                    result_count: 0,
699                    latency_ms,
700                    edge_types: edge_types_json,
701                    error_context: None,
702                });
703                return Ok(None);
704            }
705            append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
706        }
707        _ => {}
708    }
709
710    if body == GRAPH_FACTS_PREFIX {
711        return Ok(None);
712    }
713
714    Ok(Some(Message::from_legacy(Role::System, body)))
715}
716
717/// Fetch semantically recalled messages using individual field arguments.
718///
719/// Raw-args variant used by `zeph-core` test bridge methods and by [`fetch_semantic_recall`].
720///
721/// When `low_confidence_threshold` is `Some(t)`, results with a top score below `t` are
722/// classified as low-confidence and logged via the memory's retrieval failure logger.
723///
724/// # Errors
725///
726/// Returns [`zeph_memory::MemoryError`] when the memory backend returns an error.
727#[allow(clippy::too_many_arguments)]
728pub async fn fetch_semantic_recall_raw(
729    memory: Option<&zeph_memory::semantic::SemanticMemory>,
730    recall_limit: usize,
731    context_format: ContextFormat,
732    query: &str,
733    token_budget: usize,
734    tc: &TokenCounter,
735    router: Option<&dyn zeph_memory::AsyncMemoryRouter>,
736    low_confidence_threshold: Option<f32>,
737) -> Result<(Option<Message>, Option<f32>), zeph_memory::MemoryError> {
738    let Some(memory) = memory else {
739        return Ok((None, None));
740    };
741    if recall_limit == 0 || token_budget == 0 {
742        return Ok((None, None));
743    }
744
745    let t0 = Instant::now();
746    let recalled = if let Some(r) = router {
747        memory
748            .recall_routed_async(query, recall_limit, None, r, None)
749            .await?
750    } else {
751        memory.recall(query, recall_limit, None).await?
752    };
753    let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
754
755    if recalled.is_empty() {
756        memory.log_retrieval_failure(RetrievalFailureRecord {
757            conversation_id: None,
758            turn_index: 0,
759            failure_type: RetrievalFailureType::NoHit,
760            retrieval_strategy: "semantic".to_owned(),
761            query_text: query.to_owned(),
762            query_len: query.len(),
763            top_score: None,
764            confidence_threshold: low_confidence_threshold,
765            result_count: 0,
766            latency_ms,
767            edge_types: None,
768            error_context: None,
769        });
770        return Ok((None, None));
771    }
772
773    let top_score = recalled.first().map(|r| r.score);
774
775    if let (Some(score), Some(threshold)) = (top_score, low_confidence_threshold)
776        && score < threshold
777    {
778        memory.log_retrieval_failure(RetrievalFailureRecord {
779            conversation_id: None,
780            turn_index: 0,
781            failure_type: RetrievalFailureType::LowConfidence,
782            retrieval_strategy: "semantic".to_owned(),
783            query_text: query.to_owned(),
784            query_len: query.len(),
785            top_score: Some(score),
786            confidence_threshold: Some(threshold),
787            result_count: recalled.len(),
788            latency_ms,
789            edge_types: None,
790            error_context: None,
791        });
792    }
793    let initial_cap = (recall_limit * 512).min(token_budget * 3);
794    let mut recall_text = String::with_capacity(initial_cap);
795    recall_text.push_str(RECALL_PREFIX);
796    let mut tokens_used = tc.count_tokens(&recall_text);
797
798    for item in &recalled {
799        if item.message.content.starts_with("[skipped]")
800            || item.message.content.starts_with("[stopped]")
801        {
802            continue;
803        }
804        let entry = match context_format {
805            ContextFormat::Structured => format_structured_recall_entry(item),
806            _ => format_plain_recall_entry(item),
807        };
808        let entry_tokens = tc.count_tokens(&entry);
809        if tokens_used + entry_tokens > token_budget {
810            break;
811        }
812        recall_text.push_str(&entry);
813        tokens_used += entry_tokens;
814    }
815
816    if tokens_used > tc.count_tokens(RECALL_PREFIX) {
817        Ok((
818            Some(Message::from_parts(
819                Role::System,
820                vec![MessagePart::Recall { text: recall_text }],
821            )),
822            top_score,
823        ))
824    } else {
825        Ok((None, None))
826    }
827}
828
829/// Fetch session summaries using individual field arguments.
830///
831/// Raw-args variant used by `zeph-core` test bridge methods and by [`fetch_summaries`].
832///
833/// # Errors
834///
835/// Returns [`zeph_memory::MemoryError`] when the memory backend returns an error.
836pub async fn fetch_summaries_raw(
837    memory: Option<&zeph_memory::semantic::SemanticMemory>,
838    conversation_id: Option<zeph_memory::ConversationId>,
839    token_budget: usize,
840    tc: &TokenCounter,
841) -> Result<Option<Message>, zeph_memory::MemoryError> {
842    let (Some(memory), Some(cid)) = (memory, conversation_id) else {
843        return Ok(None);
844    };
845    if token_budget == 0 {
846        return Ok(None);
847    }
848
849    let summaries = memory.load_summaries(cid).await?;
850    if summaries.is_empty() {
851        return Ok(None);
852    }
853
854    let mut summary_text = String::from(SUMMARY_PREFIX);
855    let mut tokens_used = tc.count_tokens(&summary_text);
856
857    for summary in summaries.iter().rev() {
858        let first = summary.first_message_id.map_or(0, |m| m.0);
859        let last = summary.last_message_id.map_or(0, |m| m.0);
860        let entry = format!("- Messages {first}-{last}: {}\n", summary.content);
861        let cost = tc.count_tokens(&entry);
862        if tokens_used + cost > token_budget {
863            break;
864        }
865        summary_text.push_str(&entry);
866        tokens_used += cost;
867    }
868
869    if tokens_used > tc.count_tokens(SUMMARY_PREFIX) {
870        Ok(Some(Message::from_parts(
871            Role::System,
872            vec![MessagePart::Summary { text: summary_text }],
873        )))
874    } else {
875        Ok(None)
876    }
877}
878
879/// Fetch cross-session context summaries using individual field arguments.
880///
881/// Raw-args variant used by `zeph-core` test bridge methods and by [`fetch_cross_session`].
882///
883/// # Errors
884///
885/// Returns [`zeph_memory::MemoryError`] when the memory backend returns an error.
886pub async fn fetch_cross_session_raw(
887    memory: Option<&zeph_memory::semantic::SemanticMemory>,
888    conversation_id: Option<zeph_memory::ConversationId>,
889    cross_session_score_threshold: f32,
890    query: &str,
891    token_budget: usize,
892    tc: &TokenCounter,
893) -> Result<Option<Message>, zeph_memory::MemoryError> {
894    let (Some(memory), Some(cid)) = (memory, conversation_id) else {
895        return Ok(None);
896    };
897    if token_budget == 0 {
898        return Ok(None);
899    }
900
901    let results: Vec<_> = memory
902        .search_session_summaries(query, 5, Some(cid))
903        .await?
904        .into_iter()
905        .filter(|r| r.score >= cross_session_score_threshold)
906        .collect();
907    if results.is_empty() {
908        return Ok(None);
909    }
910
911    let mut text = String::from(CROSS_SESSION_PREFIX);
912    let mut tokens_used = tc.count_tokens(&text);
913
914    for item in &results {
915        let entry = format!("- {}\n", item.summary_text);
916        let cost = tc.count_tokens(&entry);
917        if tokens_used + cost > token_budget {
918            break;
919        }
920        text.push_str(&entry);
921        tokens_used += cost;
922    }
923
924    if tokens_used > tc.count_tokens(CROSS_SESSION_PREFIX) {
925        Ok(Some(Message::from_parts(
926            Role::System,
927            vec![MessagePart::CrossSession { text }],
928        )))
929    } else {
930        Ok(None)
931    }
932}
933
934/// Fetch semantically recalled messages for the given query and enforce the token budget.
935///
936/// Delegates to [`fetch_semantic_recall_raw`] using fields from `view`.
937///
938/// Returns `(None, None)` when memory is absent, recall is disabled, the budget is zero,
939/// or the recalled set is empty.
940///
941/// The second element of the tuple is the similarity score of the top recalled entry, used
942/// by the caller to track recall confidence for telemetry.
943///
944/// # Errors
945///
946/// Returns [`ContextError::Memory`] when the memory recall backend returns an error.
947pub async fn fetch_semantic_recall(
948    view: &ContextAssemblyView<'_>,
949    query: &str,
950    token_budget: usize,
951    tc: &TokenCounter,
952    router: Option<&dyn zeph_memory::AsyncMemoryRouter>,
953) -> Result<(Option<Message>, Option<f32>), ContextError> {
954    fetch_semantic_recall_raw(
955        view.memory.as_deref(),
956        view.recall_limit,
957        view.context_format,
958        query,
959        token_budget,
960        tc,
961        router,
962        None,
963    )
964    .await
965    .map_err(ContextError::Memory)
966}
967
968fn format_plain_recall_entry(item: &zeph_memory::RecalledMessage) -> String {
969    let role_label = match item.message.role {
970        Role::User => "user",
971        Role::Assistant => "assistant",
972        Role::System => "system",
973    };
974    format!("- [{}] {}\n", role_label, item.message.content)
975}
976
977#[allow(clippy::map_unwrap_or)]
978fn format_structured_recall_entry(item: &zeph_memory::RecalledMessage) -> String {
979    let source = match item.message.role {
980        Role::User => "user",
981        Role::Assistant => "assistant",
982        Role::System => "system",
983    };
984    // Use compacted_at as a proxy for message age when available; otherwise "unknown".
985    // A full timestamp lookup from SQLite would require an async DB call in the assembler
986    // and is deferred to a future enhancement (TODO: enhance when message timestamps are
987    // propagated into RecalledMessage).
988    let date = item
989        .message
990        .metadata
991        .compacted_at
992        .and_then(|ts| chrono::DateTime::from_timestamp(ts, 0))
993        .map(|dt| dt.format("%Y-%m-%d").to_string())
994        .unwrap_or_else(|| "unknown".to_owned());
995    format!(
996        "[Memory | {} | {} | relevance: {:.2}]\n{}\n",
997        source, date, item.score, item.message.content
998    )
999}
1000
1001/// Fetch session summaries for the current conversation and enforce the token budget.
1002///
1003/// Delegates to [`fetch_summaries_raw`] using fields from `view`.
1004///
1005/// Returns `None` when memory or the conversation ID is absent, the budget is zero,
1006/// or no summaries exist yet.
1007///
1008/// # Errors
1009///
1010/// Returns [`ContextError::Memory`] when the memory backend returns an error.
1011pub async fn fetch_summaries(
1012    view: &ContextAssemblyView<'_>,
1013    token_budget: usize,
1014    tc: &TokenCounter,
1015) -> Result<Option<Message>, ContextError> {
1016    fetch_summaries_raw(
1017        view.memory.as_deref(),
1018        view.conversation_id,
1019        token_budget,
1020        tc,
1021    )
1022    .await
1023    .map_err(ContextError::Memory)
1024}
1025
1026/// Fetch cross-session context summaries for the given query and enforce the token budget.
1027///
1028/// Delegates to [`fetch_cross_session_raw`] using fields from `view`.
1029///
1030/// Results are filtered by `view.cross_session_score_threshold` before token counting,
1031/// and the current conversation is excluded from the search results.
1032///
1033/// Returns `None` when memory or the conversation ID is absent, the budget is zero,
1034/// no results exceed the threshold, or the result set is empty.
1035///
1036/// # Errors
1037///
1038/// Returns [`ContextError::Memory`] when the memory backend returns an error.
1039pub async fn fetch_cross_session(
1040    view: &ContextAssemblyView<'_>,
1041    query: &str,
1042    token_budget: usize,
1043    tc: &TokenCounter,
1044) -> Result<Option<Message>, ContextError> {
1045    fetch_cross_session_raw(
1046        view.memory.as_deref(),
1047        view.conversation_id,
1048        view.cross_session_score_threshold,
1049        query,
1050        token_budget,
1051        tc,
1052    )
1053    .await
1054    .map_err(ContextError::Memory)
1055}
1056
/// Budget state injected into the volatile system prompt section.
///
/// The cost fields are optional and omitted from the output when absent; the tool-call
/// fields are omitted when `max_tool_calls` is `0`.
/// [`BudgetHint::format_xml`] returns `None` when all fields would be absent.
///
/// Callers should construct this from cost-tracker and tool-orchestrator state, then call
/// `format_xml` and append the result to the system prompt when `Some`.
pub struct BudgetHint {
    /// Remaining daily budget in US cents, if a daily limit is configured.
    pub remaining_cost_cents: Option<f64>,
    /// Total daily budget in US cents, if a daily limit is configured.
    pub total_budget_cents: Option<f64>,
    /// Remaining tool-call iterations this turn.
    pub remaining_tool_calls: usize,
    /// Maximum allowed tool-call iterations per turn (0 = no limit configured).
    pub max_tool_calls: usize,
}

impl BudgetHint {
    /// Render the budget hint as an XML fragment for injection into the system prompt.
    ///
    /// The fragment is a `<budget>` element with one child element per available value,
    /// each on its own line. Cost values are printed with two decimal places. The
    /// `<remaining_tool_calls>` and `<max_tool_calls>` elements are emitted together, and
    /// only when `max_tool_calls` is greater than zero.
    ///
    /// Returns `None` when no budget data is available at all — that is, both cost fields
    /// are `None` and `max_tool_calls` is `0`. Callers must skip injection in that case
    /// rather than injecting an empty `<budget></budget>` block.
    ///
    /// # Examples
    ///
    /// ```
    /// use zeph_agent_context::helpers::BudgetHint;
    ///
    /// let hint = BudgetHint {
    ///     remaining_cost_cents: Some(50.0),
    ///     total_budget_cents: Some(100.0),
    ///     remaining_tool_calls: 8,
    ///     max_tool_calls: 10,
    /// };
    /// let xml = hint.format_xml().unwrap();
    /// assert!(xml.contains("<remaining_cost_cents>50.00</remaining_cost_cents>"));
    /// assert!(xml.contains("<remaining_tool_calls>8</remaining_tool_calls>"));
    /// ```
    #[must_use]
    pub fn format_xml(&self) -> Option<String> {
        // Either cost field counts as data: a hint that carries only the total budget
        // must still be rendered instead of being dropped.
        let has_cost = self.remaining_cost_cents.is_some() || self.total_budget_cents.is_some();
        let has_tool_calls = self.max_tool_calls > 0;
        if !has_cost && !has_tool_calls {
            return None;
        }

        let mut s = String::from("<budget>");
        // Writing into a `String` cannot fail, so the `fmt::Result` values are discarded.
        if let Some(remaining) = self.remaining_cost_cents {
            let _ = write!(
                s,
                "\n<remaining_cost_cents>{remaining:.2}</remaining_cost_cents>"
            );
        }
        if let Some(total) = self.total_budget_cents {
            let _ = write!(s, "\n<total_budget_cents>{total:.2}</total_budget_cents>");
        }
        if has_tool_calls {
            let _ = write!(
                s,
                "\n<remaining_tool_calls>{}</remaining_tool_calls>",
                self.remaining_tool_calls
            );
            let _ = write!(
                s,
                "\n<max_tool_calls>{}</max_tool_calls>",
                self.max_tool_calls
            );
        }
        s.push_str("\n</budget>");
        Some(s)
    }
}
1129
#[cfg(test)]
mod budget_hint_tests {
    use super::*;

    /// Build a [`BudgetHint`] from positional values to keep the test cases compact.
    fn hint(
        remaining_cost_cents: Option<f64>,
        total_budget_cents: Option<f64>,
        remaining_tool_calls: usize,
        max_tool_calls: usize,
    ) -> BudgetHint {
        BudgetHint {
            remaining_cost_cents,
            total_budget_cents,
            remaining_tool_calls,
            max_tool_calls,
        }
    }

    /// No cost data and no tool-call limit: nothing to render.
    #[test]
    fn format_xml_none_when_no_data() {
        assert!(hint(None, None, 0, 0).format_xml().is_none());
    }

    /// Cost fields are rendered with two decimal places.
    #[test]
    fn format_xml_with_cost_only() {
        let xml = hint(Some(25.5), Some(100.0), 0, 0).format_xml().unwrap();
        for expected in [
            "<remaining_cost_cents>25.50</remaining_cost_cents>",
            "<total_budget_cents>100.00</total_budget_cents>",
        ] {
            assert!(xml.contains(expected));
        }
    }

    /// A configured tool-call limit is rendered even without cost data.
    #[test]
    fn format_xml_with_tool_calls_only() {
        let xml = hint(None, None, 3, 10).format_xml().unwrap();
        for expected in [
            "<remaining_tool_calls>3</remaining_tool_calls>",
            "<max_tool_calls>10</max_tool_calls>",
        ] {
            assert!(xml.contains(expected));
        }
    }

    /// The fragment is wrapped in a single `<budget>` element.
    #[test]
    fn format_xml_with_all_fields() {
        let xml = hint(Some(50.0), Some(100.0), 8, 10).format_xml().unwrap();
        assert!(xml.starts_with("<budget>"));
        assert!(xml.ends_with("</budget>"));
    }
}