1use std::fmt::Write as _;
14use std::time::Instant;
15
16use zeph_config::ContextFormat;
17use zeph_llm::provider::{Message, MessagePart, Role};
18use zeph_memory::{RetrievalFailureRecord, RetrievalFailureType, TokenCounter};
19
20use crate::error::ContextError;
21use crate::state::ContextAssemblyView;
22
// Header strings for context sections. Only the graph-facts, recall, summary and cross-session
// prefixes are consumed by the code visible here; the remaining constants are presumably used by
// other modules — confirm at their call sites.

/// Header `[Persona context]` plus newline; presumably opens the persona section — confirm.
pub const PERSONA_PREFIX: &str = "[Persona context]\n";
/// Header `[Past experience]` plus newline; presumably opens the trajectory section — confirm.
pub const TRAJECTORY_PREFIX: &str = "[Past experience]\n";
/// Header `[Memory summary]` plus newline; presumably opens the tree-memory section — confirm.
pub const TREE_MEMORY_PREFIX: &str = "[Memory summary]\n";
/// Header `[Reasoning Strategy]` plus newline; presumably opens the reasoning section — confirm.
pub const REASONING_PREFIX: &str = "[Reasoning Strategy]\n";

/// First line of the graph-facts message body built by `fetch_graph_facts_raw`.
pub const GRAPH_FACTS_PREFIX: &str = "[known facts]\n";
/// First line of the recall text built by `fetch_semantic_recall_raw`.
pub const RECALL_PREFIX: &str = "[semantic recall]\n";
/// First line of the summary text built by `fetch_summaries_raw`.
pub const SUMMARY_PREFIX: &str = "[conversation summaries]\n";
/// First line of the cross-session text built by `fetch_cross_session_raw`.
pub const CROSS_SESSION_PREFIX: &str = "[cross-session context]\n";

/// Header `[past corrections]` plus newline; presumably opens the corrections section — confirm.
pub const CORRECTIONS_PREFIX: &str = "[past corrections]\n";
/// Header `[code context]` plus newline; presumably opens the code-context section — confirm.
pub const CODE_CONTEXT_PREFIX: &str = "[code context]\n";
/// Header for a digest of the previous interaction, terminated by a newline.
pub const SESSION_DIGEST_PREFIX: &str = "[Session digest from previous interaction]\n";
/// Opening fragment `[lsp ` (no closing bracket, no newline); presumably completed by the
/// caller with a note kind — confirm.
pub const LSP_NOTE_PREFIX: &str = "[lsp ";
/// Markdown heading `## Relevant documents` plus newline; presumably opens the document-RAG
/// section — confirm.
pub const DOCUMENT_RAG_PREFIX: &str = "## Relevant documents\n";
51
52#[must_use]
56pub fn truncate_chars(s: &str, max_chars: usize) -> String {
57 zeph_common::text::truncate_to_chars(s, max_chars)
58}
59
60#[must_use]
65pub fn format_correction_note(correction_text: &str) -> String {
66 format!(
67 "- Past user correction: \"{}\"",
68 truncate_chars(correction_text, 200)
69 )
70}
71
72pub fn effective_recall_timeout_ms(configured: u64) -> u64 {
77 if configured == 0 {
78 tracing::warn!(
79 "recall_timeout_ms is 0, which would disable spreading activation recall; \
80 clamping to 100ms"
81 );
82 100
83 } else {
84 configured
85 }
86}
87
88pub async fn fetch_graph_facts(
99 view: &ContextAssemblyView<'_>,
100 query: &str,
101 budget_tokens: usize,
102 tc: &TokenCounter,
103) -> Result<Option<Message>, ContextError> {
104 fetch_graph_facts_raw(
105 view.memory.as_deref(),
106 &view.graph_config,
107 query,
108 budget_tokens,
109 tc,
110 )
111 .await
112 .map_err(ContextError::Memory)
113}
114
115#[allow(clippy::too_many_lines, clippy::items_after_statements)]
126pub async fn fetch_graph_facts_raw(
127 memory: Option<&zeph_memory::semantic::SemanticMemory>,
128 graph_config: &zeph_config::GraphConfig,
129 query: &str,
130 budget_tokens: usize,
131 tc: &TokenCounter,
132) -> Result<Option<Message>, zeph_memory::MemoryError> {
133 if budget_tokens == 0 || !graph_config.enabled {
134 return Ok(None);
135 }
136 let Some(memory) = memory else {
137 return Ok(None);
138 };
139 let recall_limit = graph_config.recall_limit;
140 let temporal_decay_rate = graph_config.temporal_decay_rate;
141 let edge_types = zeph_memory::classify_graph_subgraph(query);
142 let sa_config = &graph_config.spreading_activation;
143
144 let mut body = String::from(GRAPH_FACTS_PREFIX);
145 let mut tokens_so_far = tc.count_tokens(&body);
146 let max_hops = graph_config.max_hops;
147
148 use zeph_config::memory::GraphRetrievalStrategy;
149 let effective_strategy = if sa_config.enabled {
150 GraphRetrievalStrategy::Synapse
151 } else {
152 graph_config.retrieval_strategy
153 };
154
155 let _span = tracing::info_span!("memory.graph.dispatch", ?effective_strategy).entered();
156 let strategy_str = format!("{effective_strategy:?}").to_lowercase();
157 let edge_types_json = serde_json::to_string(&edge_types).ok();
158
159 fn append_graph_facts(
161 facts: &[zeph_memory::graph::types::GraphFact],
162 body: &mut String,
163 tokens_so_far: &mut usize,
164 budget_tokens: usize,
165 tc: &TokenCounter,
166 ) -> usize {
167 let mut count = 0;
168 for f in facts {
169 let fact_text = f.fact.replace(['\n', '\r', '<', '>'], " ");
170 let line = format!("- {} (confidence: {:.2})\n", fact_text, f.confidence);
171 let line_tokens = tc.count_tokens(&line);
172 if *tokens_so_far + line_tokens > budget_tokens {
173 break;
174 }
175 body.push_str(&line);
176 *tokens_so_far += line_tokens;
177 count += 1;
178 }
179 count
180 }
181
182 match effective_strategy {
183 GraphRetrievalStrategy::Synapse => {
184 let sa_params = zeph_memory::graph::SpreadingActivationParams {
185 decay_lambda: sa_config.decay_lambda,
186 max_hops: sa_config.max_hops,
187 activation_threshold: sa_config.activation_threshold,
188 inhibition_threshold: sa_config.inhibition_threshold,
189 max_activated_nodes: sa_config.max_activated_nodes,
190 temporal_decay_rate,
191 seed_structural_weight: sa_config.seed_structural_weight,
192 seed_community_cap: sa_config.seed_community_cap,
193 };
194 let timeout_ms = effective_recall_timeout_ms(sa_config.recall_timeout_ms);
195 let t0 = Instant::now();
196 let activated_facts = match tokio::time::timeout(
197 std::time::Duration::from_millis(timeout_ms),
198 memory.recall_graph_activated(query, recall_limit, sa_params, &edge_types),
199 )
200 .await
201 {
202 Ok(Ok(facts)) => facts,
203 Ok(Err(e)) => {
204 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
205 tracing::warn!("spreading activation recall failed: {e:#}");
206 memory.log_retrieval_failure(RetrievalFailureRecord {
210 conversation_id: None,
211 turn_index: 0,
212 failure_type: RetrievalFailureType::Error,
213 retrieval_strategy: strategy_str.clone(),
214 query_text: query.to_owned(),
215 query_len: query.len(),
216 top_score: None,
217 confidence_threshold: None,
218 result_count: 0,
219 latency_ms,
220 edge_types: edge_types_json.clone(),
221 error_context: Some(format!("{e:#}")),
222 });
223 Vec::new()
224 }
225 Err(_) => {
226 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
227 tracing::warn!("spreading activation recall timed out ({timeout_ms}ms)");
228 memory.log_retrieval_failure(RetrievalFailureRecord {
229 conversation_id: None,
230 turn_index: 0,
231 failure_type: RetrievalFailureType::Timeout,
232 retrieval_strategy: strategy_str.clone(),
233 query_text: query.to_owned(),
234 query_len: query.len(),
235 top_score: None,
236 confidence_threshold: None,
237 result_count: 0,
238 latency_ms,
239 edge_types: edge_types_json.clone(),
240 error_context: Some(format!("timeout after {timeout_ms}ms")),
241 });
242 Vec::new()
243 }
244 };
245 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
246 if activated_facts.is_empty() {
247 memory.log_retrieval_failure(RetrievalFailureRecord {
248 conversation_id: None,
249 turn_index: 0,
250 failure_type: RetrievalFailureType::NoHit,
251 retrieval_strategy: strategy_str,
252 query_text: query.to_owned(),
253 query_len: query.len(),
254 top_score: None,
255 confidence_threshold: None,
256 result_count: 0,
257 latency_ms,
258 edge_types: edge_types_json,
259 error_context: None,
260 });
261 return Ok(None);
262 }
263 for f in &activated_facts {
264 let fact_text = f.edge.fact.replace(['\n', '\r', '<', '>'], " ");
265 let line = format!(
266 "- {} (confidence: {:.2}, activation: {:.2})\n",
267 fact_text, f.edge.confidence, f.activation_score
268 );
269 let line_tokens = tc.count_tokens(&line);
270 if tokens_so_far + line_tokens > budget_tokens {
271 break;
272 }
273 body.push_str(&line);
274 tokens_so_far += line_tokens;
275 }
276 }
277 GraphRetrievalStrategy::Bfs => {
278 let t0 = Instant::now();
279 let facts = match memory
280 .recall_graph(
281 query,
282 recall_limit,
283 max_hops,
284 None,
285 temporal_decay_rate,
286 &edge_types,
287 )
288 .await
289 {
290 Ok(f) => f,
291 Err(e) => {
292 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
293 memory.log_retrieval_failure(RetrievalFailureRecord {
294 conversation_id: None,
295 turn_index: 0,
296 failure_type: RetrievalFailureType::Error,
297 retrieval_strategy: strategy_str,
298 query_text: query.to_owned(),
299 query_len: query.len(),
300 top_score: None,
301 confidence_threshold: None,
302 result_count: 0,
303 latency_ms,
304 edge_types: edge_types_json,
305 error_context: Some(format!("{e:#}")),
306 });
307 return Err(e);
308 }
309 };
310 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
311 if facts.is_empty() {
312 memory.log_retrieval_failure(RetrievalFailureRecord {
313 conversation_id: None,
314 turn_index: 0,
315 failure_type: RetrievalFailureType::NoHit,
316 retrieval_strategy: strategy_str,
317 query_text: query.to_owned(),
318 query_len: query.len(),
319 top_score: None,
320 confidence_threshold: None,
321 result_count: 0,
322 latency_ms,
323 edge_types: edge_types_json,
324 error_context: None,
325 });
326 return Ok(None);
327 }
328 append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
329 }
330 GraphRetrievalStrategy::AStar => {
331 let t0 = Instant::now();
332 let facts = match memory
333 .recall_graph_astar(
334 query,
335 recall_limit,
336 max_hops,
337 temporal_decay_rate,
338 &edge_types,
339 )
340 .await
341 {
342 Ok(f) => f,
343 Err(e) => {
344 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
345 memory.log_retrieval_failure(RetrievalFailureRecord {
346 conversation_id: None,
347 turn_index: 0,
348 failure_type: RetrievalFailureType::Error,
349 retrieval_strategy: strategy_str,
350 query_text: query.to_owned(),
351 query_len: query.len(),
352 top_score: None,
353 confidence_threshold: None,
354 result_count: 0,
355 latency_ms,
356 edge_types: edge_types_json,
357 error_context: Some(format!("{e:#}")),
358 });
359 return Err(e);
360 }
361 };
362 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
363 if facts.is_empty() {
364 memory.log_retrieval_failure(RetrievalFailureRecord {
365 conversation_id: None,
366 turn_index: 0,
367 failure_type: RetrievalFailureType::NoHit,
368 retrieval_strategy: strategy_str,
369 query_text: query.to_owned(),
370 query_len: query.len(),
371 top_score: None,
372 confidence_threshold: None,
373 result_count: 0,
374 latency_ms,
375 edge_types: edge_types_json,
376 error_context: None,
377 });
378 return Ok(None);
379 }
380 append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
381 }
382 GraphRetrievalStrategy::WaterCircles => {
383 let ring_limit = graph_config.watercircles.ring_limit;
384 let t0 = Instant::now();
385 let facts = match memory
386 .recall_graph_watercircles(
387 query,
388 recall_limit,
389 max_hops,
390 ring_limit,
391 temporal_decay_rate,
392 &edge_types,
393 )
394 .await
395 {
396 Ok(f) => f,
397 Err(e) => {
398 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
399 memory.log_retrieval_failure(RetrievalFailureRecord {
400 conversation_id: None,
401 turn_index: 0,
402 failure_type: RetrievalFailureType::Error,
403 retrieval_strategy: strategy_str,
404 query_text: query.to_owned(),
405 query_len: query.len(),
406 top_score: None,
407 confidence_threshold: None,
408 result_count: 0,
409 latency_ms,
410 edge_types: edge_types_json,
411 error_context: Some(format!("{e:#}")),
412 });
413 return Err(e);
414 }
415 };
416 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
417 if facts.is_empty() {
418 memory.log_retrieval_failure(RetrievalFailureRecord {
419 conversation_id: None,
420 turn_index: 0,
421 failure_type: RetrievalFailureType::NoHit,
422 retrieval_strategy: strategy_str,
423 query_text: query.to_owned(),
424 query_len: query.len(),
425 top_score: None,
426 confidence_threshold: None,
427 result_count: 0,
428 latency_ms,
429 edge_types: edge_types_json,
430 error_context: None,
431 });
432 return Ok(None);
433 }
434 append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
435 }
436 GraphRetrievalStrategy::BeamSearch => {
437 let beam_width = graph_config.beam_search.beam_width;
438 let t0 = Instant::now();
439 let facts = match memory
440 .recall_graph_beam(
441 query,
442 recall_limit,
443 beam_width,
444 max_hops,
445 temporal_decay_rate,
446 &edge_types,
447 )
448 .await
449 {
450 Ok(f) => f,
451 Err(e) => {
452 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
453 memory.log_retrieval_failure(RetrievalFailureRecord {
454 conversation_id: None,
455 turn_index: 0,
456 failure_type: RetrievalFailureType::Error,
457 retrieval_strategy: strategy_str,
458 query_text: query.to_owned(),
459 query_len: query.len(),
460 top_score: None,
461 confidence_threshold: None,
462 result_count: 0,
463 latency_ms,
464 edge_types: edge_types_json,
465 error_context: Some(format!("{e:#}")),
466 });
467 return Err(e);
468 }
469 };
470 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
471 if facts.is_empty() {
472 memory.log_retrieval_failure(RetrievalFailureRecord {
473 conversation_id: None,
474 turn_index: 0,
475 failure_type: RetrievalFailureType::NoHit,
476 retrieval_strategy: strategy_str,
477 query_text: query.to_owned(),
478 query_len: query.len(),
479 top_score: None,
480 confidence_threshold: None,
481 result_count: 0,
482 latency_ms,
483 edge_types: edge_types_json,
484 error_context: None,
485 });
486 return Ok(None);
487 }
488 append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
489 }
490 GraphRetrievalStrategy::Hybrid => {
491 const CLASSIFIER_TIMEOUT_MS: u64 = 2_000;
492 let classifier_t0 = Instant::now();
493 let classified = if let Ok(s) = tokio::time::timeout(
494 std::time::Duration::from_millis(CLASSIFIER_TIMEOUT_MS),
495 memory.classify_graph_strategy(query),
496 )
497 .await
498 {
499 s
500 } else {
501 let latency_ms = classifier_t0
502 .elapsed()
503 .as_millis()
504 .try_into()
505 .unwrap_or(u64::MAX);
506 tracing::warn!(
507 "hybrid strategy classifier timed out after {CLASSIFIER_TIMEOUT_MS}ms, \
508 falling back to synapse"
509 );
510 memory.log_retrieval_failure(RetrievalFailureRecord {
511 conversation_id: None,
512 turn_index: 0,
513 failure_type: RetrievalFailureType::Timeout,
514 retrieval_strategy: "hybrid_classifier".to_owned(),
515 query_text: query.to_owned(),
516 query_len: query.len(),
517 top_score: None,
518 confidence_threshold: None,
519 result_count: 0,
520 latency_ms,
521 edge_types: edge_types_json.clone(),
522 error_context: Some(format!(
523 "classifier timeout after {CLASSIFIER_TIMEOUT_MS}ms"
524 )),
525 });
526 "synapse".to_owned()
527 };
528 tracing::debug!(classified_strategy = %classified, "hybrid dispatch: classified");
529 let t0 = Instant::now();
530 let facts = match classified.as_str() {
531 "astar" => {
532 match memory
533 .recall_graph_astar(
534 query,
535 recall_limit,
536 max_hops,
537 temporal_decay_rate,
538 &edge_types,
539 )
540 .await
541 {
542 Ok(f) => f,
543 Err(e) => {
544 let latency_ms =
545 t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
546 memory.log_retrieval_failure(RetrievalFailureRecord {
547 conversation_id: None,
548 turn_index: 0,
549 failure_type: RetrievalFailureType::Error,
550 retrieval_strategy: strategy_str,
551 query_text: query.to_owned(),
552 query_len: query.len(),
553 top_score: None,
554 confidence_threshold: None,
555 result_count: 0,
556 latency_ms,
557 edge_types: edge_types_json,
558 error_context: Some(format!("{e:#}")),
559 });
560 return Err(e);
561 }
562 }
563 }
564 "watercircles" => {
565 let ring_limit = graph_config.watercircles.ring_limit;
566 match memory
567 .recall_graph_watercircles(
568 query,
569 recall_limit,
570 max_hops,
571 ring_limit,
572 temporal_decay_rate,
573 &edge_types,
574 )
575 .await
576 {
577 Ok(f) => f,
578 Err(e) => {
579 let latency_ms =
580 t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
581 memory.log_retrieval_failure(RetrievalFailureRecord {
582 conversation_id: None,
583 turn_index: 0,
584 failure_type: RetrievalFailureType::Error,
585 retrieval_strategy: strategy_str,
586 query_text: query.to_owned(),
587 query_len: query.len(),
588 top_score: None,
589 confidence_threshold: None,
590 result_count: 0,
591 latency_ms,
592 edge_types: edge_types_json,
593 error_context: Some(format!("{e:#}")),
594 });
595 return Err(e);
596 }
597 }
598 }
599 "beam_search" => {
600 let beam_width = graph_config.beam_search.beam_width;
601 match memory
602 .recall_graph_beam(
603 query,
604 recall_limit,
605 beam_width,
606 max_hops,
607 temporal_decay_rate,
608 &edge_types,
609 )
610 .await
611 {
612 Ok(f) => f,
613 Err(e) => {
614 let latency_ms =
615 t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
616 memory.log_retrieval_failure(RetrievalFailureRecord {
617 conversation_id: None,
618 turn_index: 0,
619 failure_type: RetrievalFailureType::Error,
620 retrieval_strategy: strategy_str,
621 query_text: query.to_owned(),
622 query_len: query.len(),
623 top_score: None,
624 confidence_threshold: None,
625 result_count: 0,
626 latency_ms,
627 edge_types: edge_types_json,
628 error_context: Some(format!("{e:#}")),
629 });
630 return Err(e);
631 }
632 }
633 }
634 _ => {
635 let sa_params = zeph_memory::graph::SpreadingActivationParams {
636 decay_lambda: sa_config.decay_lambda,
637 max_hops: sa_config.max_hops,
638 activation_threshold: sa_config.activation_threshold,
639 inhibition_threshold: sa_config.inhibition_threshold,
640 max_activated_nodes: sa_config.max_activated_nodes,
641 temporal_decay_rate,
642 seed_structural_weight: sa_config.seed_structural_weight,
643 seed_community_cap: sa_config.seed_community_cap,
644 };
645 match memory
646 .recall_graph_activated(query, recall_limit, sa_params, &edge_types)
647 .await
648 {
649 Ok(activated) => activated
650 .into_iter()
651 .map(|f| zeph_memory::graph::types::GraphFact {
652 entity_name: f.edge.source_entity_id.to_string(),
653 relation: f.edge.relation.clone(),
654 target_name: f.edge.target_entity_id.to_string(),
655 fact: f.edge.fact.clone(),
656 entity_match_score: f.activation_score,
657 hop_distance: 0,
658 confidence: f.edge.confidence,
659 valid_from: Some(f.edge.valid_from.clone()),
660 edge_type: f.edge.edge_type,
661 retrieval_count: f.edge.retrieval_count,
662 edge_id: Some(f.edge.id),
663 })
664 .collect(),
665 Err(e) => {
666 let latency_ms =
667 t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
668 memory.log_retrieval_failure(RetrievalFailureRecord {
669 conversation_id: None,
670 turn_index: 0,
671 failure_type: RetrievalFailureType::Error,
672 retrieval_strategy: strategy_str,
673 query_text: query.to_owned(),
674 query_len: query.len(),
675 top_score: None,
676 confidence_threshold: None,
677 result_count: 0,
678 latency_ms,
679 edge_types: edge_types_json,
680 error_context: Some(format!("{e:#}")),
681 });
682 return Err(e);
683 }
684 }
685 }
686 };
687 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
688 if facts.is_empty() {
689 memory.log_retrieval_failure(RetrievalFailureRecord {
690 conversation_id: None,
691 turn_index: 0,
692 failure_type: RetrievalFailureType::NoHit,
693 retrieval_strategy: strategy_str,
694 query_text: query.to_owned(),
695 query_len: query.len(),
696 top_score: None,
697 confidence_threshold: None,
698 result_count: 0,
699 latency_ms,
700 edge_types: edge_types_json,
701 error_context: None,
702 });
703 return Ok(None);
704 }
705 append_graph_facts(&facts, &mut body, &mut tokens_so_far, budget_tokens, tc);
706 }
707 }
708
709 if body == GRAPH_FACTS_PREFIX {
710 return Ok(None);
711 }
712
713 Ok(Some(Message::from_legacy(Role::System, body)))
714}
715
716#[allow(clippy::too_many_arguments)]
727pub async fn fetch_semantic_recall_raw(
728 memory: Option<&zeph_memory::semantic::SemanticMemory>,
729 recall_limit: usize,
730 context_format: ContextFormat,
731 query: &str,
732 token_budget: usize,
733 tc: &TokenCounter,
734 router: Option<&dyn zeph_memory::AsyncMemoryRouter>,
735 low_confidence_threshold: Option<f32>,
736) -> Result<(Option<Message>, Option<f32>), zeph_memory::MemoryError> {
737 let Some(memory) = memory else {
738 return Ok((None, None));
739 };
740 if recall_limit == 0 || token_budget == 0 {
741 return Ok((None, None));
742 }
743
744 let t0 = Instant::now();
745 let recalled = if let Some(r) = router {
746 memory
747 .recall_routed_async(query, recall_limit, None, r)
748 .await?
749 } else {
750 memory.recall(query, recall_limit, None).await?
751 };
752 let latency_ms = t0.elapsed().as_millis().try_into().unwrap_or(u64::MAX);
753
754 if recalled.is_empty() {
755 memory.log_retrieval_failure(RetrievalFailureRecord {
756 conversation_id: None,
757 turn_index: 0,
758 failure_type: RetrievalFailureType::NoHit,
759 retrieval_strategy: "semantic".to_owned(),
760 query_text: query.to_owned(),
761 query_len: query.len(),
762 top_score: None,
763 confidence_threshold: low_confidence_threshold,
764 result_count: 0,
765 latency_ms,
766 edge_types: None,
767 error_context: None,
768 });
769 return Ok((None, None));
770 }
771
772 let top_score = recalled.first().map(|r| r.score);
773
774 if let (Some(score), Some(threshold)) = (top_score, low_confidence_threshold)
775 && score < threshold
776 {
777 memory.log_retrieval_failure(RetrievalFailureRecord {
778 conversation_id: None,
779 turn_index: 0,
780 failure_type: RetrievalFailureType::LowConfidence,
781 retrieval_strategy: "semantic".to_owned(),
782 query_text: query.to_owned(),
783 query_len: query.len(),
784 top_score: Some(score),
785 confidence_threshold: Some(threshold),
786 result_count: recalled.len(),
787 latency_ms,
788 edge_types: None,
789 error_context: None,
790 });
791 }
792 let initial_cap = (recall_limit * 512).min(token_budget * 3);
793 let mut recall_text = String::with_capacity(initial_cap);
794 recall_text.push_str(RECALL_PREFIX);
795 let mut tokens_used = tc.count_tokens(&recall_text);
796
797 for item in &recalled {
798 if item.message.content.starts_with("[skipped]")
799 || item.message.content.starts_with("[stopped]")
800 {
801 continue;
802 }
803 let entry = match context_format {
804 ContextFormat::Structured => format_structured_recall_entry(item),
805 ContextFormat::Plain => format_plain_recall_entry(item),
806 };
807 let entry_tokens = tc.count_tokens(&entry);
808 if tokens_used + entry_tokens > token_budget {
809 break;
810 }
811 recall_text.push_str(&entry);
812 tokens_used += entry_tokens;
813 }
814
815 if tokens_used > tc.count_tokens(RECALL_PREFIX) {
816 Ok((
817 Some(Message::from_parts(
818 Role::System,
819 vec![MessagePart::Recall { text: recall_text }],
820 )),
821 top_score,
822 ))
823 } else {
824 Ok((None, None))
825 }
826}
827
828pub async fn fetch_summaries_raw(
836 memory: Option<&zeph_memory::semantic::SemanticMemory>,
837 conversation_id: Option<zeph_memory::ConversationId>,
838 token_budget: usize,
839 tc: &TokenCounter,
840) -> Result<Option<Message>, zeph_memory::MemoryError> {
841 let (Some(memory), Some(cid)) = (memory, conversation_id) else {
842 return Ok(None);
843 };
844 if token_budget == 0 {
845 return Ok(None);
846 }
847
848 let summaries = memory.load_summaries(cid).await?;
849 if summaries.is_empty() {
850 return Ok(None);
851 }
852
853 let mut summary_text = String::from(SUMMARY_PREFIX);
854 let mut tokens_used = tc.count_tokens(&summary_text);
855
856 for summary in summaries.iter().rev() {
857 let first = summary.first_message_id.map_or(0, |m| m.0);
858 let last = summary.last_message_id.map_or(0, |m| m.0);
859 let entry = format!("- Messages {first}-{last}: {}\n", summary.content);
860 let cost = tc.count_tokens(&entry);
861 if tokens_used + cost > token_budget {
862 break;
863 }
864 summary_text.push_str(&entry);
865 tokens_used += cost;
866 }
867
868 if tokens_used > tc.count_tokens(SUMMARY_PREFIX) {
869 Ok(Some(Message::from_parts(
870 Role::System,
871 vec![MessagePart::Summary { text: summary_text }],
872 )))
873 } else {
874 Ok(None)
875 }
876}
877
878pub async fn fetch_cross_session_raw(
886 memory: Option<&zeph_memory::semantic::SemanticMemory>,
887 conversation_id: Option<zeph_memory::ConversationId>,
888 cross_session_score_threshold: f32,
889 query: &str,
890 token_budget: usize,
891 tc: &TokenCounter,
892) -> Result<Option<Message>, zeph_memory::MemoryError> {
893 let (Some(memory), Some(cid)) = (memory, conversation_id) else {
894 return Ok(None);
895 };
896 if token_budget == 0 {
897 return Ok(None);
898 }
899
900 let results: Vec<_> = memory
901 .search_session_summaries(query, 5, Some(cid))
902 .await?
903 .into_iter()
904 .filter(|r| r.score >= cross_session_score_threshold)
905 .collect();
906 if results.is_empty() {
907 return Ok(None);
908 }
909
910 let mut text = String::from(CROSS_SESSION_PREFIX);
911 let mut tokens_used = tc.count_tokens(&text);
912
913 for item in &results {
914 let entry = format!("- {}\n", item.summary_text);
915 let cost = tc.count_tokens(&entry);
916 if tokens_used + cost > token_budget {
917 break;
918 }
919 text.push_str(&entry);
920 tokens_used += cost;
921 }
922
923 if tokens_used > tc.count_tokens(CROSS_SESSION_PREFIX) {
924 Ok(Some(Message::from_parts(
925 Role::System,
926 vec![MessagePart::CrossSession { text }],
927 )))
928 } else {
929 Ok(None)
930 }
931}
932
933pub async fn fetch_semantic_recall(
947 view: &ContextAssemblyView<'_>,
948 query: &str,
949 token_budget: usize,
950 tc: &TokenCounter,
951 router: Option<&dyn zeph_memory::AsyncMemoryRouter>,
952) -> Result<(Option<Message>, Option<f32>), ContextError> {
953 fetch_semantic_recall_raw(
954 view.memory.as_deref(),
955 view.recall_limit,
956 view.context_format,
957 query,
958 token_budget,
959 tc,
960 router,
961 None,
962 )
963 .await
964 .map_err(ContextError::Memory)
965}
966
967fn format_plain_recall_entry(item: &zeph_memory::RecalledMessage) -> String {
968 let role_label = match item.message.role {
969 Role::User => "user",
970 Role::Assistant => "assistant",
971 Role::System => "system",
972 };
973 format!("- [{}] {}\n", role_label, item.message.content)
974}
975
976#[allow(clippy::map_unwrap_or)]
977fn format_structured_recall_entry(item: &zeph_memory::RecalledMessage) -> String {
978 let source = match item.message.role {
979 Role::User => "user",
980 Role::Assistant => "assistant",
981 Role::System => "system",
982 };
983 let date = item
988 .message
989 .metadata
990 .compacted_at
991 .and_then(|ts| chrono::DateTime::from_timestamp(ts, 0))
992 .map(|dt| dt.format("%Y-%m-%d").to_string())
993 .unwrap_or_else(|| "unknown".to_owned());
994 format!(
995 "[Memory | {} | {} | relevance: {:.2}]\n{}\n",
996 source, date, item.score, item.message.content
997 )
998}
999
1000pub async fn fetch_summaries(
1011 view: &ContextAssemblyView<'_>,
1012 token_budget: usize,
1013 tc: &TokenCounter,
1014) -> Result<Option<Message>, ContextError> {
1015 fetch_summaries_raw(
1016 view.memory.as_deref(),
1017 view.conversation_id,
1018 token_budget,
1019 tc,
1020 )
1021 .await
1022 .map_err(ContextError::Memory)
1023}
1024
1025pub async fn fetch_cross_session(
1039 view: &ContextAssemblyView<'_>,
1040 query: &str,
1041 token_budget: usize,
1042 tc: &TokenCounter,
1043) -> Result<Option<Message>, ContextError> {
1044 fetch_cross_session_raw(
1045 view.memory.as_deref(),
1046 view.conversation_id,
1047 view.cross_session_score_threshold,
1048 query,
1049 token_budget,
1050 tc,
1051 )
1052 .await
1053 .map_err(ContextError::Memory)
1054}
1055
/// Remaining cost and tool-call budget, rendered by [`BudgetHint::format_xml`] as a `<budget>`
/// block.
#[derive(Debug, Clone, PartialEq)]
pub struct BudgetHint {
    /// Remaining cost budget in cents; `None` omits the element.
    pub remaining_cost_cents: Option<f64>,
    /// Total cost budget in cents; `None` omits the element.
    pub total_budget_cents: Option<f64>,
    /// Tool calls still available; only rendered when `max_tool_calls` is non-zero.
    pub remaining_tool_calls: usize,
    /// Tool-call limit; `0` means the tool-call elements are not rendered.
    pub max_tool_calls: usize,
}

impl BudgetHint {
    /// Renders the hint as a `<budget>` element with one child element per line.
    ///
    /// Cost values are printed with two decimals. Children appear in the order
    /// `remaining_cost_cents`, `total_budget_cents`, `remaining_tool_calls`, `max_tool_calls`;
    /// the two tool-call elements are emitted together and only when `max_tool_calls > 0`.
    ///
    /// Returns `None` when `remaining_cost_cents` is `None` and `max_tool_calls` is `0`. A
    /// `total_budget_cents` value on its own is not enough to produce output.
    #[must_use]
    pub fn format_xml(&self) -> Option<String> {
        let has_tool_limit = self.max_tool_calls > 0;
        if self.remaining_cost_cents.is_none() && !has_tool_limit {
            return None;
        }

        let mut xml = String::from("<budget>");
        // Writing into a `String` cannot fail, so the `fmt::Result` values are discarded.
        if let Some(remaining) = self.remaining_cost_cents {
            let _ = write!(
                xml,
                "\n<remaining_cost_cents>{remaining:.2}</remaining_cost_cents>"
            );
        }
        if let Some(total) = self.total_budget_cents {
            let _ = write!(xml, "\n<total_budget_cents>{total:.2}</total_budget_cents>");
        }
        if has_tool_limit {
            let _ = write!(
                xml,
                "\n<remaining_tool_calls>{}</remaining_tool_calls>\n<max_tool_calls>{}</max_tool_calls>",
                self.remaining_tool_calls, self.max_tool_calls
            );
        }
        xml.push_str("\n</budget>");
        Some(xml)
    }
}
1128
#[cfg(test)]
mod budget_hint_tests {
    use super::*;

    /// Shorthand constructor so each test only spells out the values it cares about.
    fn hint(
        remaining_cost_cents: Option<f64>,
        total_budget_cents: Option<f64>,
        remaining_tool_calls: usize,
        max_tool_calls: usize,
    ) -> BudgetHint {
        BudgetHint {
            remaining_cost_cents,
            total_budget_cents,
            remaining_tool_calls,
            max_tool_calls,
        }
    }

    /// No cost data and no tool-call limit: nothing is rendered.
    #[test]
    fn format_xml_none_when_no_data() {
        let rendered = hint(None, None, 0, 0).format_xml();
        assert!(rendered.is_none());
    }

    /// Cost values are rendered with two decimals.
    #[test]
    fn format_xml_with_cost_only() {
        let xml = hint(Some(25.5), Some(100.0), 0, 0)
            .format_xml()
            .expect("cost data must produce output");
        for expected in [
            "<remaining_cost_cents>25.50</remaining_cost_cents>",
            "<total_budget_cents>100.00</total_budget_cents>",
        ] {
            assert!(xml.contains(expected));
        }
    }

    /// A non-zero tool-call limit renders both tool-call elements.
    #[test]
    fn format_xml_with_tool_calls_only() {
        let xml = hint(None, None, 3, 10)
            .format_xml()
            .expect("a tool-call limit must produce output");
        for expected in [
            "<remaining_tool_calls>3</remaining_tool_calls>",
            "<max_tool_calls>10</max_tool_calls>",
        ] {
            assert!(xml.contains(expected));
        }
    }

    /// The output is wrapped in a single `<budget>` element.
    #[test]
    fn format_xml_with_all_fields() {
        let xml = hint(Some(50.0), Some(100.0), 8, 10)
            .format_xml()
            .expect("full data must produce output");
        assert!(xml.starts_with("<budget>"));
        assert!(xml.ends_with("</budget>"));
    }
}