1use std::time::Duration;
25
26use futures::StreamExt as _;
27use futures::future::BoxFuture;
28use tracing::Instrument as _;
29use tracing::info_span;
30use zeph_common::memory::TokenCounting;
31use zeph_common::{ContextFidelity, PlannedToolHint};
32use zeph_llm::LlmError;
33use zeph_llm::LlmProviderDyn;
34use zeph_llm::provider::{EmbedFuture, Message, MessageMetadata, MessagePart, Role};
35
36use crate::assembler::CORRECTIONS_PREFIX;
37
38pub use zeph_config::FidelityConfig;
40
/// Object-safe abstraction over "embed one piece of text".
///
/// It lets [`embed_prepass_dyn`] take a `&dyn EmbedCall` and stay non-generic while still
/// serving both closure-based and provider-based callers.
trait EmbedCall: Send + Sync {
    /// Starts embedding `text`.
    ///
    /// The returned boxed future may borrow both `self` and `text` for `'a` and resolves to
    /// the embedding vector or an [`LlmError`].
    fn call<'a>(&'a self, text: &'a str) -> BoxFuture<'a, Result<Vec<f32>, LlmError>>;
}
49
/// Adapter that exposes a plain embedding closure through the [`EmbedCall`] trait.
struct ClosureEmbed<F>(F);

impl<F> EmbedCall for ClosureEmbed<F>
where
    F: Fn(&str) -> EmbedFuture + Send + Sync,
{
    /// Invokes the wrapped closure with `text` and boxes the future it returns.
    fn call<'a>(&'a self, text: &'a str) -> BoxFuture<'a, Result<Vec<f32>, LlmError>> {
        // `Box::pin` converts the closure's `EmbedFuture` into the boxed future type the
        // trait signature requires.
        Box::pin((self.0)(text))
    }
}
63
/// Adapter that exposes a borrowed [`LlmProviderDyn`] through the [`EmbedCall`] trait.
struct ProviderEmbed<'p>(&'p dyn LlmProviderDyn);

impl EmbedCall for ProviderEmbed<'_> {
    /// Forwards directly to the provider's `embed` method.
    fn call<'a>(&'a self, text: &'a str) -> BoxFuture<'a, Result<Vec<f32>, LlmError>> {
        self.0.embed(text)
    }
}
72
/// Computes embeddings for every message in `messages` that still needs one.
///
/// A message is skipped when it is exempt (see [`is_exempt`]), has empty content, or already
/// carries `metadata.embedding`. The remaining messages are embedded through `embed` with at
/// most `config.embed_concurrency` calls in flight, each bounded by
/// `config.embed_timeout_secs`.
///
/// Returns a map from the message's index in `messages` to its embedding. Messages whose
/// embed call failed or timed out are logged and simply absent from the map.
async fn embed_prepass_dyn(
    messages: &[Message],
    embed: &dyn EmbedCall,
    config: &FidelityConfig,
    inserted_count: usize,
) -> std::collections::HashMap<usize, Vec<f32>> {
    // A concurrency of zero is replaced by 1 (with a warning) before it reaches
    // `buffer_unordered`.
    let concurrency = if config.embed_concurrency == 0 {
        tracing::warn!(
            "embed_concurrency is 0, clamping to 1; set a positive value in [memory.fidelity]"
        );
        1
    } else {
        config.embed_concurrency
    };

    // Lazily yields `(index, owned text)` for each message that needs an embedding.
    let tasks = messages.iter().enumerate().filter_map(|(i, msg)| {
        if is_exempt(msg, i, inserted_count)
            || msg.content.is_empty()
            || msg.metadata.embedding.is_some()
        {
            return None;
        }
        // The token limit is converted to a byte budget of `n * 4`
        // (presumably a ~4 bytes-per-token heuristic; confirm against the config docs).
        let content = match config.max_embed_input_tokens {
            Some(n) => truncate_to_byte_limit(&msg.content, n.saturating_mul(4)),
            None => msg.content.clone(),
        };
        Some((i, content))
    });

    let timeout = Duration::from_secs(config.embed_timeout_secs);
    futures::stream::iter(tasks)
        .map(|(i, content)| async move {
            // Outer `Result` is the timeout, inner `Result` is the embed call itself.
            let result = tokio::time::timeout(timeout, embed.call(&content)).await;
            match result {
                Ok(Ok(vec)) => Some((i, vec)),
                Ok(Err(e)) => {
                    tracing::warn!(idx = i, err = %e, "embed_prepass: embed failed, skipping");
                    None
                }
                Err(_) => {
                    tracing::warn!(idx = i, "embed_prepass: embed timed out, skipping");
                    None
                }
            }
        })
        // Completion order is arbitrary; the index travels with each result.
        .buffer_unordered(concurrency)
        // Drop the failed/timed-out entries.
        .filter_map(|opt| async move { opt })
        .collect()
        .await
}
128
/// Embeds the messages that still lack an embedding, using a caller-supplied closure.
///
/// Public, generic front door for [`embed_prepass_dyn`]: the closure is wrapped in
/// [`ClosureEmbed`] and all filtering, truncation, concurrency and timeout handling is
/// delegated.
///
/// # Parameters
/// - `messages`: messages to inspect; indices in the result refer to this slice.
/// - `embed`: closure that starts an embedding request for a piece of text.
/// - `config`: supplies the concurrency, timeout and optional input-size limit.
/// - `inserted_count`: forwarded to the exemption check.
///
/// # Returns
/// Map from message index to embedding for every call that succeeded in time.
#[tracing::instrument(name = "context.fidelity.embed_prepass", skip_all)]
pub async fn embed_prepass<F>(
    messages: &[Message],
    embed: &F,
    config: &FidelityConfig,
    inserted_count: usize,
) -> std::collections::HashMap<usize, Vec<f32>>
where
    F: Fn(&str) -> EmbedFuture + Send + Sync,
{
    embed_prepass_dyn(messages, &ClosureEmbed(embed), config, inserted_count).await
}
170
171fn truncate_to_byte_limit(s: &str, max_bytes: usize) -> String {
180 if s.len() <= max_bytes {
181 return s.to_string();
182 }
183 let boundary = s.floor_char_boundary(max_bytes);
184 s[..boundary].to_string()
185}
186
/// Scoring outcome for a single message.
struct FidelityScore {
    /// Combined score; `compute_scores` clamps it to `0.0..=1.0` before storing it.
    score: f32,
    /// Fidelity level the message should be rendered at.
    level: ContextFidelity,
    /// Token count of the message content taken when it was scored; entries created by
    /// tool-pair propagation for a message that was never scored carry `0`.
    original_tokens: u32,
}
192
/// Stateless handle whose `score_and_apply` method scores a message window and rewrites each
/// message to its assigned fidelity level.
pub struct FidelityScorer;
219
220impl FidelityScorer {
221 #[tracing::instrument(name = "context.fidelity.score_and_apply", skip_all)]
253 #[allow(clippy::too_many_arguments, clippy::too_many_lines)]
254 pub async fn score_and_apply(
255 &self,
256 messages: &mut Vec<Message>,
257 query: &str,
258 planned_tools: &[PlannedToolHint],
259 config: &FidelityConfig,
260 tc: &dyn TokenCounting,
261 inserted_count: usize,
262 allow_upgrade: bool,
263 embed_provider: Option<&dyn LlmProviderDyn>,
264 compress_provider: Option<&dyn LlmProviderDyn>,
265 ) {
266 if !config.enabled || messages.is_empty() {
267 return;
268 }
269
270 let query_embedding: Option<Vec<f32>> = if let (true, Some(p)) =
272 (config.semantic_scoring_provider.is_some(), embed_provider)
273 && p.supports_embeddings()
274 {
275 match tokio::time::timeout(
276 Duration::from_secs(config.embed_timeout_secs),
277 p.embed(query),
278 )
279 .instrument(info_span!("context.fidelity.embed_query"))
280 .await
281 {
282 Ok(Ok(v)) => Some(v),
283 Ok(Err(e)) => {
284 tracing::warn!(error = %e, "semantic scoring provider unavailable, falling back to keyword");
285 None
286 }
287 Err(_) => {
288 tracing::warn!("fidelity query embed timed out, falling back to keyword");
289 None
290 }
291 }
292 } else {
293 None
294 };
295
296 if let (Some(q_emb), Some(p)) = (&query_embedding, embed_provider) {
300 let n = messages.len();
301 let score_end = if n > config.max_scored_messages {
302 n.saturating_sub(config.exempt_tail_messages)
303 } else {
304 n
305 };
306 let embeddings = embed_prepass_dyn(
307 &messages[..score_end],
308 &ProviderEmbed(p),
309 config,
310 inserted_count,
311 )
312 .instrument(info_span!("context.fidelity.embed_prepass"))
313 .await;
314 for (i, emb) in embeddings {
315 messages[i].metadata.embedding = Some(emb);
316 }
317 let _ = q_emb; }
319
320 let scores = compute_scores(
321 messages,
322 query,
323 planned_tools,
324 config,
325 tc,
326 inserted_count,
327 allow_upgrade,
328 query_embedding.as_deref(),
329 );
330 apply_scores(messages, &scores, config, tc, compress_provider).await;
331
332 let _merge_span = info_span!("context.fidelity.merge").entered();
333 let merged_count = merge_consecutive_placeholders(messages);
334 tracing::debug!(merged_count, "fidelity merge complete");
335 }
336}
337
338#[allow(clippy::too_many_arguments)]
339fn compute_scores(
340 messages: &[Message],
341 query: &str,
342 planned_tools: &[PlannedToolHint],
343 config: &FidelityConfig,
344 tc: &dyn TokenCounting,
345 inserted_count: usize,
346 allow_upgrade: bool,
347 query_embedding: Option<&[f32]>,
348) -> Vec<Option<FidelityScore>> {
349 let n = messages.len();
350
351 let score_end = if n > config.max_scored_messages {
353 n.saturating_sub(config.exempt_tail_messages)
354 } else {
355 n
356 };
357
358 let semantic_active = query.len() >= config.min_query_length;
359 let plan_active = !planned_tools.is_empty();
360 let query_words: std::collections::HashSet<&str> = if semantic_active {
362 query.split_whitespace().collect()
363 } else {
364 std::collections::HashSet::default()
365 };
366
367 let mut weight_sum = config.w_temporal + config.w_importance;
369 if semantic_active {
370 weight_sum += config.w_semantic;
371 }
372 if plan_active {
373 weight_sum += config.w_plan;
374 }
375 if weight_sum <= 0.0 {
376 weight_sum = 1.0;
377 }
378
379 #[allow(clippy::cast_precision_loss)]
380 let max_dist = score_end.saturating_sub(1) as f32;
381
382 let mut scores: Vec<Option<FidelityScore>> = (0..n).map(|_| None).collect();
383
384 for (i, msg) in messages.iter().enumerate().take(score_end) {
385 if is_exempt(msg, i, inserted_count) {
386 continue;
387 }
388
389 #[allow(clippy::cast_possible_truncation)]
390 let original_tokens = tc.count_tokens(&msg.content) as u32;
391
392 #[allow(clippy::cast_precision_loss)]
395 let temporal = if max_dist > 0.0 {
396 let distance_from_end = (score_end - 1 - i) as f32;
397 1.0 - distance_from_end / max_dist
398 } else {
399 1.0
400 };
401 let importance = if msg
403 .parts
404 .iter()
405 .any(|p| matches!(p, MessagePart::ToolResult { .. }))
406 {
407 0.4
408 } else {
409 role_weight(msg.role)
410 };
411 let semantic = if semantic_active {
412 match (query_embedding, msg.metadata.embedding.as_deref()) {
413 (Some(q_emb), Some(m_emb)) => semantic_overlap(m_emb, q_emb),
414 _ => keyword_overlap(&msg.content, &query_words),
415 }
416 } else {
417 0.0
418 };
419 let plan = if plan_active {
420 plan_relevance(&msg.content, planned_tools)
421 } else {
422 0.0
423 };
424
425 let raw = config.w_temporal * temporal
426 + config.w_importance * importance
427 + if semantic_active {
428 config.w_semantic * semantic
429 } else {
430 0.0
431 }
432 + if plan_active {
433 config.w_plan * plan
434 } else {
435 0.0
436 };
437
438 let score = (raw / weight_sum).clamp(0.0, 1.0);
439 let candidate_level = score_to_level(score, config);
440
441 let level = if allow_upgrade {
446 candidate_level
447 } else {
448 match msg.metadata.fidelity_tag {
449 Some(ContextFidelity::Placeholder) => ContextFidelity::Placeholder,
450 Some(ContextFidelity::Compressed) => {
451 if candidate_level == ContextFidelity::Full {
452 ContextFidelity::Compressed
453 } else {
454 candidate_level
455 }
456 }
457 _ => candidate_level,
458 }
459 };
460
461 scores[i] = Some(FidelityScore {
462 score,
463 level,
464 original_tokens,
465 });
466 }
467
468 apply_tool_pair_atomicity(messages, &mut scores, config);
469 scores
470}
471
472#[tracing::instrument(name = "context.fidelity.apply", skip_all)]
473async fn apply_scores(
474 messages: &mut [Message],
475 scores: &[Option<FidelityScore>],
476 config: &FidelityConfig,
477 tc: &dyn TokenCounting,
478 provider: Option<&dyn LlmProviderDyn>,
479) {
480 let (mut full_count, mut compressed_count, mut placeholder_count, mut tokens_saved) =
481 (0u32, 0u32, 0u32, 0u32);
482
483 for (i, msg) in messages.iter_mut().enumerate() {
484 let Some(ref fs) = scores[i] else { continue };
485 match fs.level {
486 ContextFidelity::Compressed => {
487 #[allow(clippy::cast_possible_truncation)]
488 let original_tokens = fs.original_tokens;
489 render_compressed(msg, config, tc, provider).await;
490 #[allow(clippy::cast_possible_truncation)]
491 let new_tokens = tc.count_tokens(&msg.content) as u32;
492 tokens_saved += original_tokens.saturating_sub(new_tokens);
493 compressed_count += 1;
494 }
495 ContextFidelity::Placeholder => {
496 render_placeholder(msg, fs.score, fs.original_tokens);
497 placeholder_count += 1;
498 }
499 _ => {
501 msg.metadata.fidelity_tag = Some(ContextFidelity::Full);
502 full_count += 1;
503 }
504 }
505 }
506
507 tracing::debug!(
508 full_count,
509 compressed_count,
510 placeholder_count,
511 tokens_saved,
512 "fidelity apply complete"
513 );
514}
515
516fn is_exempt(msg: &Message, idx: usize, inserted_count: usize) -> bool {
517 (idx == 0 && msg.role == Role::System)
522 || msg.metadata.focus_pinned
523 || msg.content.starts_with(CORRECTIONS_PREFIX)
524 || (idx >= 1 && idx < 1 + inserted_count)
525}
526
527fn role_weight(role: Role) -> f32 {
528 match role {
529 Role::System => 1.0,
530 Role::User => 0.8,
531 Role::Assistant | _ => 0.6,
532 }
533}
534
/// Cosine similarity of two vectors, restricted to `0.0..=1.0`.
///
/// Returns `0.0` when the vectors are empty, differ in length, either has zero magnitude,
/// or the result is not a finite number (for example because an input contains NaN or an
/// infinity). Negative similarities are clamped to `0.0`.
///
/// Sums are accumulated in `f64` so that large-magnitude `f32` components cannot overflow
/// the intermediate squares; without that, such inputs would yield `inf / inf = NaN`, and a
/// NaN score downstream silently compares below every threshold.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    let (mut dot, mut sq_a, mut sq_b) = (0.0f64, 0.0f64, 0.0f64);
    for (x, y) in a.iter().zip(b) {
        let (x, y) = (f64::from(*x), f64::from(*y));
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }
    if sq_a == 0.0 || sq_b == 0.0 {
        return 0.0;
    }

    let cosine = dot / (sq_a.sqrt() * sq_b.sqrt());
    if !cosine.is_finite() {
        // `clamp` would pass NaN through unchanged, so reject it explicitly.
        return 0.0;
    }
    // The value is in [0, 1] after clamping, so narrowing to f32 only loses precision.
    #[allow(clippy::cast_possible_truncation)]
    let bounded = cosine.clamp(0.0, 1.0) as f32;
    bounded
}
547
/// Semantic relevance of a message to the query: the cosine similarity of their embeddings
/// as computed by [`cosine_similarity`].
fn semantic_overlap(msg_embedding: &[f32], query_embedding: &[f32]) -> f32 {
    cosine_similarity(msg_embedding, query_embedding)
}
551
/// Keyword relevance of `content` to a query, in `0.0..=1.0`.
///
/// Both sides are treated as sets of whitespace-separated words (exact, case-sensitive
/// match). The number of shared words is divided by the size of the smaller set; the result
/// is `0.0` when either set is empty.
fn keyword_overlap(content: &str, query_words: &std::collections::HashSet<&str>) -> f32 {
    let tokens: std::collections::HashSet<&str> = content.split_whitespace().collect();
    let denominator = tokens.len().min(query_words.len());
    if denominator == 0 {
        return 0.0;
    }
    // Counting query words present in the content set equals the set-intersection size.
    let shared = query_words
        .iter()
        .filter(|word| tokens.contains(**word))
        .count();
    #[allow(clippy::cast_precision_loss)]
    let ratio = shared as f32 / denominator as f32;
    ratio.clamp(0.0, 1.0)
}
565
566fn plan_relevance(content: &str, planned_tools: &[PlannedToolHint]) -> f32 {
570 if planned_tools.is_empty() {
571 return 0.0;
572 }
573 let content_words: std::collections::HashSet<&str> = content.split_whitespace().collect();
574 let mut weighted_sum = 0.0f32;
575 let mut weight_total = 0.0f32;
576 for hint in planned_tools {
577 let dist = f32::from(hint.distance_from_current.max(1));
578 let weight = 1.0 / dist;
579 weight_total += weight;
580 let hint_words: std::collections::HashSet<&str> =
581 hint.keywords.iter().map(String::as_str).collect();
582 let min_len = content_words.len().min(hint_words.len());
583 if min_len == 0 {
584 continue;
585 }
586 #[allow(clippy::cast_precision_loss)]
587 let overlap = content_words.intersection(&hint_words).count() as f32 / min_len as f32;
588 weighted_sum += weight * overlap.clamp(0.0, 1.0);
589 }
590 if weight_total <= 0.0 {
591 return 0.0;
592 }
593 (weighted_sum / weight_total).clamp(0.0, 1.0)
594}
595
/// Forces a tool call and its matching tool result to share one score and one level.
///
/// For every `ToolUse` part whose id has a matching `ToolResult` part somewhere in
/// `messages`, both messages receive the lower of the two scores and the most restrictive
/// of: the two current levels and the level that lower score maps to.
///
/// NOTE(review): a pair member with no score yet (exempt, or outside the scored prefix) is
/// read as score `1.0` / level `Full`, but afterwards it *does* get an entry written — with
/// `original_tokens` 0 — and can therefore be compressed or replaced by a placeholder if its
/// partner scored low. Confirm this is intended for pinned / tail messages.
fn apply_tool_pair_atomicity(
    messages: &[Message],
    scores: &mut [Option<FidelityScore>],
    config: &FidelityConfig,
) {
    // tool_use_id -> index of the message holding the result; a later duplicate id overwrites
    // an earlier one.
    let mut tool_result_map: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    for (i, msg) in messages.iter().enumerate() {
        for part in &msg.parts {
            if let MessagePart::ToolResult { tool_use_id, .. } = part {
                tool_result_map.insert(tool_use_id.as_str(), i);
            }
        }
    }

    // Walk from the newest message to the oldest.
    for (i, msg) in messages.iter().enumerate().rev() {
        for part in &msg.parts {
            if let MessagePart::ToolUse { id, .. } = part
                && let Some(&result_idx) = tool_result_map.get(id.as_str())
            {
                // Missing entries count as the best possible score.
                let score_a = scores[i].as_ref().map_or(1.0, |s| s.score);
                let score_b = scores[result_idx].as_ref().map_or(1.0, |s| s.score);
                let min_score = score_a.min(score_b);

                // Levels are combined separately from the float score because a level may
                // already have been lowered independently of the score (e.g. by the
                // no-upgrade floor in `compute_scores`).
                let level_a = scores[i]
                    .as_ref()
                    .map_or(ContextFidelity::Full, |s| s.level);
                let level_b = scores[result_idx]
                    .as_ref()
                    .map_or(ContextFidelity::Full, |s| s.level);
                let float_level = score_to_level(min_score, config);
                let min_level = more_restrictive(more_restrictive(level_a, level_b), float_level);

                // Each side keeps its own token count; only score and level are shared.
                let tokens_a = scores[i].as_ref().map_or(0, |s| s.original_tokens);
                let tokens_b = scores[result_idx].as_ref().map_or(0, |s| s.original_tokens);
                scores[i] = Some(FidelityScore {
                    score: min_score,
                    level: min_level,
                    original_tokens: tokens_a,
                });
                scores[result_idx] = Some(FidelityScore {
                    score: min_score,
                    level: min_level,
                    original_tokens: tokens_b,
                });
            }
        }
    }
}
654
655fn more_restrictive(a: ContextFidelity, b: ContextFidelity) -> ContextFidelity {
659 use ContextFidelity::{Compressed, Full, Placeholder};
660 match (a, b) {
661 (Placeholder, _) | (_, Placeholder) => Placeholder,
662 (Compressed, _) | (_, Compressed) => Compressed,
663 _ => Full,
664 }
665}
666
667fn score_to_level(score: f32, config: &FidelityConfig) -> ContextFidelity {
668 if score >= config.full_threshold {
669 ContextFidelity::Full
670 } else if score >= config.compressed_threshold {
671 ContextFidelity::Compressed
672 } else {
673 ContextFidelity::Placeholder
674 }
675}
676
/// Rewrites `msg` in place to its compressed form and tags it `Compressed`.
///
/// Exactly one of three content sources is used, in this priority order:
/// 1. A summary already stored in `metadata.deferred_summary` (it is taken, leaving `None`).
/// 2. If a compress provider is configured *and* a provider handle was passed: an LLM
///    summary, requested only when the content is more than twice
///    `config.compressed_max_tokens`. On error or timeout the content is left for the
///    truncation step below.
/// 3. Otherwise the content is byte-capped when `config.max_compress_input_tokens` is set.
///
/// Whatever the source, the content is finally truncated to `config.compressed_max_tokens`
/// tokens and all message parts are cleared.
async fn render_compressed(
    msg: &mut Message,
    config: &FidelityConfig,
    tc: &dyn TokenCounting,
    provider: Option<&dyn LlmProviderDyn>,
) {
    if let Some(summary) = msg.metadata.deferred_summary.take() {
        msg.content = summary;
    } else if config.compress_provider.is_some()
        && let Some(p) = provider
    {
        // Counted before the input cap below, so the span reports the uncapped size.
        let input_tokens = tc.count_tokens(&msg.content);
        // Short content is not worth an LLM round-trip; it goes straight to truncation.
        if input_tokens > config.compressed_max_tokens * 2 && input_tokens > 0 {
            // The cap mutates `msg.content` itself, so a failed LLM call falls back to the
            // already-capped text.
            if let Some(max_in) = config.max_compress_input_tokens {
                apply_input_cap(&mut msg.content, max_in);
            }

            let prompt = format!(
                "Summarize in {} tokens or fewer: {}",
                config.compressed_max_tokens, msg.content
            );
            let req = vec![Message {
                role: Role::User,
                content: prompt,
                parts: vec![],
                metadata: MessageMetadata::default(),
            }];

            let span = info_span!(
                "context.fidelity.compress_llm",
                input_tokens,
                cached = false,
            );
            let result = tokio::time::timeout(
                Duration::from_secs(config.compress_timeout_secs),
                p.chat(&req),
            )
            .instrument(span)
            .await;

            match result {
                Ok(Ok(summary)) => {
                    // Store the summary on the message as well, so a later pass finds it in
                    // `deferred_summary` and takes branch 1 instead of calling the LLM again.
                    msg.metadata.deferred_summary = Some(summary.clone());
                    msg.content = summary;
                }
                Ok(Err(e)) => {
                    tracing::debug!(error = %e, "compress_llm failed, falling back to truncation");
                }
                Err(_) => {
                    tracing::warn!("compress_llm timed out, falling back to truncation");
                }
            }
        }
    } else if let Some(max_in) = config.max_compress_input_tokens {
        apply_input_cap(&mut msg.content, max_in);
    }

    // Final guarantee: whatever the source, the content fits the compressed token budget.
    truncate_to_tokens(&mut msg.content, config.compressed_max_tokens, tc);
    msg.parts.clear();
    msg.metadata.fidelity_tag = Some(ContextFidelity::Compressed);
}
745
746pub fn apply_input_cap(content: &mut String, max_tokens: usize) {
754 let max_bytes = max_tokens.saturating_mul(4);
755 if content.len() > max_bytes {
756 let boundary = content.floor_char_boundary(max_bytes);
757 content.truncate(boundary);
758 }
759}
760
761fn truncate_to_tokens(content: &mut String, max_tokens: usize, tc: &dyn TokenCounting) {
762 if tc.count_tokens(content) <= max_tokens {
763 return;
764 }
765 let mut lo: usize = 0;
770 let mut hi: usize = content.len();
771 while hi - lo > 1 {
772 let mid = content.floor_char_boundary(usize::midpoint(lo, hi));
773 if mid == lo {
774 hi = mid;
777 } else if tc.count_tokens(&content[..mid]) <= max_tokens {
778 lo = mid;
779 } else {
780 hi = mid;
781 }
782 }
783 content.truncate(lo);
784}
785
786fn render_placeholder(msg: &mut Message, score: f32, original_tokens: u32) {
787 let role_str = match msg.role {
788 Role::System => "system",
789 Role::Assistant => "assistant",
790 Role::User | _ => "user",
791 };
792 msg.content = format!(
793 "[placeholder: role={role_str}, original_tokens={original_tokens}, importance={score:.2}]"
794 );
795 msg.parts.clear();
796 msg.metadata.fidelity_tag = Some(ContextFidelity::Placeholder);
797}
798
799fn merge_consecutive_placeholders(messages: &mut Vec<Message>) -> usize {
803 let mut merged_count = 0usize;
804 let mut i = 0;
805 while i < messages.len() {
806 if messages[i].metadata.fidelity_tag != Some(ContextFidelity::Placeholder)
807 || messages[i].role == Role::System
808 {
809 i += 1;
810 continue;
811 }
812 let role = messages[i].role;
813 let mut j = i + 1;
814 while j < messages.len()
815 && messages[j].metadata.fidelity_tag == Some(ContextFidelity::Placeholder)
816 && messages[j].role == role
817 {
818 j += 1;
819 }
820 if j - i <= 1 {
821 i += 1;
822 continue;
823 }
824 let count = j - i;
825 let mut total_tokens = 0u32;
826 let mut importance_sum = 0.0f32;
827 for msg in &messages[i..j] {
828 total_tokens += parse_placeholder_tokens(&msg.content);
829 importance_sum += parse_placeholder_importance(&msg.content);
830 }
831 debug_assert!(count >= 2, "placeholder merge triggered with count={count}");
832 #[allow(clippy::cast_precision_loss)]
833 let avg_importance = if count > 0 {
834 importance_sum / count as f32
835 } else {
836 0.0
837 };
838 let role_str = match role {
839 Role::System => "system",
840 Role::Assistant => "assistant",
841 Role::User | _ => "user",
842 };
843 let merged_content = format!(
844 "[placeholder: {count} messages, role={role_str}, total_tokens={total_tokens}, avg_importance={avg_importance:.2}]"
845 );
846 let first = messages[i].clone();
847 messages.drain(i..j);
848 messages.insert(
849 i,
850 Message {
851 role: first.role,
852 content: merged_content,
853 parts: vec![],
854 metadata: {
855 let mut m = first.metadata;
856 m.fidelity_tag = Some(ContextFidelity::Placeholder);
857 m
858 },
859 },
860 );
861 merged_count += count - 1;
862 i += 1;
863 }
864 merged_count
865}
866
/// Extracts the token count from a placeholder line.
///
/// The line is split on commas; the first field that starts with `original_tokens=` or
/// `total_tokens=` and whose value (ignoring trailing `]` and surrounding whitespace) parses
/// as `u32` wins. Returns `0` when no such field exists.
fn parse_placeholder_tokens(content: &str) -> u32 {
    const KEYS: [&str; 2] = ["original_tokens=", "total_tokens="];

    content
        .split(',')
        .map(str::trim)
        .find_map(|field| {
            KEYS.iter().find_map(|key| {
                field
                    .strip_prefix(*key)?
                    .trim_end_matches(']')
                    .trim()
                    .parse::<u32>()
                    .ok()
            })
        })
        .unwrap_or(0)
}
880
/// Extracts the importance value from a placeholder line.
///
/// The line is split on commas; the first field that starts with `importance=` or
/// `avg_importance=` and whose value (ignoring trailing `]` and surrounding whitespace)
/// parses as `f32` wins. Returns `0.0` when no such field exists.
fn parse_placeholder_importance(content: &str) -> f32 {
    const KEYS: [&str; 2] = ["importance=", "avg_importance="];

    content
        .split(',')
        .map(str::trim)
        .find_map(|field| {
            KEYS.iter().find_map(|key| {
                field
                    .strip_prefix(*key)?
                    .trim_end_matches(']')
                    .trim()
                    .parse::<f32>()
                    .ok()
            })
        })
        .unwrap_or(0.0)
}
894
895#[cfg(test)]
896mod tests {
897 use super::*;
898 use zeph_common::ProviderName;
899 use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
900
901 struct FixedTc(usize);
902 impl TokenCounting for FixedTc {
903 fn count_tokens(&self, text: &str) -> usize {
904 text.len() / self.0.max(1)
905 }
906
907 fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize {
908 0
909 }
910 }
911
912 fn make_msg(role: Role, content: &str) -> Message {
913 Message {
914 role,
915 content: content.to_string(),
916 parts: vec![],
917 metadata: MessageMetadata::default(),
918 }
919 }
920
921 fn make_cfg() -> FidelityConfig {
922 FidelityConfig {
923 enabled: true,
924 w_semantic: 0.3,
925 w_temporal: 0.3,
926 w_importance: 0.2,
927 w_plan: 0.2,
928 full_threshold: 0.7,
929 compressed_threshold: 0.3,
930 compressed_max_tokens: 50,
931 regrade_threshold: 0.6,
932 min_query_length: 8,
933 max_scored_messages: 500,
934 exempt_tail_messages: 0,
935 compress_provider: None,
936 semantic_scoring_provider: None,
937 lookahead_depth: 3,
938 embed_concurrency: 32,
939 max_embed_input_tokens: None,
940 max_compress_input_tokens: None,
941 embed_timeout_secs: 30,
942 compress_timeout_secs: 30,
943 }
944 }
945
946 #[tokio::test]
948 async fn empty_window_no_change() {
949 let scorer = FidelityScorer;
950 let cfg = make_cfg();
951 let tc = FixedTc(4);
952 let mut messages: Vec<Message> = vec![];
953 scorer
954 .score_and_apply(
955 &mut messages,
956 "query text",
957 &[],
958 &cfg,
959 &tc,
960 0,
961 false,
962 None,
963 None,
964 )
965 .await;
966 assert!(messages.is_empty());
967 }
968
969 #[tokio::test]
971 async fn all_exempt_no_downgrade() {
972 let scorer = FidelityScorer;
973 let cfg = make_cfg();
974 let tc = FixedTc(4);
975 let mut messages = vec![
976 make_msg(Role::System, "system prompt"),
977 make_msg(Role::User, "memory context"),
979 ];
980 scorer
981 .score_and_apply(&mut messages, "short", &[], &cfg, &tc, 1, false, None, None)
982 .await;
983 for msg in &messages {
984 assert!(
985 msg.metadata.fidelity_tag.is_none()
986 || msg.metadata.fidelity_tag == Some(ContextFidelity::Full)
987 );
988 }
989 }
990
991 #[tokio::test]
993 async fn tool_pair_atomicity() {
994 let scorer = FidelityScorer;
995 let cfg = FidelityConfig {
997 full_threshold: 0.9,
998 compressed_threshold: 0.5,
999 ..make_cfg()
1000 };
1001 let tc = FixedTc(4);
1002 let tool_use_id = "abc123".to_string();
1003 let mut tool_use_msg = make_msg(Role::Assistant, "calling tool");
1004 tool_use_msg.parts = vec![MessagePart::ToolUse {
1005 id: tool_use_id.clone(),
1006 name: "shell".to_string(),
1007 input: serde_json::json!({}),
1008 }];
1009 let mut tool_result_msg = make_msg(Role::User, "tool result body");
1010 tool_result_msg.parts = vec![MessagePart::ToolResult {
1011 tool_use_id: tool_use_id.clone(),
1012 content: "result".to_string(),
1013 is_error: false,
1014 }];
1015 let mut messages = vec![
1016 make_msg(Role::System, "system"),
1017 tool_use_msg,
1018 tool_result_msg,
1019 ];
1020 scorer
1021 .score_and_apply(
1022 &mut messages,
1023 "completely unrelated query blah",
1024 &[],
1025 &cfg,
1026 &tc,
1027 0,
1028 false,
1029 None,
1030 None,
1031 )
1032 .await;
1033 let tag_a = messages[1].metadata.fidelity_tag;
1034 let tag_b = messages[2].metadata.fidelity_tag;
1035 assert_eq!(tag_a, tag_b, "tool pair must share fidelity level");
1036 }
1037
1038 #[tokio::test]
1040 async fn same_role_placeholder_merge() {
1041 let scorer = FidelityScorer;
1042 let cfg = FidelityConfig {
1044 full_threshold: 2.0, compressed_threshold: 1.5, ..make_cfg()
1047 };
1048 let tc = FixedTc(4);
1049 let mut messages: Vec<Message> = std::iter::once(make_msg(Role::System, "system"))
1050 .chain((0..5).map(|i| make_msg(Role::Assistant, &format!("msg {i}"))))
1051 .collect();
1052 scorer
1053 .score_and_apply(
1054 &mut messages,
1055 "some query here",
1056 &[],
1057 &cfg,
1058 &tc,
1059 0,
1060 false,
1061 None,
1062 None,
1063 )
1064 .await;
1065 assert_eq!(
1067 messages.len(),
1068 2,
1069 "5 assistant placeholders must merge to 1"
1070 );
1071 assert!(messages[1].content.contains("5 messages"));
1072 }
1073
1074 #[tokio::test]
1076 async fn score_normalization_no_panic() {
1077 let scorer = FidelityScorer;
1078 let cfg = make_cfg();
1079 let tc = FixedTc(4);
1080 let mut messages = vec![
1081 make_msg(Role::System, "system"),
1082 make_msg(Role::User, "hello"),
1083 make_msg(Role::Assistant, "world response"),
1084 ];
1085 scorer
1086 .score_and_apply(
1087 &mut messages,
1088 "hello world signal",
1089 &[],
1090 &cfg,
1091 &tc,
1092 0,
1093 false,
1094 None,
1095 None,
1096 )
1097 .await;
1098 for msg in &messages {
1099 let _ = msg.metadata.fidelity_tag;
1100 }
1101 }
1102
1103 #[tokio::test]
1105 async fn short_query_fallback() {
1106 let scorer = FidelityScorer;
1107 let cfg = FidelityConfig {
1108 min_query_length: 8,
1109 ..make_cfg()
1110 };
1111 let tc = FixedTc(4);
1112 let mut messages = vec![
1113 make_msg(Role::System, "system"),
1114 make_msg(Role::User, "test"),
1115 ];
1116 scorer
1118 .score_and_apply(&mut messages, "short", &[], &cfg, &tc, 0, false, None, None)
1119 .await;
1120 }
1121
1122 #[tokio::test]
1128 async fn memory_first_bypass_is_callers_responsibility() {
1129 let scorer = FidelityScorer;
1130 let cfg = FidelityConfig {
1133 enabled: false,
1134 ..make_cfg()
1135 };
1136 let tc = FixedTc(4);
1137 let mut messages = vec![
1138 make_msg(Role::System, "system prompt"),
1139 make_msg(Role::User, "memory-injected context"),
1140 make_msg(Role::Assistant, "response"),
1141 ];
1142 let before: Vec<_> = messages.iter().map(|m| m.content.clone()).collect();
1143 scorer
1145 .score_and_apply(
1146 &mut messages,
1147 "some user query text here",
1148 &[],
1149 &cfg,
1150 &tc,
1151 2,
1152 false,
1153 None,
1154 None,
1155 )
1156 .await;
1157 for (msg, orig) in messages.iter().zip(&before) {
1158 assert_eq!(msg.content, *orig, "content must be unchanged");
1159 assert!(
1160 msg.metadata.fidelity_tag.is_none(),
1161 "no fidelity tag must be set"
1162 );
1163 }
1164 }
1165
1166 #[tokio::test]
1168 async fn enabled_false_guard() {
1169 let scorer = FidelityScorer;
1170 let cfg = FidelityConfig {
1171 enabled: false,
1172 ..make_cfg()
1173 };
1174 let tc = FixedTc(4);
1175 let mut messages = vec![
1176 make_msg(Role::System, "system"),
1177 make_msg(Role::User, "user message that would normally be scored"),
1178 ];
1179 let original_contents: Vec<String> = messages.iter().map(|m| m.content.clone()).collect();
1180 scorer
1181 .score_and_apply(
1182 &mut messages,
1183 "query text here",
1184 &[],
1185 &cfg,
1186 &tc,
1187 0,
1188 false,
1189 None,
1190 None,
1191 )
1192 .await;
1193 for (msg, orig) in messages.iter().zip(&original_contents) {
1194 assert_eq!(msg.content, *orig);
1195 assert!(msg.metadata.fidelity_tag.is_none());
1196 }
1197 }
1198
1199 #[tokio::test]
1201 async fn score_always_in_range() {
1202 let scorer = FidelityScorer;
1203 let cfg = FidelityConfig {
1204 enabled: true,
1205 w_semantic: 0.0,
1206 w_temporal: 0.0,
1207 w_importance: 0.0,
1208 w_plan: 0.0,
1209 full_threshold: 0.7,
1210 compressed_threshold: 0.3,
1211 compressed_max_tokens: 50,
1212 regrade_threshold: 0.6,
1213 min_query_length: 0,
1214 max_scored_messages: 500,
1215 exempt_tail_messages: 0,
1216 compress_provider: None,
1217 semantic_scoring_provider: None,
1218 lookahead_depth: 3,
1219 embed_concurrency: 32,
1220 max_embed_input_tokens: None,
1221 max_compress_input_tokens: None,
1222 embed_timeout_secs: 30,
1223 compress_timeout_secs: 30,
1224 };
1225 let tc = FixedTc(4);
1226 let mut messages = vec![make_msg(Role::System, ""), make_msg(Role::User, "")];
1227 scorer
1229 .score_and_apply(&mut messages, "", &[], &cfg, &tc, 0, false, None, None)
1230 .await;
1231 }
1232
1233 #[tokio::test]
1235 async fn placeholder_uses_tc_count_tokens() {
1236 let scorer = FidelityScorer;
1237 let cfg = FidelityConfig {
1238 full_threshold: 2.0,
1239 compressed_threshold: 1.5,
1240 ..make_cfg()
1241 };
1242 let tc = FixedTc(1); let mut messages = vec![
1244 make_msg(Role::System, "system"),
1245 make_msg(Role::User, "user message content for placeholder rendering"),
1246 ];
1247 scorer
1248 .score_and_apply(
1249 &mut messages,
1250 "some query text here",
1251 &[],
1252 &cfg,
1253 &tc,
1254 0,
1255 false,
1256 None,
1257 None,
1258 )
1259 .await;
1260 assert_eq!(
1261 messages[1].metadata.fidelity_tag,
1262 Some(ContextFidelity::Placeholder)
1263 );
1264 assert!(messages[1].content.starts_with("[placeholder:"));
1265 }
1266
1267 #[tokio::test]
1274 async fn exempt_tail_messages_large_window() {
1275 let scorer = FidelityScorer;
1276 let cfg = FidelityConfig {
1277 full_threshold: 2.0,
1279 compressed_threshold: 1.5,
1280 max_scored_messages: 10,
1281 exempt_tail_messages: 5,
1282 ..make_cfg()
1283 };
1284 let tc = FixedTc(4);
1285
1286 let mut messages: Vec<Message> = std::iter::once(make_msg(Role::System, "system prompt"))
1293 .chain((1..15).map(|i| make_msg(Role::Assistant, &format!("assistant message {i}"))))
1294 .chain((15..20).map(|i| {
1295 let mut m = make_msg(Role::User, &format!("tail message {i}"));
1296 m.metadata.focus_pinned = true;
1297 m
1298 }))
1299 .collect();
1300
1301 scorer
1302 .score_and_apply(
1303 &mut messages,
1304 "query text here long",
1305 &[],
1306 &cfg,
1307 &tc,
1308 0,
1309 false,
1310 None,
1311 None,
1312 )
1313 .await;
1314
1315 let tail: Vec<_> = messages
1317 .iter()
1318 .filter(|m| m.metadata.focus_pinned)
1319 .collect();
1320 assert_eq!(
1321 tail.len(),
1322 5,
1323 "all 5 tail messages must survive the merge pass"
1324 );
1325 for msg in &tail {
1326 assert!(
1327 msg.metadata.fidelity_tag.is_none(),
1328 "tail message must have no fidelity_tag, got {:?}",
1329 msg.metadata.fidelity_tag
1330 );
1331 }
1332 }
1333
1334 #[tokio::test]
1338 async fn exempt_tail_messages_small_window_no_effect() {
1339 let scorer = FidelityScorer;
1340 let cfg = FidelityConfig {
1341 full_threshold: 2.0,
1342 compressed_threshold: 1.5,
1343 max_scored_messages: 10,
1344 exempt_tail_messages: 5,
1345 ..make_cfg()
1346 };
1347 let tc = FixedTc(4);
1348 let roles = [Role::User, Role::Assistant];
1351 let mut messages: Vec<Message> = std::iter::once(make_msg(Role::System, "system prompt"))
1352 .chain((1..8usize).map(|i| make_msg(roles[i % 2], &format!("message {i}"))))
1353 .collect();
1354 scorer
1355 .score_and_apply(
1356 &mut messages,
1357 "query text here long",
1358 &[],
1359 &cfg,
1360 &tc,
1361 0,
1362 false,
1363 None,
1364 None,
1365 )
1366 .await;
1367 let untagged_count = messages[1..]
1370 .iter()
1371 .filter(|m| m.metadata.fidelity_tag.is_none())
1372 .count();
1373 assert_eq!(
1374 untagged_count, 0,
1375 "all non-system messages must be scored when n <= max_scored_messages"
1376 );
1377 }
1378
1379 #[tokio::test]
1381 async fn compressed_uses_deferred_summary() {
1382 let scorer = FidelityScorer;
1383 let cfg = FidelityConfig {
1384 full_threshold: 2.0, compressed_threshold: 0.0, compressed_max_tokens: 5,
1387 ..make_cfg()
1388 };
1389 let tc = FixedTc(4);
1390 let mut msg_with_summary =
1391 make_msg(Role::User, "original long content that would be truncated");
1392 msg_with_summary.metadata.deferred_summary = Some("short summary".to_string());
1393 let mut messages = vec![make_msg(Role::System, "system"), msg_with_summary];
1394 scorer
1395 .score_and_apply(
1396 &mut messages,
1397 "query text here long",
1398 &[],
1399 &cfg,
1400 &tc,
1401 0,
1402 false,
1403 None,
1404 None,
1405 )
1406 .await;
1407 assert_eq!(
1408 messages[1].metadata.fidelity_tag,
1409 Some(ContextFidelity::Compressed)
1410 );
1411 assert_eq!(messages[1].content, "short summary");
1412 }
1413
1414 fn make_msg_with_fidelity(role: Role, content: &str, tag: Option<ContextFidelity>) -> Message {
1417 let mut m = make_msg(role, content);
1418 m.metadata.fidelity_tag = tag;
1419 m
1420 }
1421
1422 #[tokio::test]
1424 async fn floor_prevents_compressed_upgrade_to_full() {
1425 let scorer = FidelityScorer;
1426 let cfg = FidelityConfig {
1427 full_threshold: 0.0,
1429 compressed_threshold: -1.0,
1430 ..make_cfg()
1431 };
1432 let tc = FixedTc(4);
1433 let mut messages = vec![
1434 make_msg(Role::System, "system"),
1435 make_msg_with_fidelity(
1436 Role::User,
1437 "query text here long keyword",
1438 Some(ContextFidelity::Compressed),
1439 ),
1440 ];
1441 scorer
1442 .score_and_apply(
1443 &mut messages,
1444 "query text here long keyword",
1445 &[],
1446 &cfg,
1447 &tc,
1448 0,
1449 false,
1450 None,
1451 None,
1452 )
1453 .await;
1454 assert_eq!(
1455 messages[1].metadata.fidelity_tag,
1456 Some(ContextFidelity::Compressed),
1457 "Compressed floor must block upgrade to Full"
1458 );
1459 }
1460
1461 #[tokio::test]
1463 async fn floor_prevents_placeholder_upgrade_to_full() {
1464 let scorer = FidelityScorer;
1465 let cfg = FidelityConfig {
1466 full_threshold: 0.0,
1467 compressed_threshold: -1.0,
1468 ..make_cfg()
1469 };
1470 let tc = FixedTc(4);
1471 let mut messages = vec![
1472 make_msg(Role::System, "system"),
1473 make_msg_with_fidelity(
1474 Role::User,
1475 "query text here long keyword",
1476 Some(ContextFidelity::Placeholder),
1477 ),
1478 ];
1479 scorer
1480 .score_and_apply(
1481 &mut messages,
1482 "query text here long keyword",
1483 &[],
1484 &cfg,
1485 &tc,
1486 0,
1487 false,
1488 None,
1489 None,
1490 )
1491 .await;
1492 assert_eq!(
1493 messages[1].metadata.fidelity_tag,
1494 Some(ContextFidelity::Placeholder),
1495 "Placeholder floor must block upgrade to Full"
1496 );
1497 }
1498
1499 #[tokio::test]
1501 async fn floor_prevents_placeholder_upgrade_to_compressed() {
1502 let scorer = FidelityScorer;
1503 let cfg = FidelityConfig {
1505 full_threshold: 2.0,
1506 compressed_threshold: 0.0,
1507 ..make_cfg()
1508 };
1509 let tc = FixedTc(4);
1510 let mut messages = vec![
1511 make_msg(Role::System, "system"),
1512 make_msg_with_fidelity(
1513 Role::User,
1514 "message content",
1515 Some(ContextFidelity::Placeholder),
1516 ),
1517 ];
1518 scorer
1519 .score_and_apply(
1520 &mut messages,
1521 "query text here long",
1522 &[],
1523 &cfg,
1524 &tc,
1525 0,
1526 false,
1527 None,
1528 None,
1529 )
1530 .await;
1531 assert_eq!(
1532 messages[1].metadata.fidelity_tag,
1533 Some(ContextFidelity::Placeholder),
1534 "Placeholder floor must block upgrade to Compressed"
1535 );
1536 }
1537
1538 #[tokio::test]
1540 async fn floor_allows_further_downgrade() {
1541 let scorer = FidelityScorer;
1542 let cfg = FidelityConfig {
1543 full_threshold: 2.0,
1544 compressed_threshold: 2.0, ..make_cfg()
1546 };
1547 let tc = FixedTc(4);
1548 let mut messages = vec![
1549 make_msg(Role::System, "system"),
1550 make_msg_with_fidelity(
1551 Role::User,
1552 "some content",
1553 Some(ContextFidelity::Compressed),
1554 ),
1555 ];
1556 scorer
1557 .score_and_apply(
1558 &mut messages,
1559 "query text here long",
1560 &[],
1561 &cfg,
1562 &tc,
1563 0,
1564 false,
1565 None,
1566 None,
1567 )
1568 .await;
1569 assert_eq!(
1570 messages[1].metadata.fidelity_tag,
1571 Some(ContextFidelity::Placeholder),
1572 "downgrade from Compressed to Placeholder must be allowed"
1573 );
1574 }
1575
1576 #[tokio::test]
1578 async fn floor_no_constraint_when_none() {
1579 let scorer = FidelityScorer;
1580 let cfg = FidelityConfig {
1582 full_threshold: 0.0,
1583 compressed_threshold: -1.0,
1584 ..make_cfg()
1585 };
1586 let tc = FixedTc(4);
1587 let mut messages = vec![
1588 make_msg(Role::System, "system"),
1589 make_msg_with_fidelity(Role::User, "query text here long keyword", None),
1590 ];
1591 scorer
1592 .score_and_apply(
1593 &mut messages,
1594 "query text here long keyword",
1595 &[],
1596 &cfg,
1597 &tc,
1598 0,
1599 false,
1600 None,
1601 None,
1602 )
1603 .await;
1604 assert_eq!(
1605 messages[1].metadata.fidelity_tag,
1606 Some(ContextFidelity::Full),
1607 "None tag must not constrain scoring"
1608 );
1609 }
1610
1611 #[tokio::test]
1613 async fn allow_upgrade_bypasses_floor() {
1614 let scorer = FidelityScorer;
1615 let cfg = FidelityConfig {
1616 full_threshold: 0.0,
1617 compressed_threshold: -1.0,
1618 ..make_cfg()
1619 };
1620 let tc = FixedTc(4);
1621 let mut messages = vec![
1622 make_msg(Role::System, "system"),
1623 make_msg_with_fidelity(
1624 Role::User,
1625 "query text here long keyword",
1626 Some(ContextFidelity::Placeholder),
1627 ),
1628 ];
1629 scorer
1630 .score_and_apply(
1631 &mut messages,
1632 "query text here long keyword",
1633 &[],
1634 &cfg,
1635 &tc,
1636 0,
1637 true,
1638 None,
1639 None,
1640 )
1641 .await;
1642 assert_eq!(
1643 messages[1].metadata.fidelity_tag,
1644 Some(ContextFidelity::Full),
1645 "allow_upgrade=true must bypass the Placeholder floor"
1646 );
1647 }
1648
1649 #[test]
1653 fn truncate_no_op_below_limit() {
1654 let tc = FixedTc(1); let mut s = "hello".to_string(); truncate_to_tokens(&mut s, 10, &tc);
1657 assert_eq!(s, "hello");
1658 }
1659
1660 #[test]
1662 fn truncate_no_op_at_limit() {
1663 let tc = FixedTc(1);
1664 let mut s = "hello".to_string(); truncate_to_tokens(&mut s, 5, &tc);
1666 assert_eq!(s, "hello");
1667 }
1668
1669 #[test]
1671 fn truncate_minimal_one_over_limit() {
1672 let tc = FixedTc(1); let mut s = "abcdef".to_string(); truncate_to_tokens(&mut s, 5, &tc);
1675 assert!(
1676 tc.count_tokens(&s) <= 5,
1677 "result must fit in 5 tokens, got {}",
1678 tc.count_tokens(&s)
1679 );
1680 assert!(!s.is_empty(), "must keep prefix, not empty");
1681 }
1682
1683 #[test]
1685 fn truncate_preserves_90pct_of_limit() {
1686 let tc = FixedTc(1);
1688 let s_orig = "a".repeat(90);
1689 let mut s = s_orig.clone();
1690 truncate_to_tokens(&mut s, 100, &tc);
1691 assert_eq!(s, s_orig, "90% of limit must not be truncated");
1692 }
1693
1694 #[test]
1696 fn truncate_empty_string_no_op() {
1697 let tc = FixedTc(1);
1698 let mut s = String::new();
1699 truncate_to_tokens(&mut s, 5, &tc);
1700 assert!(s.is_empty());
1701 }
1702
1703 #[test]
1705 fn truncate_max_tokens_zero_clears_content() {
1706 let tc = FixedTc(1);
1707 let mut s = "hello world".to_string();
1708 truncate_to_tokens(&mut s, 0, &tc);
1709 assert!(s.is_empty(), "max_tokens=0 must clear content");
1710 }
1711
1712 #[test]
1714 fn truncate_multibyte_stays_on_char_boundary() {
1715 let tc = FixedTc(3);
1719 let mut s = "日本語".to_string();
1720 truncate_to_tokens(&mut s, 2, &tc);
1721 assert!(
1722 s.is_char_boundary(s.len()),
1723 "result must be on a valid char boundary"
1724 );
1725 assert!(tc.count_tokens(&s) <= 2);
1726 assert_eq!(s, "日本");
1727 }
1728
1729 #[tokio::test]
1731 async fn mixed_fidelity_tool_pair_floor_plus_atomicity() {
1732 let scorer = FidelityScorer;
1733 let cfg = FidelityConfig {
1735 full_threshold: 0.0,
1736 compressed_threshold: -1.0,
1737 ..make_cfg()
1738 };
1739 let tc = FixedTc(4);
1740 let tool_id = "tool-42".to_string();
1741
1742 let mut tool_use_msg = make_msg_with_fidelity(Role::Assistant, "call tool", None);
1743 tool_use_msg.parts = vec![MessagePart::ToolUse {
1744 id: tool_id.clone(),
1745 name: "shell".to_string(),
1746 input: serde_json::json!({}),
1747 }];
1748
1749 let mut tool_result_msg =
1750 make_msg_with_fidelity(Role::User, "result body", Some(ContextFidelity::Compressed));
1751 tool_result_msg.parts = vec![MessagePart::ToolResult {
1752 tool_use_id: tool_id.clone(),
1753 content: "output".to_string(),
1754 is_error: false,
1755 }];
1756
1757 let mut messages = vec![
1758 make_msg(Role::System, "system"),
1759 tool_use_msg,
1760 tool_result_msg,
1761 ];
1762
1763 scorer
1764 .score_and_apply(
1765 &mut messages,
1766 "query text here long",
1767 &[],
1768 &cfg,
1769 &tc,
1770 0,
1771 false,
1772 None,
1773 None,
1774 )
1775 .await;
1776
1777 let tag_use = messages[1].metadata.fidelity_tag;
1781 let tag_result = messages[2].metadata.fidelity_tag;
1782 assert_eq!(
1783 tag_use, tag_result,
1784 "tool pair must share the same fidelity level"
1785 );
1786 assert_eq!(
1787 tag_use,
1788 Some(ContextFidelity::Compressed),
1789 "atomicity must bring the tool-use down to the tool-result floor"
1790 );
1791 }
1792
1793 #[tokio::test]
1795 async fn compress_llm_path_stores_deferred_summary() {
1796 use zeph_llm::LlmError;
1797 use zeph_llm::provider::ChatStream;
1798
1799 #[derive(Debug)]
1800 struct MockProvider;
1801
1802 impl zeph_llm::provider::LlmProvider for MockProvider {
1803 async fn chat(&self, _messages: &[Message]) -> Result<String, LlmError> {
1804 Ok("summary text".to_string())
1805 }
1806
1807 async fn chat_stream(&self, _messages: &[Message]) -> Result<ChatStream, LlmError> {
1808 Err(LlmError::Unavailable)
1809 }
1810
1811 fn supports_streaming(&self) -> bool {
1812 false
1813 }
1814
1815 async fn embed(&self, _text: &str) -> Result<Vec<f32>, LlmError> {
1816 Err(LlmError::EmbedUnsupported {
1817 provider: "mock".into(),
1818 })
1819 }
1820
1821 fn supports_embeddings(&self) -> bool {
1822 false
1823 }
1824
1825 fn name(&self) -> &'static str {
1826 "mock"
1827 }
1828 }
1829
1830 let scorer = FidelityScorer;
1831 let cfg = FidelityConfig {
1832 enabled: true,
1833 full_threshold: 2.0,
1835 compressed_threshold: 0.0,
1836 compressed_max_tokens: 5,
1837 compress_provider: Some(ProviderName::new("mock")),
1838 ..make_cfg()
1839 };
1840 let tc = FixedTc(1);
1842 let content = "a".repeat(50); let mut messages = vec![
1844 make_msg(Role::System, "system"),
1845 make_msg(Role::User, &content),
1846 ];
1847
1848 let provider = MockProvider;
1849 scorer
1850 .score_and_apply(
1851 &mut messages,
1852 "some query text here",
1853 &[],
1854 &cfg,
1855 &tc,
1856 0,
1857 false,
1858 None,
1859 Some(&provider),
1860 )
1861 .await;
1862
1863 assert_eq!(
1864 messages[1].metadata.fidelity_tag,
1865 Some(ContextFidelity::Compressed),
1866 );
1867 assert!(
1869 tc.count_tokens(&messages[1].content) <= 5,
1870 "content must be capped to compressed_max_tokens after LLM summary"
1871 );
1872 assert_eq!(
1874 messages[1].metadata.deferred_summary,
1875 Some("summary text".to_string()),
1876 );
1877 }
1878
1879 #[tokio::test]
1881 async fn compress_llm_skipped_when_provider_none() {
1882 let scorer = FidelityScorer;
1883 let cfg = FidelityConfig {
1884 enabled: true,
1885 full_threshold: 2.0,
1886 compressed_threshold: 0.0,
1887 compressed_max_tokens: 5,
1888 compress_provider: Some(ProviderName::new("mock")),
1889 ..make_cfg()
1890 };
1891 let tc = FixedTc(1);
1892 let content = "a".repeat(50);
1893 let mut messages = vec![
1894 make_msg(Role::System, "system"),
1895 make_msg(Role::User, &content),
1896 ];
1897
1898 scorer
1899 .score_and_apply(
1900 &mut messages,
1901 "some query text here",
1902 &[],
1903 &cfg,
1904 &tc,
1905 0,
1906 false,
1907 None,
1908 None,
1909 )
1910 .await;
1911
1912 assert_eq!(
1913 messages[1].metadata.fidelity_tag,
1914 Some(ContextFidelity::Compressed),
1915 );
1916 assert!(
1918 messages[1].metadata.deferred_summary.is_none(),
1919 "deferred_summary must not be populated via truncation path"
1920 );
1921 assert!(
1923 messages[1].content.len() <= 5,
1924 "content must be truncated, got len={}",
1925 messages[1].content.len()
1926 );
1927 }
1928
1929 #[test]
1931 fn cosine_similarity_identical() {
1932 let v = vec![1.0f32, 0.0, 0.0];
1933 assert!((cosine_similarity(&v, &v) - 1.0).abs() < 1e-6);
1934 }
1935
1936 #[test]
1937 fn cosine_similarity_orthogonal() {
1938 let a = vec![1.0f32, 0.0, 0.0];
1939 let b = vec![0.0f32, 1.0, 0.0];
1940 assert!(cosine_similarity(&a, &b).abs() < 1e-6);
1941 }
1942
1943 #[test]
1944 fn cosine_similarity_zero_vector() {
1945 let a = vec![0.0f32, 0.0, 0.0];
1946 let b = vec![1.0f32, 0.0, 0.0];
1947 assert!(cosine_similarity(&a, &b).abs() < f32::EPSILON);
1948 }
1949
1950 #[test]
1951 fn cosine_similarity_empty() {
1952 assert!(cosine_similarity(&[], &[]).abs() < f32::EPSILON);
1953 }
1954
1955 #[test]
1956 fn cosine_similarity_dimension_mismatch() {
1957 let a = vec![1.0f32, 0.0];
1958 let b = vec![1.0f32, 0.0, 0.0];
1959 assert!(cosine_similarity(&a, &b).abs() < f32::EPSILON);
1960 }
1961
1962 #[tokio::test]
1969 async fn semantic_scoring_higher_for_similar_messages() {
1970 use zeph_llm::LlmError;
1971 use zeph_llm::provider::ChatStream;
1972
1973 #[derive(Debug)]
1974 struct EmbedMockProvider;
1975
1976 impl zeph_llm::provider::LlmProvider for EmbedMockProvider {
1977 async fn chat(&self, _: &[Message]) -> Result<String, LlmError> {
1978 Err(LlmError::Unavailable)
1979 }
1980 async fn chat_stream(&self, _: &[Message]) -> Result<ChatStream, LlmError> {
1981 Err(LlmError::Unavailable)
1982 }
1983 fn supports_streaming(&self) -> bool {
1984 false
1985 }
1986 async fn embed(&self, text: &str) -> Result<Vec<f32>, LlmError> {
1987 let v = if text.contains("cat")
1988 || text.contains("mat")
1989 || text.contains("feline")
1990 || text.contains("rug")
1991 {
1992 if text.contains("feline") || text.contains("rug") {
1993 vec![0.9f32, 0.1, 0.0]
1994 } else {
1995 vec![1.0f32, 0.0, 0.0]
1996 }
1997 } else {
1998 vec![0.0f32, 0.0, 1.0]
1999 };
2000 Ok(v)
2001 }
2002 fn supports_embeddings(&self) -> bool {
2003 true
2004 }
2005 fn name(&self) -> &'static str {
2006 "embed-mock"
2007 }
2008 }
2009
2010 let provider = EmbedMockProvider;
2011 let scorer = FidelityScorer;
2012 let cfg = FidelityConfig {
2013 enabled: true,
2014 semantic_scoring_provider: Some(ProviderName::new("embed-mock")),
2015 full_threshold: 0.0,
2019 compressed_threshold: 0.0,
2020 w_semantic: 1.0,
2021 w_temporal: 0.0,
2022 w_importance: 0.0,
2023 w_plan: 0.0,
2024 ..make_cfg()
2025 };
2026 let tc = FixedTc(4);
2027 let cat_msg = make_msg(Role::User, "The cat is on the mat");
2028 let feline_msg = make_msg(Role::User, "A feline rests on the rug");
2029 let stock_msg = make_msg(Role::User, "Stock prices fell today");
2030 let mut messages = vec![
2031 make_msg(Role::System, "system"),
2032 cat_msg,
2033 feline_msg,
2034 stock_msg,
2035 ];
2036
2037 scorer
2038 .score_and_apply(
2039 &mut messages,
2040 "cat mat",
2041 &[],
2042 &cfg,
2043 &tc,
2044 0,
2045 false,
2046 Some(&provider),
2047 None,
2048 )
2049 .await;
2050
2051 assert!(
2054 messages[1].metadata.embedding.is_some(),
2055 "cat message must have embedding"
2056 );
2057 assert!(
2058 messages[2].metadata.embedding.is_some(),
2059 "feline message must have embedding"
2060 );
2061 assert!(
2062 messages[3].metadata.embedding.is_some(),
2063 "stock message must have embedding"
2064 );
2065
2066 let query_emb = [1.0f32, 0.0, 0.0];
2068 let cat_emb = messages[1].metadata.embedding.as_ref().unwrap();
2069 let feline_emb = messages[2].metadata.embedding.as_ref().unwrap();
2070 let stock_emb = messages[3].metadata.embedding.as_ref().unwrap();
2071 assert!(
2072 cosine_similarity(cat_emb, &query_emb) > cosine_similarity(stock_emb, &query_emb),
2073 "cat message must be more similar to query than stock message"
2074 );
2075 assert!(
2076 cosine_similarity(feline_emb, &query_emb) > cosine_similarity(stock_emb, &query_emb),
2077 "feline message must be more similar to query than stock message"
2078 );
2079 }
2080
2081 #[tokio::test]
2083 async fn semantic_scoring_falls_back_to_keyword_when_provider_none() {
2084 let scorer = FidelityScorer;
2085 let cfg = FidelityConfig {
2086 enabled: true,
2087 semantic_scoring_provider: None,
2088 ..make_cfg()
2089 };
2090 let tc = FixedTc(4);
2091 let mut messages = vec![
2092 make_msg(Role::System, "system"),
2093 make_msg(Role::User, "cat mat keyword test"),
2094 make_msg(Role::User, "something unrelated here"),
2095 ];
2096
2097 scorer
2098 .score_and_apply(
2099 &mut messages,
2100 "query text here long",
2101 &[],
2102 &cfg,
2103 &tc,
2104 0,
2105 false,
2106 None,
2107 None,
2108 )
2109 .await;
2110
2111 for msg in &messages {
2113 assert!(
2114 msg.metadata.embedding.is_none(),
2115 "no embedding must be computed when provider is None"
2116 );
2117 }
2118 for msg in &messages[1..] {
2120 assert!(
2121 msg.metadata.fidelity_tag.is_some(),
2122 "all non-system messages must be scored via keyword path"
2123 );
2124 }
2125 }
2126
2127 #[tokio::test]
2129 async fn w_plan_produces_nonzero_score_for_matching_message() {
2130 use zeph_common::PlannedToolHint;
2131
2132 let scorer = FidelityScorer;
2133 let cfg = FidelityConfig {
2136 w_semantic: 0.0,
2137 w_temporal: 0.0,
2138 w_importance: 0.0,
2139 w_plan: 1.0,
2140 full_threshold: 0.5,
2141 compressed_threshold: 0.1,
2142 min_query_length: 100, ..make_cfg()
2144 };
2145 let tc = FixedTc(4);
2146
2147 let hint = PlannedToolHint::new("shell", vec!["cargo".to_string(), "build".to_string()], 1);
2149
2150 let mut messages = vec![
2153 make_msg(Role::System, "system prompt"),
2154 make_msg(Role::User, "run cargo build to compile"),
2155 make_msg(Role::User, "what is the weather today"),
2156 ];
2157
2158 scorer
2159 .score_and_apply(
2160 &mut messages,
2161 "q", &[hint],
2163 &cfg,
2164 &tc,
2165 0,
2166 false,
2167 None,
2168 None,
2169 )
2170 .await;
2171
2172 assert_eq!(
2174 messages[1].metadata.fidelity_tag,
2175 Some(ContextFidelity::Full),
2176 "message matching planned tool keywords must reach Full fidelity"
2177 );
2178
2179 assert_ne!(
2181 messages[2].metadata.fidelity_tag,
2182 Some(ContextFidelity::Full),
2183 "message with no keyword overlap must not reach Full fidelity via w_plan"
2184 );
2185 }
2186
2187 #[test]
2190 fn truncate_to_byte_limit_no_op_when_short() {
2191 assert_eq!(truncate_to_byte_limit("hello", 10), "hello");
2192 }
2193
2194 #[test]
2195 fn truncate_to_byte_limit_exact_limit_no_op() {
2196 assert_eq!(truncate_to_byte_limit("hello", 5), "hello");
2197 }
2198
2199 #[test]
2200 fn truncate_to_byte_limit_over_limit() {
2201 let s = truncate_to_byte_limit("abcdefgh", 5);
2202 assert_eq!(s.len(), 5);
2203 assert_eq!(s, "abcde");
2204 }
2205
2206 #[test]
2207 fn truncate_to_byte_limit_multibyte_boundary() {
2208 let s = truncate_to_byte_limit("日本語", 6);
2210 assert!(s.is_char_boundary(s.len()));
2211 assert_eq!(s, "日本");
2212 }
2213
2214 #[test]
2217 fn apply_input_cap_no_op_below_limit() {
2218 let mut s = "hello".to_string();
2219 apply_input_cap(&mut s, 10); assert_eq!(s, "hello");
2221 }
2222
2223 #[test]
2224 fn apply_input_cap_truncates_over_limit() {
2225 let mut s = "abcdefgh".to_string();
2227 apply_input_cap(&mut s, 1);
2228 assert_eq!(s, "abcd");
2229 }
2230
2231 #[test]
2232 fn apply_input_cap_multibyte() {
2233 let mut s = "日本語".to_string();
2236 apply_input_cap(&mut s, 1);
2237 assert!(s.is_char_boundary(s.len()));
2238 assert_eq!(s, "日");
2239 }
2240
2241 #[tokio::test]
2244 async fn embed_prepass_returns_embeddings_for_non_exempt() {
2245 let messages = vec![
2246 make_msg(Role::System, "system prompt"), make_msg(Role::User, "user message"),
2248 make_msg(Role::Assistant, "assistant reply"),
2249 ];
2250 let cfg = FidelityConfig::default();
2251 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32, 2.0, 3.0]) }) };
2252 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2253 assert!(!result.contains_key(&0));
2255 assert_eq!(result[&1], vec![1.0, 2.0, 3.0]);
2256 assert_eq!(result[&2], vec![1.0, 2.0, 3.0]);
2257 }
2258
2259 #[tokio::test]
2260 async fn embed_prepass_skips_empty_content() {
2261 let messages = vec![make_msg(Role::System, "system"), make_msg(Role::User, "")];
2262 let cfg = FidelityConfig::default();
2263 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2264 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2265 assert!(!result.contains_key(&1), "empty content must be skipped");
2266 }
2267
2268 #[tokio::test]
2269 async fn embed_prepass_skips_inserted_memory() {
2270 let messages = vec![
2271 make_msg(Role::System, "system"),
2272 make_msg(Role::User, "injected memory"),
2273 make_msg(Role::User, "real user message"),
2274 ];
2275 let cfg = FidelityConfig::default();
2276 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2277 let result = embed_prepass(&messages, &embed, &cfg, 1).await;
2279 assert!(!result.contains_key(&0), "system is exempt");
2280 assert!(!result.contains_key(&1), "inserted memory is exempt");
2281 assert!(
2282 result.contains_key(&2),
2283 "real user message must be embedded"
2284 );
2285 }
2286
2287 #[tokio::test]
2288 async fn embed_prepass_silently_skips_errors() {
2289 let messages = vec![
2290 make_msg(Role::System, "system"),
2291 make_msg(Role::User, "user"),
2292 ];
2293 let cfg = FidelityConfig::default();
2294 let embed = |_text: &str| -> EmbedFuture {
2295 Box::pin(async {
2296 Err(zeph_llm::LlmError::EmbedUnsupported {
2297 provider: "mock".to_string(),
2298 })
2299 })
2300 };
2301 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2302 assert!(result.is_empty(), "errors must be silently skipped");
2303 }
2304
2305 #[tokio::test]
2306 async fn embed_prepass_truncates_content_when_cap_set() {
2307 let long_content = "a".repeat(100);
2309 let messages = vec![
2310 make_msg(Role::System, "system"),
2311 make_msg(Role::User, &long_content),
2312 ];
2313 let cfg = FidelityConfig {
2314 max_embed_input_tokens: Some(1), ..FidelityConfig::default()
2316 };
2317 let seen_len = std::sync::Arc::new(std::sync::Mutex::new(0usize));
2318 let seen_len_clone = seen_len.clone();
2319 let embed = move |text: &str| -> EmbedFuture {
2320 let len = text.len();
2321 let seen = seen_len_clone.clone();
2322 Box::pin(async move {
2323 *seen.lock().unwrap() = len;
2324 Ok(vec![1.0f32])
2325 })
2326 };
2327 embed_prepass(&messages, &embed, &cfg, 0).await;
2328 assert_eq!(
2329 *seen_len.lock().unwrap(),
2330 4,
2331 "content must be truncated to max_embed_input_tokens * 4 bytes"
2332 );
2333 }
2334
2335 #[tokio::test]
2336 async fn embed_prepass_concurrency_zero_clamped_to_one() {
2337 let messages = vec![
2339 make_msg(Role::System, "system"),
2340 make_msg(Role::User, "user message"),
2341 ];
2342 let cfg = FidelityConfig {
2343 embed_concurrency: 0,
2344 ..FidelityConfig::default()
2345 };
2346 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2347 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2348 assert!(
2349 result.contains_key(&1),
2350 "result must be produced even with concurrency=0"
2351 );
2352 }
2353
2354 #[tokio::test]
2355 async fn embed_prepass_skips_cached_embeddings() {
2356 let mut msg_with_cache = make_msg(Role::User, "already embedded");
2357 msg_with_cache.metadata.embedding = Some(vec![9.0f32]);
2358 let messages = vec![
2359 make_msg(Role::System, "system"),
2360 msg_with_cache,
2361 make_msg(Role::User, "needs embedding"),
2362 ];
2363 let cfg = FidelityConfig::default();
2364 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2365 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2366 assert!(
2367 !result.contains_key(&1),
2368 "message with cached embedding must be skipped"
2369 );
2370 assert!(
2371 result.contains_key(&2),
2372 "message without embedding must be processed"
2373 );
2374 }
2375
2376 #[tokio::test(start_paused = true)]
2377 async fn embed_prepass_timeout_skips_message() {
2378 let messages = vec![
2379 make_msg(Role::System, "system"),
2380 make_msg(Role::User, "user"),
2381 ];
2382 let cfg = FidelityConfig::default();
2383 let embed = |_text: &str| -> EmbedFuture {
2384 Box::pin(async {
2385 tokio::time::sleep(Duration::from_secs(45)).await;
2387 Ok(vec![1.0f32])
2388 })
2389 };
2390 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2392 assert!(result.is_empty(), "timed-out embed must be skipped");
2393 }
2394
2395 #[tokio::test(start_paused = true)]
2396 async fn embed_prepass_custom_timeout_respected() {
2397 let messages = vec![
2398 make_msg(Role::System, "system"),
2399 make_msg(Role::User, "user"),
2400 ];
2401 let cfg = FidelityConfig {
2403 embed_timeout_secs: 5,
2404 ..FidelityConfig::default()
2405 };
2406 let embed = |_text: &str| -> EmbedFuture {
2407 Box::pin(async {
2408 tokio::time::sleep(Duration::from_secs(10)).await;
2409 Ok(vec![1.0f32])
2410 })
2411 };
2412 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2413 assert!(
2414 result.is_empty(),
2415 "custom embed_timeout_secs must be honored"
2416 );
2417 }
2418
2419 #[test]
2422 fn fidelity_config_new_fields_defaults() {
2423 let cfg = FidelityConfig::default();
2424 assert_eq!(cfg.embed_concurrency, 32);
2425 assert!(cfg.max_embed_input_tokens.is_none());
2426 assert!(cfg.max_compress_input_tokens.is_none());
2427 }
2428
2429 #[test]
2430 fn fidelity_config_timeout_defaults() {
2431 let cfg = FidelityConfig::default();
2432 assert_eq!(cfg.embed_timeout_secs, 30);
2433 assert_eq!(cfg.compress_timeout_secs, 30);
2434 }
2435
2436 #[test]
2437 fn fidelity_config_new_fields_custom() {
2438 let cfg = FidelityConfig {
2439 embed_concurrency: 8,
2440 max_embed_input_tokens: Some(512),
2441 max_compress_input_tokens: Some(1024),
2442 ..FidelityConfig::default()
2443 };
2444 assert_eq!(cfg.embed_concurrency, 8);
2445 assert_eq!(cfg.max_embed_input_tokens, Some(512));
2446 assert_eq!(cfg.max_compress_input_tokens, Some(1024));
2447 }
2448
2449 #[tokio::test]
2451 async fn render_compressed_truncates_oversized_deferred_summary() {
2452 let scorer = FidelityScorer;
2453 let cfg = FidelityConfig {
2456 full_threshold: 2.0,
2457 compressed_threshold: 0.0,
2458 compressed_max_tokens: 3,
2459 ..make_cfg()
2460 };
2461 let tc = FixedTc(1);
2462 let mut msg = make_msg(Role::User, "original long content");
2463 msg.metadata.deferred_summary = Some("ten chars!".to_string());
2465 let mut messages = vec![make_msg(Role::System, "sys"), msg];
2466 scorer
2467 .score_and_apply(
2468 &mut messages,
2469 "query text here long",
2470 &[],
2471 &cfg,
2472 &tc,
2473 0,
2474 false,
2475 None,
2476 None,
2477 )
2478 .await;
2479 let compressed = &messages[1];
2480 assert_eq!(
2481 compressed.metadata.fidelity_tag,
2482 Some(ContextFidelity::Compressed)
2483 );
2484 assert!(
2485 tc.count_tokens(&compressed.content) <= 3,
2486 "deferred_summary result must be truncated to compressed_max_tokens"
2487 );
2488 }
2489
2490 #[tokio::test]
2492 async fn render_compressed_applies_max_compress_input_tokens() {
2493 let scorer = FidelityScorer;
2494 let cfg = FidelityConfig {
2497 full_threshold: 2.0,
2498 compressed_threshold: 0.0,
2499 compressed_max_tokens: 100,
2500 max_compress_input_tokens: Some(2), ..make_cfg()
2502 };
2503 let tc = FixedTc(1);
2504 let content_20 = "a".repeat(20); let mut msg = make_msg(Role::User, &content_20);
2506 let mut messages = vec![make_msg(Role::System, "sys"), msg.clone()];
2508 scorer
2509 .score_and_apply(
2510 &mut messages,
2511 "query text here long",
2512 &[],
2513 &cfg,
2514 &tc,
2515 0,
2516 false,
2517 None,
2518 None,
2519 )
2520 .await;
2521 let compressed = &messages[1];
2522 assert_eq!(
2523 compressed.metadata.fidelity_tag,
2524 Some(ContextFidelity::Compressed)
2525 );
2526 assert_eq!(
2528 compressed.content.len(),
2529 8,
2530 "content must be capped to max_compress_input_tokens * 4 bytes"
2531 );
2532 msg.metadata.deferred_summary = Some("short".to_string());
2534 let mut messages2 = vec![make_msg(Role::System, "sys"), msg];
2535 scorer
2536 .score_and_apply(
2537 &mut messages2,
2538 "query text here long",
2539 &[],
2540 &cfg,
2541 &tc,
2542 0,
2543 false,
2544 None,
2545 None,
2546 )
2547 .await;
2548 assert_eq!(
2549 messages2[1].content, "short",
2550 "deferred_summary must bypass input cap"
2551 );
2552 }
2553}