1use std::time::Duration;
23
24use futures::StreamExt as _;
25use tracing::info_span;
26use zeph_common::memory::TokenCounting;
27use zeph_common::{ContextFidelity, PlannedToolHint};
28use zeph_llm::LlmProviderDyn;
29use zeph_llm::provider::{EmbedFuture, Message, MessageMetadata, MessagePart, Role};
30
31use crate::assembler::CORRECTIONS_PREFIX;
32
33pub use zeph_config::FidelityConfig;
35
36pub async fn embed_prepass<F>(
73 messages: &[Message],
74 embed: &F,
75 config: &FidelityConfig,
76 inserted_count: usize,
77) -> std::collections::HashMap<usize, Vec<f32>>
78where
79 F: Fn(&str) -> EmbedFuture + Send + Sync,
80{
81 let concurrency = if config.embed_concurrency == 0 {
82 tracing::warn!(
83 "embed_concurrency is 0, clamping to 1; set a positive value in [context.fidelity]"
84 );
85 1
86 } else {
87 config.embed_concurrency
88 };
89
90 let tasks = messages.iter().enumerate().filter_map(|(i, msg)| {
91 if is_exempt(msg, i, inserted_count)
92 || msg.content.is_empty()
93 || msg.metadata.embedding.is_some()
94 {
95 return None;
96 }
97 let content = match config.max_embed_input_tokens {
98 Some(n) => truncate_to_byte_limit(&msg.content, n.saturating_mul(4)),
99 None => msg.content.clone(),
100 };
101 Some((i, content))
102 });
103
104 futures::stream::iter(tasks)
105 .map(|(i, content)| async move {
106 let result = tokio::time::timeout(Duration::from_secs(30), embed(&content)).await;
107 match result {
108 Ok(Ok(vec)) => Some((i, vec)),
109 Ok(Err(e)) => {
110 tracing::debug!(idx = i, err = %e, "embed_prepass: embed failed, skipping");
111 None
112 }
113 Err(_) => {
114 tracing::warn!(idx = i, "embed_prepass: embed timed out, skipping");
115 None
116 }
117 }
118 })
119 .buffer_unordered(concurrency)
120 .filter_map(|opt| async move { opt })
121 .collect()
122 .await
123}
124
125fn truncate_to_byte_limit(s: &str, max_bytes: usize) -> String {
134 if s.len() <= max_bytes {
135 return s.to_string();
136 }
137 let boundary = s.floor_char_boundary(max_bytes);
138 s[..boundary].to_string()
139}
140
/// Scoring outcome for a single message, produced by `compute_scores` and consumed by
/// `apply_scores`.
struct FidelityScore {
    /// Normalized relevance score; `compute_scores` clamps it to `0.0..=1.0`.
    score: f32,
    /// Fidelity level the message should be rendered at.
    level: ContextFidelity,
    /// Token count of the message content measured at scoring time. It is `0` for slots
    /// that `apply_tool_pair_atomicity` creates for messages that were never scored.
    original_tokens: u32,
}
146
/// Stateless entry point for fidelity scoring; all work happens in
/// [`FidelityScorer::score_and_apply`].
pub struct FidelityScorer;
173
174impl FidelityScorer {
175 #[allow(clippy::too_many_arguments)]
207 pub async fn score_and_apply(
208 &self,
209 messages: &mut Vec<Message>,
210 query: &str,
211 planned_tools: &[PlannedToolHint],
212 config: &FidelityConfig,
213 tc: &dyn TokenCounting,
214 inserted_count: usize,
215 allow_upgrade: bool,
216 embed_provider: Option<&dyn LlmProviderDyn>,
217 compress_provider: Option<&dyn LlmProviderDyn>,
218 ) {
219 if !config.enabled || messages.is_empty() {
220 return;
221 }
222
223 let query_embedding: Option<Vec<f32>> = if let (true, Some(p)) =
225 (config.semantic_scoring_provider.is_some(), embed_provider)
226 && p.supports_embeddings()
227 {
228 let _span = info_span!("context.fidelity.embed_query").entered();
229 match tokio::time::timeout(Duration::from_secs(30), p.embed(query)).await {
230 Ok(Ok(v)) => Some(v),
231 Ok(Err(e)) => {
232 tracing::warn!(error = %e, "semantic scoring provider unavailable, falling back to keyword");
233 None
234 }
235 Err(_) => {
236 tracing::warn!("fidelity query embed timed out, falling back to keyword");
237 None
238 }
239 }
240 } else {
241 None
242 };
243
244 if let (Some(q_emb), Some(p)) = (&query_embedding, embed_provider) {
248 let n = messages.len();
249 let score_end = if n > config.max_scored_messages {
250 n.saturating_sub(config.exempt_tail_messages)
251 } else {
252 n
253 };
254 let concurrency = if config.embed_concurrency == 0 {
255 1
256 } else {
257 config.embed_concurrency
258 };
259 let _span = info_span!("context.fidelity.embed_prepass").entered();
260 let embeddings: std::collections::HashMap<usize, Vec<f32>> =
261 futures::stream::iter(messages[..score_end].iter().enumerate().filter_map(
262 |(i, msg)| {
263 if msg.metadata.embedding.is_none()
264 && !is_exempt(msg, i, inserted_count)
265 && !msg.content.is_empty()
266 {
267 let content = match config.max_embed_input_tokens {
268 Some(n) => {
269 truncate_to_byte_limit(&msg.content, n.saturating_mul(4))
270 }
271 None => msg.content.clone(),
272 };
273 Some((i, content))
274 } else {
275 None
276 }
277 },
278 ))
279 .map(|(i, content)| async move {
280 let result =
281 tokio::time::timeout(Duration::from_secs(30), p.embed(&content)).await;
282 match result {
283 Ok(Ok(v)) => Some((i, v)),
284 Ok(Err(e)) => {
285 tracing::warn!(error = %e, "message embed failed, skipping");
286 None
287 }
288 Err(_) => {
289 tracing::warn!(idx = i, "fidelity message embed timed out, skipping");
290 None
291 }
292 }
293 })
294 .buffer_unordered(concurrency)
295 .filter_map(|opt| async move { opt })
296 .collect()
297 .await;
298 for (i, emb) in embeddings {
299 messages[i].metadata.embedding = Some(emb);
300 }
301 let _ = q_emb; }
303
304 let scores = compute_scores(
305 messages,
306 query,
307 planned_tools,
308 config,
309 tc,
310 inserted_count,
311 allow_upgrade,
312 query_embedding.as_deref(),
313 );
314 apply_scores(messages, &scores, config, tc, compress_provider).await;
315
316 let _merge_span = info_span!("context.fidelity.merge").entered();
317 let merged_count = merge_consecutive_placeholders(messages);
318 tracing::debug!(merged_count, "fidelity merge complete");
319 }
320}
321
322#[allow(clippy::too_many_arguments)]
323fn compute_scores(
324 messages: &[Message],
325 query: &str,
326 planned_tools: &[PlannedToolHint],
327 config: &FidelityConfig,
328 tc: &dyn TokenCounting,
329 inserted_count: usize,
330 allow_upgrade: bool,
331 query_embedding: Option<&[f32]>,
332) -> Vec<Option<FidelityScore>> {
333 let n = messages.len();
334
335 let score_end = if n > config.max_scored_messages {
337 n.saturating_sub(config.exempt_tail_messages)
338 } else {
339 n
340 };
341
342 let semantic_active = query.len() >= config.min_query_length;
343 let plan_active = !planned_tools.is_empty();
344 let query_words: std::collections::HashSet<&str> = if semantic_active {
346 query.split_whitespace().collect()
347 } else {
348 std::collections::HashSet::default()
349 };
350
351 let mut weight_sum = config.w_temporal + config.w_importance;
353 if semantic_active {
354 weight_sum += config.w_semantic;
355 }
356 if plan_active {
357 weight_sum += config.w_plan;
358 }
359 if weight_sum <= 0.0 {
360 weight_sum = 1.0;
361 }
362
363 #[allow(clippy::cast_precision_loss)]
364 let max_dist = score_end.saturating_sub(1) as f32;
365
366 let mut scores: Vec<Option<FidelityScore>> = (0..n).map(|_| None).collect();
367
368 for (i, msg) in messages.iter().enumerate().take(score_end) {
369 if is_exempt(msg, i, inserted_count) {
370 continue;
371 }
372
373 #[allow(clippy::cast_possible_truncation)]
374 let original_tokens = tc.count_tokens(&msg.content) as u32;
375
376 #[allow(clippy::cast_precision_loss)]
379 let temporal = if max_dist > 0.0 {
380 let distance_from_end = (score_end - 1 - i) as f32;
381 1.0 - distance_from_end / max_dist
382 } else {
383 1.0
384 };
385 let importance = if msg
387 .parts
388 .iter()
389 .any(|p| matches!(p, MessagePart::ToolResult { .. }))
390 {
391 0.4
392 } else {
393 role_weight(msg.role)
394 };
395 let semantic = if semantic_active {
396 match (query_embedding, msg.metadata.embedding.as_deref()) {
397 (Some(q_emb), Some(m_emb)) => semantic_overlap(m_emb, q_emb),
398 _ => keyword_overlap(&msg.content, &query_words),
399 }
400 } else {
401 0.0
402 };
403 let plan = if plan_active {
404 plan_relevance(&msg.content, planned_tools)
405 } else {
406 0.0
407 };
408
409 let raw = config.w_temporal * temporal
410 + config.w_importance * importance
411 + if semantic_active {
412 config.w_semantic * semantic
413 } else {
414 0.0
415 }
416 + if plan_active {
417 config.w_plan * plan
418 } else {
419 0.0
420 };
421
422 let score = (raw / weight_sum).clamp(0.0, 1.0);
423 let candidate_level = score_to_level(score, config);
424
425 let level = if allow_upgrade {
430 candidate_level
431 } else {
432 match msg.metadata.fidelity_tag {
433 Some(ContextFidelity::Placeholder) => ContextFidelity::Placeholder,
434 Some(ContextFidelity::Compressed) => {
435 if candidate_level == ContextFidelity::Full {
436 ContextFidelity::Compressed
437 } else {
438 candidate_level
439 }
440 }
441 _ => candidate_level,
442 }
443 };
444
445 scores[i] = Some(FidelityScore {
446 score,
447 level,
448 original_tokens,
449 });
450 }
451
452 apply_tool_pair_atomicity(messages, &mut scores, config);
453 scores
454}
455
456async fn apply_scores(
457 messages: &mut [Message],
458 scores: &[Option<FidelityScore>],
459 config: &FidelityConfig,
460 tc: &dyn TokenCounting,
461 provider: Option<&dyn LlmProviderDyn>,
462) {
463 let _apply_span = info_span!("context.fidelity.apply").entered();
464 let (mut full_count, mut compressed_count, mut placeholder_count, mut tokens_saved) =
465 (0u32, 0u32, 0u32, 0u32);
466
467 for (i, msg) in messages.iter_mut().enumerate() {
468 let Some(ref fs) = scores[i] else { continue };
469 match fs.level {
470 ContextFidelity::Compressed => {
471 #[allow(clippy::cast_possible_truncation)]
472 let original_tokens = fs.original_tokens;
473 render_compressed(msg, config, tc, provider).await;
474 #[allow(clippy::cast_possible_truncation)]
475 let new_tokens = tc.count_tokens(&msg.content) as u32;
476 tokens_saved += original_tokens.saturating_sub(new_tokens);
477 compressed_count += 1;
478 }
479 ContextFidelity::Placeholder => {
480 render_placeholder(msg, fs.score, fs.original_tokens);
481 placeholder_count += 1;
482 }
483 _ => {
485 msg.metadata.fidelity_tag = Some(ContextFidelity::Full);
486 full_count += 1;
487 }
488 }
489 }
490
491 tracing::debug!(
492 full_count,
493 compressed_count,
494 placeholder_count,
495 tokens_saved,
496 "fidelity apply complete"
497 );
498}
499
500fn is_exempt(msg: &Message, idx: usize, inserted_count: usize) -> bool {
501 (idx == 0 && msg.role == Role::System)
506 || msg.metadata.focus_pinned
507 || msg.content.starts_with(CORRECTIONS_PREFIX)
508 || (idx >= 1 && idx < 1 + inserted_count)
509}
510
511fn role_weight(role: Role) -> f32 {
512 match role {
513 Role::System => 1.0,
514 Role::User => 0.8,
515 Role::Assistant => 0.6,
516 }
517}
518
/// Cosine similarity of two vectors, clamped to `0.0..=1.0`.
///
/// Returns `0.0` when the vectors are empty, differ in length, or either has zero
/// magnitude. Negative similarities are clamped to `0.0`.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let magnitude = |v: &[f32]| v.iter().map(|c| c * c).sum::<f32>().sqrt();

    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    let inner: f32 = a.iter().zip(b).map(|(lhs, rhs)| lhs * rhs).sum();
    let (mag_a, mag_b) = (magnitude(a), magnitude(b));
    if mag_a == 0.0 || mag_b == 0.0 {
        0.0
    } else {
        (inner / (mag_a * mag_b)).clamp(0.0, 1.0)
    }
}
531
532fn semantic_overlap(msg_embedding: &[f32], query_embedding: &[f32]) -> f32 {
533 cosine_similarity(msg_embedding, query_embedding)
534}
535
/// Fraction of shared whitespace-separated words between `content` and `query_words`.
///
/// The number of distinct shared words is divided by the size of the smaller word set, so
/// the result lies in `0.0..=1.0`. Returns `0.0` when either side has no words. Matching is
/// exact and case-sensitive.
fn keyword_overlap(content: &str, query_words: &std::collections::HashSet<&str>) -> f32 {
    let content_words = content
        .split_whitespace()
        .collect::<std::collections::HashSet<&str>>();
    let smaller = std::cmp::min(content_words.len(), query_words.len());
    if smaller == 0 {
        return 0.0;
    }
    let shared = query_words
        .iter()
        .filter(|word| content_words.contains(**word))
        .count();
    #[allow(clippy::cast_precision_loss)]
    let ratio = shared as f32 / smaller as f32;
    ratio.clamp(0.0, 1.0)
}
549
550fn plan_relevance(content: &str, planned_tools: &[PlannedToolHint]) -> f32 {
554 if planned_tools.is_empty() {
555 return 0.0;
556 }
557 let content_words: std::collections::HashSet<&str> = content.split_whitespace().collect();
558 let mut weighted_sum = 0.0f32;
559 let mut weight_total = 0.0f32;
560 for hint in planned_tools {
561 let dist = f32::from(hint.distance_from_current.max(1));
562 let weight = 1.0 / dist;
563 weight_total += weight;
564 let hint_words: std::collections::HashSet<&str> =
565 hint.keywords.iter().map(String::as_str).collect();
566 let min_len = content_words.len().min(hint_words.len());
567 if min_len == 0 {
568 continue;
569 }
570 #[allow(clippy::cast_precision_loss)]
571 let overlap = content_words.intersection(&hint_words).count() as f32 / min_len as f32;
572 weighted_sum += weight * overlap.clamp(0.0, 1.0);
573 }
574 if weight_total <= 0.0 {
575 return 0.0;
576 }
577 (weighted_sum / weight_total).clamp(0.0, 1.0)
578}
579
580fn apply_tool_pair_atomicity(
585 messages: &[Message],
586 scores: &mut [Option<FidelityScore>],
587 config: &FidelityConfig,
588) {
589 let mut tool_result_map: std::collections::HashMap<&str, usize> =
591 std::collections::HashMap::new();
592 for (i, msg) in messages.iter().enumerate() {
593 for part in &msg.parts {
594 if let MessagePart::ToolResult { tool_use_id, .. } = part {
595 tool_result_map.insert(tool_use_id.as_str(), i);
596 }
597 }
598 }
599
600 for (i, msg) in messages.iter().enumerate().rev() {
602 for part in &msg.parts {
603 if let MessagePart::ToolUse { id, .. } = part
604 && let Some(&result_idx) = tool_result_map.get(id.as_str())
605 {
606 let score_a = scores[i].as_ref().map_or(1.0, |s| s.score);
607 let score_b = scores[result_idx].as_ref().map_or(1.0, |s| s.score);
608 let min_score = score_a.min(score_b);
609
610 let level_a = scores[i]
614 .as_ref()
615 .map_or(ContextFidelity::Full, |s| s.level);
616 let level_b = scores[result_idx]
617 .as_ref()
618 .map_or(ContextFidelity::Full, |s| s.level);
619 let float_level = score_to_level(min_score, config);
620 let min_level = more_restrictive(more_restrictive(level_a, level_b), float_level);
621
622 let tokens_a = scores[i].as_ref().map_or(0, |s| s.original_tokens);
623 let tokens_b = scores[result_idx].as_ref().map_or(0, |s| s.original_tokens);
624 scores[i] = Some(FidelityScore {
625 score: min_score,
626 level: min_level,
627 original_tokens: tokens_a,
628 });
629 scores[result_idx] = Some(FidelityScore {
630 score: min_score,
631 level: min_level,
632 original_tokens: tokens_b,
633 });
634 }
635 }
636 }
637}
638
639fn more_restrictive(a: ContextFidelity, b: ContextFidelity) -> ContextFidelity {
643 use ContextFidelity::{Compressed, Full, Placeholder};
644 match (a, b) {
645 (Placeholder, _) | (_, Placeholder) => Placeholder,
646 (Compressed, _) | (_, Compressed) => Compressed,
647 _ => Full,
648 }
649}
650
651fn score_to_level(score: f32, config: &FidelityConfig) -> ContextFidelity {
652 if score >= config.full_threshold {
653 ContextFidelity::Full
654 } else if score >= config.compressed_threshold {
655 ContextFidelity::Compressed
656 } else {
657 ContextFidelity::Placeholder
658 }
659}
660
661async fn render_compressed(
662 msg: &mut Message,
663 config: &FidelityConfig,
664 tc: &dyn TokenCounting,
665 provider: Option<&dyn LlmProviderDyn>,
666) {
667 if let Some(summary) = msg.metadata.deferred_summary.take() {
669 msg.content = summary;
670 } else if config.compress_provider.is_some()
671 && let Some(p) = provider
672 {
673 let input_tokens = tc.count_tokens(&msg.content);
675 if input_tokens > config.compressed_max_tokens * 2 && input_tokens > 0 {
677 if let Some(max_in) = config.max_compress_input_tokens {
679 apply_input_cap(&mut msg.content, max_in);
680 }
681
682 let prompt = format!(
683 "Summarize in {} tokens or fewer: {}",
684 config.compressed_max_tokens, msg.content
685 );
686 let req = vec![Message {
687 role: Role::User,
688 content: prompt,
689 parts: vec![],
690 metadata: MessageMetadata::default(),
691 }];
692
693 let span = info_span!(
694 "context.fidelity.compress_llm",
695 input_tokens,
696 cached = false,
697 );
698 let result = {
699 let _enter = span.enter();
700 tokio::time::timeout(Duration::from_secs(30), p.chat(&req)).await
701 };
702
703 match result {
704 Ok(Ok(summary)) => {
705 msg.metadata.deferred_summary = Some(summary.clone());
706 msg.content = summary;
707 }
708 Ok(Err(e)) => {
709 tracing::debug!(error = %e, "compress_llm failed, falling back to truncation");
710 }
711 Err(_) => {
712 tracing::warn!("compress_llm timed out, falling back to truncation");
713 }
714 }
715 }
716 } else if let Some(max_in) = config.max_compress_input_tokens {
717 apply_input_cap(&mut msg.content, max_in);
719 }
720
721 truncate_to_tokens(&mut msg.content, config.compressed_max_tokens, tc);
724 msg.parts.clear();
725 msg.metadata.fidelity_tag = Some(ContextFidelity::Compressed);
726}
727
728pub fn apply_input_cap(content: &mut String, max_tokens: usize) {
736 let max_bytes = max_tokens.saturating_mul(4);
737 if content.len() > max_bytes {
738 let boundary = content.floor_char_boundary(max_bytes);
739 content.truncate(boundary);
740 }
741}
742
743fn truncate_to_tokens(content: &mut String, max_tokens: usize, tc: &dyn TokenCounting) {
744 if tc.count_tokens(content) <= max_tokens {
745 return;
746 }
747 let mut lo: usize = 0;
752 let mut hi: usize = content.len();
753 while hi - lo > 1 {
754 let mid = content.floor_char_boundary(usize::midpoint(lo, hi));
755 if mid == lo {
756 hi = mid;
759 } else if tc.count_tokens(&content[..mid]) <= max_tokens {
760 lo = mid;
761 } else {
762 hi = mid;
763 }
764 }
765 content.truncate(lo);
766}
767
768fn render_placeholder(msg: &mut Message, score: f32, original_tokens: u32) {
769 let role_str = match msg.role {
770 Role::System => "system",
771 Role::User => "user",
772 Role::Assistant => "assistant",
773 };
774 msg.content = format!(
775 "[placeholder: role={role_str}, original_tokens={original_tokens}, importance={score:.2}]"
776 );
777 msg.parts.clear();
778 msg.metadata.fidelity_tag = Some(ContextFidelity::Placeholder);
779}
780
781fn merge_consecutive_placeholders(messages: &mut Vec<Message>) -> usize {
785 let mut merged_count = 0usize;
786 let mut i = 0;
787 while i < messages.len() {
788 if messages[i].metadata.fidelity_tag != Some(ContextFidelity::Placeholder)
789 || messages[i].role == Role::System
790 {
791 i += 1;
792 continue;
793 }
794 let role = messages[i].role;
795 let mut j = i + 1;
796 while j < messages.len()
797 && messages[j].metadata.fidelity_tag == Some(ContextFidelity::Placeholder)
798 && messages[j].role == role
799 {
800 j += 1;
801 }
802 if j - i <= 1 {
803 i += 1;
804 continue;
805 }
806 let count = j - i;
807 let mut total_tokens = 0u32;
808 let mut importance_sum = 0.0f32;
809 for msg in &messages[i..j] {
810 total_tokens += parse_placeholder_tokens(&msg.content);
811 importance_sum += parse_placeholder_importance(&msg.content);
812 }
813 debug_assert!(count >= 2, "placeholder merge triggered with count={count}");
814 #[allow(clippy::cast_precision_loss)]
815 let avg_importance = if count > 0 {
816 importance_sum / count as f32
817 } else {
818 0.0
819 };
820 let role_str = match role {
821 Role::System => "system",
822 Role::User => "user",
823 Role::Assistant => "assistant",
824 };
825 let merged_content = format!(
826 "[placeholder: {count} messages, role={role_str}, total_tokens={total_tokens}, avg_importance={avg_importance:.2}]"
827 );
828 let first = messages[i].clone();
829 messages.drain(i..j);
830 messages.insert(
831 i,
832 Message {
833 role: first.role,
834 content: merged_content,
835 parts: vec![],
836 metadata: {
837 let mut m = first.metadata;
838 m.fidelity_tag = Some(ContextFidelity::Placeholder);
839 m
840 },
841 },
842 );
843 merged_count += count - 1;
844 i += 1;
845 }
846 merged_count
847}
848
/// Extracts the token count from a placeholder string.
///
/// Scans the comma-separated fields for the first one that starts with `original_tokens=`
/// or `total_tokens=` and whose value (with any trailing `]` removed) parses as a `u32`.
/// Returns `0` when no such field exists.
fn parse_placeholder_tokens(content: &str) -> u32 {
    const KEYS: [&str; 2] = ["original_tokens=", "total_tokens="];

    content
        .split(',')
        .map(str::trim)
        .find_map(|field| {
            KEYS.iter().find_map(|key| {
                field
                    .strip_prefix(*key)?
                    .trim_end_matches(']')
                    .trim()
                    .parse::<u32>()
                    .ok()
            })
        })
        .unwrap_or(0)
}
862
/// Extracts the importance value from a placeholder string.
///
/// Scans the comma-separated fields for the first one that starts with `importance=` or
/// `avg_importance=` and whose value (with any trailing `]` removed) parses as an `f32`.
/// Returns `0.0` when no such field exists.
fn parse_placeholder_importance(content: &str) -> f32 {
    const KEYS: [&str; 2] = ["importance=", "avg_importance="];

    content
        .split(',')
        .map(str::trim)
        .find_map(|field| {
            KEYS.iter().find_map(|key| {
                field
                    .strip_prefix(*key)?
                    .trim_end_matches(']')
                    .trim()
                    .parse::<f32>()
                    .ok()
            })
        })
        .unwrap_or(0.0)
}
876
877#[cfg(test)]
878mod tests {
879 use super::*;
880 use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
881
882 struct FixedTc(usize);
883 impl TokenCounting for FixedTc {
884 fn count_tokens(&self, text: &str) -> usize {
885 text.len() / self.0.max(1)
886 }
887
888 fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize {
889 0
890 }
891 }
892
893 fn make_msg(role: Role, content: &str) -> Message {
894 Message {
895 role,
896 content: content.to_string(),
897 parts: vec![],
898 metadata: MessageMetadata::default(),
899 }
900 }
901
902 fn make_cfg() -> FidelityConfig {
903 FidelityConfig {
904 enabled: true,
905 w_semantic: 0.3,
906 w_temporal: 0.3,
907 w_importance: 0.2,
908 w_plan: 0.2,
909 full_threshold: 0.7,
910 compressed_threshold: 0.3,
911 compressed_max_tokens: 50,
912 regrade_threshold: 0.6,
913 min_query_length: 8,
914 max_scored_messages: 500,
915 exempt_tail_messages: 0,
916 compress_provider: None,
917 semantic_scoring_provider: None,
918 lookahead_depth: 3,
919 embed_concurrency: 32,
920 max_embed_input_tokens: None,
921 max_compress_input_tokens: None,
922 }
923 }
924
925 #[tokio::test]
927 async fn empty_window_no_change() {
928 let scorer = FidelityScorer;
929 let cfg = make_cfg();
930 let tc = FixedTc(4);
931 let mut messages: Vec<Message> = vec![];
932 scorer
933 .score_and_apply(
934 &mut messages,
935 "query text",
936 &[],
937 &cfg,
938 &tc,
939 0,
940 false,
941 None,
942 None,
943 )
944 .await;
945 assert!(messages.is_empty());
946 }
947
948 #[tokio::test]
950 async fn all_exempt_no_downgrade() {
951 let scorer = FidelityScorer;
952 let cfg = make_cfg();
953 let tc = FixedTc(4);
954 let mut messages = vec![
955 make_msg(Role::System, "system prompt"),
956 make_msg(Role::User, "memory context"),
958 ];
959 scorer
960 .score_and_apply(&mut messages, "short", &[], &cfg, &tc, 1, false, None, None)
961 .await;
962 for msg in &messages {
963 assert!(
964 msg.metadata.fidelity_tag.is_none()
965 || msg.metadata.fidelity_tag == Some(ContextFidelity::Full)
966 );
967 }
968 }
969
970 #[tokio::test]
972 async fn tool_pair_atomicity() {
973 let scorer = FidelityScorer;
974 let cfg = FidelityConfig {
976 full_threshold: 0.9,
977 compressed_threshold: 0.5,
978 ..make_cfg()
979 };
980 let tc = FixedTc(4);
981 let tool_use_id = "abc123".to_string();
982 let mut tool_use_msg = make_msg(Role::Assistant, "calling tool");
983 tool_use_msg.parts = vec![MessagePart::ToolUse {
984 id: tool_use_id.clone(),
985 name: "shell".to_string(),
986 input: serde_json::json!({}),
987 }];
988 let mut tool_result_msg = make_msg(Role::User, "tool result body");
989 tool_result_msg.parts = vec![MessagePart::ToolResult {
990 tool_use_id: tool_use_id.clone(),
991 content: "result".to_string(),
992 is_error: false,
993 }];
994 let mut messages = vec![
995 make_msg(Role::System, "system"),
996 tool_use_msg,
997 tool_result_msg,
998 ];
999 scorer
1000 .score_and_apply(
1001 &mut messages,
1002 "completely unrelated query blah",
1003 &[],
1004 &cfg,
1005 &tc,
1006 0,
1007 false,
1008 None,
1009 None,
1010 )
1011 .await;
1012 let tag_a = messages[1].metadata.fidelity_tag;
1013 let tag_b = messages[2].metadata.fidelity_tag;
1014 assert_eq!(tag_a, tag_b, "tool pair must share fidelity level");
1015 }
1016
1017 #[tokio::test]
1019 async fn same_role_placeholder_merge() {
1020 let scorer = FidelityScorer;
1021 let cfg = FidelityConfig {
1023 full_threshold: 2.0, compressed_threshold: 1.5, ..make_cfg()
1026 };
1027 let tc = FixedTc(4);
1028 let mut messages: Vec<Message> = std::iter::once(make_msg(Role::System, "system"))
1029 .chain((0..5).map(|i| make_msg(Role::Assistant, &format!("msg {i}"))))
1030 .collect();
1031 scorer
1032 .score_and_apply(
1033 &mut messages,
1034 "some query here",
1035 &[],
1036 &cfg,
1037 &tc,
1038 0,
1039 false,
1040 None,
1041 None,
1042 )
1043 .await;
1044 assert_eq!(
1046 messages.len(),
1047 2,
1048 "5 assistant placeholders must merge to 1"
1049 );
1050 assert!(messages[1].content.contains("5 messages"));
1051 }
1052
1053 #[tokio::test]
1055 async fn score_normalization_no_panic() {
1056 let scorer = FidelityScorer;
1057 let cfg = make_cfg();
1058 let tc = FixedTc(4);
1059 let mut messages = vec![
1060 make_msg(Role::System, "system"),
1061 make_msg(Role::User, "hello"),
1062 make_msg(Role::Assistant, "world response"),
1063 ];
1064 scorer
1065 .score_and_apply(
1066 &mut messages,
1067 "hello world signal",
1068 &[],
1069 &cfg,
1070 &tc,
1071 0,
1072 false,
1073 None,
1074 None,
1075 )
1076 .await;
1077 for msg in &messages {
1078 let _ = msg.metadata.fidelity_tag;
1079 }
1080 }
1081
1082 #[tokio::test]
1084 async fn short_query_fallback() {
1085 let scorer = FidelityScorer;
1086 let cfg = FidelityConfig {
1087 min_query_length: 8,
1088 ..make_cfg()
1089 };
1090 let tc = FixedTc(4);
1091 let mut messages = vec![
1092 make_msg(Role::System, "system"),
1093 make_msg(Role::User, "test"),
1094 ];
1095 scorer
1097 .score_and_apply(&mut messages, "short", &[], &cfg, &tc, 0, false, None, None)
1098 .await;
1099 }
1100
1101 #[tokio::test]
1107 async fn memory_first_bypass_is_callers_responsibility() {
1108 let scorer = FidelityScorer;
1109 let cfg = FidelityConfig {
1112 enabled: false,
1113 ..make_cfg()
1114 };
1115 let tc = FixedTc(4);
1116 let mut messages = vec![
1117 make_msg(Role::System, "system prompt"),
1118 make_msg(Role::User, "memory-injected context"),
1119 make_msg(Role::Assistant, "response"),
1120 ];
1121 let before: Vec<_> = messages.iter().map(|m| m.content.clone()).collect();
1122 scorer
1124 .score_and_apply(
1125 &mut messages,
1126 "some user query text here",
1127 &[],
1128 &cfg,
1129 &tc,
1130 2,
1131 false,
1132 None,
1133 None,
1134 )
1135 .await;
1136 for (msg, orig) in messages.iter().zip(&before) {
1137 assert_eq!(msg.content, *orig, "content must be unchanged");
1138 assert!(
1139 msg.metadata.fidelity_tag.is_none(),
1140 "no fidelity tag must be set"
1141 );
1142 }
1143 }
1144
1145 #[tokio::test]
1147 async fn enabled_false_guard() {
1148 let scorer = FidelityScorer;
1149 let cfg = FidelityConfig {
1150 enabled: false,
1151 ..make_cfg()
1152 };
1153 let tc = FixedTc(4);
1154 let mut messages = vec![
1155 make_msg(Role::System, "system"),
1156 make_msg(Role::User, "user message that would normally be scored"),
1157 ];
1158 let original_contents: Vec<String> = messages.iter().map(|m| m.content.clone()).collect();
1159 scorer
1160 .score_and_apply(
1161 &mut messages,
1162 "query text here",
1163 &[],
1164 &cfg,
1165 &tc,
1166 0,
1167 false,
1168 None,
1169 None,
1170 )
1171 .await;
1172 for (msg, orig) in messages.iter().zip(&original_contents) {
1173 assert_eq!(msg.content, *orig);
1174 assert!(msg.metadata.fidelity_tag.is_none());
1175 }
1176 }
1177
1178 #[tokio::test]
1180 async fn score_always_in_range() {
1181 let scorer = FidelityScorer;
1182 let cfg = FidelityConfig {
1183 enabled: true,
1184 w_semantic: 0.0,
1185 w_temporal: 0.0,
1186 w_importance: 0.0,
1187 w_plan: 0.0,
1188 full_threshold: 0.7,
1189 compressed_threshold: 0.3,
1190 compressed_max_tokens: 50,
1191 regrade_threshold: 0.6,
1192 min_query_length: 0,
1193 max_scored_messages: 500,
1194 exempt_tail_messages: 0,
1195 compress_provider: None,
1196 semantic_scoring_provider: None,
1197 lookahead_depth: 3,
1198 embed_concurrency: 32,
1199 max_embed_input_tokens: None,
1200 max_compress_input_tokens: None,
1201 };
1202 let tc = FixedTc(4);
1203 let mut messages = vec![make_msg(Role::System, ""), make_msg(Role::User, "")];
1204 scorer
1206 .score_and_apply(&mut messages, "", &[], &cfg, &tc, 0, false, None, None)
1207 .await;
1208 }
1209
1210 #[tokio::test]
1212 async fn placeholder_uses_tc_count_tokens() {
1213 let scorer = FidelityScorer;
1214 let cfg = FidelityConfig {
1215 full_threshold: 2.0,
1216 compressed_threshold: 1.5,
1217 ..make_cfg()
1218 };
1219 let tc = FixedTc(1); let mut messages = vec![
1221 make_msg(Role::System, "system"),
1222 make_msg(Role::User, "user message content for placeholder rendering"),
1223 ];
1224 scorer
1225 .score_and_apply(
1226 &mut messages,
1227 "some query text here",
1228 &[],
1229 &cfg,
1230 &tc,
1231 0,
1232 false,
1233 None,
1234 None,
1235 )
1236 .await;
1237 assert_eq!(
1238 messages[1].metadata.fidelity_tag,
1239 Some(ContextFidelity::Placeholder)
1240 );
1241 assert!(messages[1].content.starts_with("[placeholder:"));
1242 }
1243
1244 #[tokio::test]
1251 async fn exempt_tail_messages_large_window() {
1252 let scorer = FidelityScorer;
1253 let cfg = FidelityConfig {
1254 full_threshold: 2.0,
1256 compressed_threshold: 1.5,
1257 max_scored_messages: 10,
1258 exempt_tail_messages: 5,
1259 ..make_cfg()
1260 };
1261 let tc = FixedTc(4);
1262
1263 let mut messages: Vec<Message> = std::iter::once(make_msg(Role::System, "system prompt"))
1270 .chain((1..15).map(|i| make_msg(Role::Assistant, &format!("assistant message {i}"))))
1271 .chain((15..20).map(|i| {
1272 let mut m = make_msg(Role::User, &format!("tail message {i}"));
1273 m.metadata.focus_pinned = true;
1274 m
1275 }))
1276 .collect();
1277
1278 scorer
1279 .score_and_apply(
1280 &mut messages,
1281 "query text here long",
1282 &[],
1283 &cfg,
1284 &tc,
1285 0,
1286 false,
1287 None,
1288 None,
1289 )
1290 .await;
1291
1292 let tail: Vec<_> = messages
1294 .iter()
1295 .filter(|m| m.metadata.focus_pinned)
1296 .collect();
1297 assert_eq!(
1298 tail.len(),
1299 5,
1300 "all 5 tail messages must survive the merge pass"
1301 );
1302 for msg in &tail {
1303 assert!(
1304 msg.metadata.fidelity_tag.is_none(),
1305 "tail message must have no fidelity_tag, got {:?}",
1306 msg.metadata.fidelity_tag
1307 );
1308 }
1309 }
1310
1311 #[tokio::test]
1315 async fn exempt_tail_messages_small_window_no_effect() {
1316 let scorer = FidelityScorer;
1317 let cfg = FidelityConfig {
1318 full_threshold: 2.0,
1319 compressed_threshold: 1.5,
1320 max_scored_messages: 10,
1321 exempt_tail_messages: 5,
1322 ..make_cfg()
1323 };
1324 let tc = FixedTc(4);
1325 let roles = [Role::User, Role::Assistant];
1328 let mut messages: Vec<Message> = std::iter::once(make_msg(Role::System, "system prompt"))
1329 .chain((1..8usize).map(|i| make_msg(roles[i % 2], &format!("message {i}"))))
1330 .collect();
1331 scorer
1332 .score_and_apply(
1333 &mut messages,
1334 "query text here long",
1335 &[],
1336 &cfg,
1337 &tc,
1338 0,
1339 false,
1340 None,
1341 None,
1342 )
1343 .await;
1344 let untagged_count = messages[1..]
1347 .iter()
1348 .filter(|m| m.metadata.fidelity_tag.is_none())
1349 .count();
1350 assert_eq!(
1351 untagged_count, 0,
1352 "all non-system messages must be scored when n <= max_scored_messages"
1353 );
1354 }
1355
1356 #[tokio::test]
1358 async fn compressed_uses_deferred_summary() {
1359 let scorer = FidelityScorer;
1360 let cfg = FidelityConfig {
1361 full_threshold: 2.0, compressed_threshold: 0.0, compressed_max_tokens: 5,
1364 ..make_cfg()
1365 };
1366 let tc = FixedTc(4);
1367 let mut msg_with_summary =
1368 make_msg(Role::User, "original long content that would be truncated");
1369 msg_with_summary.metadata.deferred_summary = Some("short summary".to_string());
1370 let mut messages = vec![make_msg(Role::System, "system"), msg_with_summary];
1371 scorer
1372 .score_and_apply(
1373 &mut messages,
1374 "query text here long",
1375 &[],
1376 &cfg,
1377 &tc,
1378 0,
1379 false,
1380 None,
1381 None,
1382 )
1383 .await;
1384 assert_eq!(
1385 messages[1].metadata.fidelity_tag,
1386 Some(ContextFidelity::Compressed)
1387 );
1388 assert_eq!(messages[1].content, "short summary");
1389 }
1390
1391 fn make_msg_with_fidelity(role: Role, content: &str, tag: Option<ContextFidelity>) -> Message {
1394 let mut m = make_msg(role, content);
1395 m.metadata.fidelity_tag = tag;
1396 m
1397 }
1398
1399 #[tokio::test]
1401 async fn floor_prevents_compressed_upgrade_to_full() {
1402 let scorer = FidelityScorer;
1403 let cfg = FidelityConfig {
1404 full_threshold: 0.0,
1406 compressed_threshold: -1.0,
1407 ..make_cfg()
1408 };
1409 let tc = FixedTc(4);
1410 let mut messages = vec![
1411 make_msg(Role::System, "system"),
1412 make_msg_with_fidelity(
1413 Role::User,
1414 "query text here long keyword",
1415 Some(ContextFidelity::Compressed),
1416 ),
1417 ];
1418 scorer
1419 .score_and_apply(
1420 &mut messages,
1421 "query text here long keyword",
1422 &[],
1423 &cfg,
1424 &tc,
1425 0,
1426 false,
1427 None,
1428 None,
1429 )
1430 .await;
1431 assert_eq!(
1432 messages[1].metadata.fidelity_tag,
1433 Some(ContextFidelity::Compressed),
1434 "Compressed floor must block upgrade to Full"
1435 );
1436 }
1437
1438 #[tokio::test]
1440 async fn floor_prevents_placeholder_upgrade_to_full() {
1441 let scorer = FidelityScorer;
1442 let cfg = FidelityConfig {
1443 full_threshold: 0.0,
1444 compressed_threshold: -1.0,
1445 ..make_cfg()
1446 };
1447 let tc = FixedTc(4);
1448 let mut messages = vec![
1449 make_msg(Role::System, "system"),
1450 make_msg_with_fidelity(
1451 Role::User,
1452 "query text here long keyword",
1453 Some(ContextFidelity::Placeholder),
1454 ),
1455 ];
1456 scorer
1457 .score_and_apply(
1458 &mut messages,
1459 "query text here long keyword",
1460 &[],
1461 &cfg,
1462 &tc,
1463 0,
1464 false,
1465 None,
1466 None,
1467 )
1468 .await;
1469 assert_eq!(
1470 messages[1].metadata.fidelity_tag,
1471 Some(ContextFidelity::Placeholder),
1472 "Placeholder floor must block upgrade to Full"
1473 );
1474 }
1475
1476 #[tokio::test]
1478 async fn floor_prevents_placeholder_upgrade_to_compressed() {
1479 let scorer = FidelityScorer;
1480 let cfg = FidelityConfig {
1482 full_threshold: 2.0,
1483 compressed_threshold: 0.0,
1484 ..make_cfg()
1485 };
1486 let tc = FixedTc(4);
1487 let mut messages = vec![
1488 make_msg(Role::System, "system"),
1489 make_msg_with_fidelity(
1490 Role::User,
1491 "message content",
1492 Some(ContextFidelity::Placeholder),
1493 ),
1494 ];
1495 scorer
1496 .score_and_apply(
1497 &mut messages,
1498 "query text here long",
1499 &[],
1500 &cfg,
1501 &tc,
1502 0,
1503 false,
1504 None,
1505 None,
1506 )
1507 .await;
1508 assert_eq!(
1509 messages[1].metadata.fidelity_tag,
1510 Some(ContextFidelity::Placeholder),
1511 "Placeholder floor must block upgrade to Compressed"
1512 );
1513 }
1514
1515 #[tokio::test]
1517 async fn floor_allows_further_downgrade() {
1518 let scorer = FidelityScorer;
1519 let cfg = FidelityConfig {
1520 full_threshold: 2.0,
1521 compressed_threshold: 2.0, ..make_cfg()
1523 };
1524 let tc = FixedTc(4);
1525 let mut messages = vec![
1526 make_msg(Role::System, "system"),
1527 make_msg_with_fidelity(
1528 Role::User,
1529 "some content",
1530 Some(ContextFidelity::Compressed),
1531 ),
1532 ];
1533 scorer
1534 .score_and_apply(
1535 &mut messages,
1536 "query text here long",
1537 &[],
1538 &cfg,
1539 &tc,
1540 0,
1541 false,
1542 None,
1543 None,
1544 )
1545 .await;
1546 assert_eq!(
1547 messages[1].metadata.fidelity_tag,
1548 Some(ContextFidelity::Placeholder),
1549 "downgrade from Compressed to Placeholder must be allowed"
1550 );
1551 }
1552
1553 #[tokio::test]
1555 async fn floor_no_constraint_when_none() {
1556 let scorer = FidelityScorer;
1557 let cfg = FidelityConfig {
1559 full_threshold: 0.0,
1560 compressed_threshold: -1.0,
1561 ..make_cfg()
1562 };
1563 let tc = FixedTc(4);
1564 let mut messages = vec![
1565 make_msg(Role::System, "system"),
1566 make_msg_with_fidelity(Role::User, "query text here long keyword", None),
1567 ];
1568 scorer
1569 .score_and_apply(
1570 &mut messages,
1571 "query text here long keyword",
1572 &[],
1573 &cfg,
1574 &tc,
1575 0,
1576 false,
1577 None,
1578 None,
1579 )
1580 .await;
1581 assert_eq!(
1582 messages[1].metadata.fidelity_tag,
1583 Some(ContextFidelity::Full),
1584 "None tag must not constrain scoring"
1585 );
1586 }
1587
1588 #[tokio::test]
1590 async fn allow_upgrade_bypasses_floor() {
1591 let scorer = FidelityScorer;
1592 let cfg = FidelityConfig {
1593 full_threshold: 0.0,
1594 compressed_threshold: -1.0,
1595 ..make_cfg()
1596 };
1597 let tc = FixedTc(4);
1598 let mut messages = vec![
1599 make_msg(Role::System, "system"),
1600 make_msg_with_fidelity(
1601 Role::User,
1602 "query text here long keyword",
1603 Some(ContextFidelity::Placeholder),
1604 ),
1605 ];
1606 scorer
1607 .score_and_apply(
1608 &mut messages,
1609 "query text here long keyword",
1610 &[],
1611 &cfg,
1612 &tc,
1613 0,
1614 true,
1615 None,
1616 None,
1617 )
1618 .await;
1619 assert_eq!(
1620 messages[1].metadata.fidelity_tag,
1621 Some(ContextFidelity::Full),
1622 "allow_upgrade=true must bypass the Placeholder floor"
1623 );
1624 }
1625
1626 #[test]
1630 fn truncate_no_op_below_limit() {
1631 let tc = FixedTc(1); let mut s = "hello".to_string(); truncate_to_tokens(&mut s, 10, &tc);
1634 assert_eq!(s, "hello");
1635 }
1636
1637 #[test]
1639 fn truncate_no_op_at_limit() {
1640 let tc = FixedTc(1);
1641 let mut s = "hello".to_string(); truncate_to_tokens(&mut s, 5, &tc);
1643 assert_eq!(s, "hello");
1644 }
1645
1646 #[test]
1648 fn truncate_minimal_one_over_limit() {
1649 let tc = FixedTc(1); let mut s = "abcdef".to_string(); truncate_to_tokens(&mut s, 5, &tc);
1652 assert!(
1653 tc.count_tokens(&s) <= 5,
1654 "result must fit in 5 tokens, got {}",
1655 tc.count_tokens(&s)
1656 );
1657 assert!(!s.is_empty(), "must keep prefix, not empty");
1658 }
1659
1660 #[test]
1662 fn truncate_preserves_90pct_of_limit() {
1663 let tc = FixedTc(1);
1665 let s_orig = "a".repeat(90);
1666 let mut s = s_orig.clone();
1667 truncate_to_tokens(&mut s, 100, &tc);
1668 assert_eq!(s, s_orig, "90% of limit must not be truncated");
1669 }
1670
1671 #[test]
1673 fn truncate_empty_string_no_op() {
1674 let tc = FixedTc(1);
1675 let mut s = String::new();
1676 truncate_to_tokens(&mut s, 5, &tc);
1677 assert!(s.is_empty());
1678 }
1679
1680 #[test]
1682 fn truncate_max_tokens_zero_clears_content() {
1683 let tc = FixedTc(1);
1684 let mut s = "hello world".to_string();
1685 truncate_to_tokens(&mut s, 0, &tc);
1686 assert!(s.is_empty(), "max_tokens=0 must clear content");
1687 }
1688
1689 #[test]
1691 fn truncate_multibyte_stays_on_char_boundary() {
1692 let tc = FixedTc(3);
1696 let mut s = "日本語".to_string();
1697 truncate_to_tokens(&mut s, 2, &tc);
1698 assert!(
1699 s.is_char_boundary(s.len()),
1700 "result must be on a valid char boundary"
1701 );
1702 assert!(tc.count_tokens(&s) <= 2);
1703 assert_eq!(s, "日本");
1704 }
1705
1706 #[tokio::test]
1708 async fn mixed_fidelity_tool_pair_floor_plus_atomicity() {
1709 let scorer = FidelityScorer;
1710 let cfg = FidelityConfig {
1712 full_threshold: 0.0,
1713 compressed_threshold: -1.0,
1714 ..make_cfg()
1715 };
1716 let tc = FixedTc(4);
1717 let tool_id = "tool-42".to_string();
1718
1719 let mut tool_use_msg = make_msg_with_fidelity(Role::Assistant, "call tool", None);
1720 tool_use_msg.parts = vec![MessagePart::ToolUse {
1721 id: tool_id.clone(),
1722 name: "shell".to_string(),
1723 input: serde_json::json!({}),
1724 }];
1725
1726 let mut tool_result_msg =
1727 make_msg_with_fidelity(Role::User, "result body", Some(ContextFidelity::Compressed));
1728 tool_result_msg.parts = vec![MessagePart::ToolResult {
1729 tool_use_id: tool_id.clone(),
1730 content: "output".to_string(),
1731 is_error: false,
1732 }];
1733
1734 let mut messages = vec![
1735 make_msg(Role::System, "system"),
1736 tool_use_msg,
1737 tool_result_msg,
1738 ];
1739
1740 scorer
1741 .score_and_apply(
1742 &mut messages,
1743 "query text here long",
1744 &[],
1745 &cfg,
1746 &tc,
1747 0,
1748 false,
1749 None,
1750 None,
1751 )
1752 .await;
1753
1754 let tag_use = messages[1].metadata.fidelity_tag;
1758 let tag_result = messages[2].metadata.fidelity_tag;
1759 assert_eq!(
1760 tag_use, tag_result,
1761 "tool pair must share the same fidelity level"
1762 );
1763 assert_eq!(
1764 tag_use,
1765 Some(ContextFidelity::Compressed),
1766 "atomicity must bring the tool-use down to the tool-result floor"
1767 );
1768 }
1769
1770 #[tokio::test]
1772 async fn compress_llm_path_stores_deferred_summary() {
1773 use zeph_llm::LlmError;
1774 use zeph_llm::provider::ChatStream;
1775
1776 #[derive(Debug)]
1777 struct MockProvider;
1778
1779 impl zeph_llm::provider::LlmProvider for MockProvider {
1780 async fn chat(&self, _messages: &[Message]) -> Result<String, LlmError> {
1781 Ok("summary text".to_string())
1782 }
1783
1784 async fn chat_stream(&self, _messages: &[Message]) -> Result<ChatStream, LlmError> {
1785 Err(LlmError::Unavailable)
1786 }
1787
1788 fn supports_streaming(&self) -> bool {
1789 false
1790 }
1791
1792 async fn embed(&self, _text: &str) -> Result<Vec<f32>, LlmError> {
1793 Err(LlmError::EmbedUnsupported {
1794 provider: "mock".into(),
1795 })
1796 }
1797
1798 fn supports_embeddings(&self) -> bool {
1799 false
1800 }
1801
1802 fn name(&self) -> &'static str {
1803 "mock"
1804 }
1805 }
1806
1807 let scorer = FidelityScorer;
1808 let cfg = FidelityConfig {
1809 enabled: true,
1810 full_threshold: 2.0,
1812 compressed_threshold: 0.0,
1813 compressed_max_tokens: 5,
1814 compress_provider: Some("mock".to_string()),
1815 ..make_cfg()
1816 };
1817 let tc = FixedTc(1);
1819 let content = "a".repeat(50); let mut messages = vec![
1821 make_msg(Role::System, "system"),
1822 make_msg(Role::User, &content),
1823 ];
1824
1825 let provider = MockProvider;
1826 scorer
1827 .score_and_apply(
1828 &mut messages,
1829 "some query text here",
1830 &[],
1831 &cfg,
1832 &tc,
1833 0,
1834 false,
1835 None,
1836 Some(&provider),
1837 )
1838 .await;
1839
1840 assert_eq!(
1841 messages[1].metadata.fidelity_tag,
1842 Some(ContextFidelity::Compressed),
1843 );
1844 assert!(
1846 tc.count_tokens(&messages[1].content) <= 5,
1847 "content must be capped to compressed_max_tokens after LLM summary"
1848 );
1849 assert_eq!(
1851 messages[1].metadata.deferred_summary,
1852 Some("summary text".to_string()),
1853 );
1854 }
1855
1856 #[tokio::test]
1858 async fn compress_llm_skipped_when_provider_none() {
1859 let scorer = FidelityScorer;
1860 let cfg = FidelityConfig {
1861 enabled: true,
1862 full_threshold: 2.0,
1863 compressed_threshold: 0.0,
1864 compressed_max_tokens: 5,
1865 compress_provider: Some("mock".to_string()),
1866 ..make_cfg()
1867 };
1868 let tc = FixedTc(1);
1869 let content = "a".repeat(50);
1870 let mut messages = vec![
1871 make_msg(Role::System, "system"),
1872 make_msg(Role::User, &content),
1873 ];
1874
1875 scorer
1876 .score_and_apply(
1877 &mut messages,
1878 "some query text here",
1879 &[],
1880 &cfg,
1881 &tc,
1882 0,
1883 false,
1884 None,
1885 None,
1886 )
1887 .await;
1888
1889 assert_eq!(
1890 messages[1].metadata.fidelity_tag,
1891 Some(ContextFidelity::Compressed),
1892 );
1893 assert!(
1895 messages[1].metadata.deferred_summary.is_none(),
1896 "deferred_summary must not be populated via truncation path"
1897 );
1898 assert!(
1900 messages[1].content.len() <= 5,
1901 "content must be truncated, got len={}",
1902 messages[1].content.len()
1903 );
1904 }
1905
1906 #[test]
1908 fn cosine_similarity_identical() {
1909 let v = vec![1.0f32, 0.0, 0.0];
1910 assert!((cosine_similarity(&v, &v) - 1.0).abs() < 1e-6);
1911 }
1912
1913 #[test]
1914 fn cosine_similarity_orthogonal() {
1915 let a = vec![1.0f32, 0.0, 0.0];
1916 let b = vec![0.0f32, 1.0, 0.0];
1917 assert!(cosine_similarity(&a, &b).abs() < 1e-6);
1918 }
1919
1920 #[test]
1921 fn cosine_similarity_zero_vector() {
1922 let a = vec![0.0f32, 0.0, 0.0];
1923 let b = vec![1.0f32, 0.0, 0.0];
1924 assert!(cosine_similarity(&a, &b).abs() < f32::EPSILON);
1925 }
1926
1927 #[test]
1928 fn cosine_similarity_empty() {
1929 assert!(cosine_similarity(&[], &[]).abs() < f32::EPSILON);
1930 }
1931
1932 #[test]
1933 fn cosine_similarity_dimension_mismatch() {
1934 let a = vec![1.0f32, 0.0];
1935 let b = vec![1.0f32, 0.0, 0.0];
1936 assert!(cosine_similarity(&a, &b).abs() < f32::EPSILON);
1937 }
1938
1939 #[tokio::test]
1946 async fn semantic_scoring_higher_for_similar_messages() {
1947 use zeph_llm::LlmError;
1948 use zeph_llm::provider::ChatStream;
1949
1950 #[derive(Debug)]
1951 struct EmbedMockProvider;
1952
1953 impl zeph_llm::provider::LlmProvider for EmbedMockProvider {
1954 async fn chat(&self, _: &[Message]) -> Result<String, LlmError> {
1955 Err(LlmError::Unavailable)
1956 }
1957 async fn chat_stream(&self, _: &[Message]) -> Result<ChatStream, LlmError> {
1958 Err(LlmError::Unavailable)
1959 }
1960 fn supports_streaming(&self) -> bool {
1961 false
1962 }
1963 async fn embed(&self, text: &str) -> Result<Vec<f32>, LlmError> {
1964 let v = if text.contains("cat")
1965 || text.contains("mat")
1966 || text.contains("feline")
1967 || text.contains("rug")
1968 {
1969 if text.contains("feline") || text.contains("rug") {
1970 vec![0.9f32, 0.1, 0.0]
1971 } else {
1972 vec![1.0f32, 0.0, 0.0]
1973 }
1974 } else {
1975 vec![0.0f32, 0.0, 1.0]
1976 };
1977 Ok(v)
1978 }
1979 fn supports_embeddings(&self) -> bool {
1980 true
1981 }
1982 fn name(&self) -> &'static str {
1983 "embed-mock"
1984 }
1985 }
1986
1987 let provider = EmbedMockProvider;
1988 let scorer = FidelityScorer;
1989 let cfg = FidelityConfig {
1990 enabled: true,
1991 semantic_scoring_provider: Some("embed-mock".to_string()),
1992 full_threshold: 0.0,
1996 compressed_threshold: 0.0,
1997 w_semantic: 1.0,
1998 w_temporal: 0.0,
1999 w_importance: 0.0,
2000 w_plan: 0.0,
2001 ..make_cfg()
2002 };
2003 let tc = FixedTc(4);
2004 let cat_msg = make_msg(Role::User, "The cat is on the mat");
2005 let feline_msg = make_msg(Role::User, "A feline rests on the rug");
2006 let stock_msg = make_msg(Role::User, "Stock prices fell today");
2007 let mut messages = vec![
2008 make_msg(Role::System, "system"),
2009 cat_msg,
2010 feline_msg,
2011 stock_msg,
2012 ];
2013
2014 scorer
2015 .score_and_apply(
2016 &mut messages,
2017 "cat mat",
2018 &[],
2019 &cfg,
2020 &tc,
2021 0,
2022 false,
2023 Some(&provider),
2024 None,
2025 )
2026 .await;
2027
2028 assert!(
2031 messages[1].metadata.embedding.is_some(),
2032 "cat message must have embedding"
2033 );
2034 assert!(
2035 messages[2].metadata.embedding.is_some(),
2036 "feline message must have embedding"
2037 );
2038 assert!(
2039 messages[3].metadata.embedding.is_some(),
2040 "stock message must have embedding"
2041 );
2042
2043 let query_emb = [1.0f32, 0.0, 0.0];
2045 let cat_emb = messages[1].metadata.embedding.as_ref().unwrap();
2046 let feline_emb = messages[2].metadata.embedding.as_ref().unwrap();
2047 let stock_emb = messages[3].metadata.embedding.as_ref().unwrap();
2048 assert!(
2049 cosine_similarity(cat_emb, &query_emb) > cosine_similarity(stock_emb, &query_emb),
2050 "cat message must be more similar to query than stock message"
2051 );
2052 assert!(
2053 cosine_similarity(feline_emb, &query_emb) > cosine_similarity(stock_emb, &query_emb),
2054 "feline message must be more similar to query than stock message"
2055 );
2056 }
2057
2058 #[tokio::test]
2060 async fn semantic_scoring_falls_back_to_keyword_when_provider_none() {
2061 let scorer = FidelityScorer;
2062 let cfg = FidelityConfig {
2063 enabled: true,
2064 semantic_scoring_provider: None,
2065 ..make_cfg()
2066 };
2067 let tc = FixedTc(4);
2068 let mut messages = vec![
2069 make_msg(Role::System, "system"),
2070 make_msg(Role::User, "cat mat keyword test"),
2071 make_msg(Role::User, "something unrelated here"),
2072 ];
2073
2074 scorer
2075 .score_and_apply(
2076 &mut messages,
2077 "query text here long",
2078 &[],
2079 &cfg,
2080 &tc,
2081 0,
2082 false,
2083 None,
2084 None,
2085 )
2086 .await;
2087
2088 for msg in &messages {
2090 assert!(
2091 msg.metadata.embedding.is_none(),
2092 "no embedding must be computed when provider is None"
2093 );
2094 }
2095 for msg in &messages[1..] {
2097 assert!(
2098 msg.metadata.fidelity_tag.is_some(),
2099 "all non-system messages must be scored via keyword path"
2100 );
2101 }
2102 }
2103
2104 #[tokio::test]
2106 async fn w_plan_produces_nonzero_score_for_matching_message() {
2107 use zeph_common::PlannedToolHint;
2108
2109 let scorer = FidelityScorer;
2110 let cfg = FidelityConfig {
2113 w_semantic: 0.0,
2114 w_temporal: 0.0,
2115 w_importance: 0.0,
2116 w_plan: 1.0,
2117 full_threshold: 0.5,
2118 compressed_threshold: 0.1,
2119 min_query_length: 100, ..make_cfg()
2121 };
2122 let tc = FixedTc(4);
2123
2124 let hint = PlannedToolHint::new("shell", vec!["cargo".to_string(), "build".to_string()], 1);
2126
2127 let mut messages = vec![
2130 make_msg(Role::System, "system prompt"),
2131 make_msg(Role::User, "run cargo build to compile"),
2132 make_msg(Role::User, "what is the weather today"),
2133 ];
2134
2135 scorer
2136 .score_and_apply(
2137 &mut messages,
2138 "q", &[hint],
2140 &cfg,
2141 &tc,
2142 0,
2143 false,
2144 None,
2145 None,
2146 )
2147 .await;
2148
2149 assert_eq!(
2151 messages[1].metadata.fidelity_tag,
2152 Some(ContextFidelity::Full),
2153 "message matching planned tool keywords must reach Full fidelity"
2154 );
2155
2156 assert_ne!(
2158 messages[2].metadata.fidelity_tag,
2159 Some(ContextFidelity::Full),
2160 "message with no keyword overlap must not reach Full fidelity via w_plan"
2161 );
2162 }
2163
2164 #[test]
2167 fn truncate_to_byte_limit_no_op_when_short() {
2168 assert_eq!(truncate_to_byte_limit("hello", 10), "hello");
2169 }
2170
2171 #[test]
2172 fn truncate_to_byte_limit_exact_limit_no_op() {
2173 assert_eq!(truncate_to_byte_limit("hello", 5), "hello");
2174 }
2175
2176 #[test]
2177 fn truncate_to_byte_limit_over_limit() {
2178 let s = truncate_to_byte_limit("abcdefgh", 5);
2179 assert_eq!(s.len(), 5);
2180 assert_eq!(s, "abcde");
2181 }
2182
2183 #[test]
2184 fn truncate_to_byte_limit_multibyte_boundary() {
2185 let s = truncate_to_byte_limit("日本語", 6);
2187 assert!(s.is_char_boundary(s.len()));
2188 assert_eq!(s, "日本");
2189 }
2190
2191 #[test]
2194 fn apply_input_cap_no_op_below_limit() {
2195 let mut s = "hello".to_string();
2196 apply_input_cap(&mut s, 10); assert_eq!(s, "hello");
2198 }
2199
2200 #[test]
2201 fn apply_input_cap_truncates_over_limit() {
2202 let mut s = "abcdefgh".to_string();
2204 apply_input_cap(&mut s, 1);
2205 assert_eq!(s, "abcd");
2206 }
2207
2208 #[test]
2209 fn apply_input_cap_multibyte() {
2210 let mut s = "日本語".to_string();
2213 apply_input_cap(&mut s, 1);
2214 assert!(s.is_char_boundary(s.len()));
2215 assert_eq!(s, "日");
2216 }
2217
2218 #[tokio::test]
2221 async fn embed_prepass_returns_embeddings_for_non_exempt() {
2222 let messages = vec![
2223 make_msg(Role::System, "system prompt"), make_msg(Role::User, "user message"),
2225 make_msg(Role::Assistant, "assistant reply"),
2226 ];
2227 let cfg = FidelityConfig::default();
2228 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32, 2.0, 3.0]) }) };
2229 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2230 assert!(!result.contains_key(&0));
2232 assert_eq!(result[&1], vec![1.0, 2.0, 3.0]);
2233 assert_eq!(result[&2], vec![1.0, 2.0, 3.0]);
2234 }
2235
2236 #[tokio::test]
2237 async fn embed_prepass_skips_empty_content() {
2238 let messages = vec![make_msg(Role::System, "system"), make_msg(Role::User, "")];
2239 let cfg = FidelityConfig::default();
2240 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2241 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2242 assert!(!result.contains_key(&1), "empty content must be skipped");
2243 }
2244
2245 #[tokio::test]
2246 async fn embed_prepass_skips_inserted_memory() {
2247 let messages = vec![
2248 make_msg(Role::System, "system"),
2249 make_msg(Role::User, "injected memory"),
2250 make_msg(Role::User, "real user message"),
2251 ];
2252 let cfg = FidelityConfig::default();
2253 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2254 let result = embed_prepass(&messages, &embed, &cfg, 1).await;
2256 assert!(!result.contains_key(&0), "system is exempt");
2257 assert!(!result.contains_key(&1), "inserted memory is exempt");
2258 assert!(
2259 result.contains_key(&2),
2260 "real user message must be embedded"
2261 );
2262 }
2263
2264 #[tokio::test]
2265 async fn embed_prepass_silently_skips_errors() {
2266 let messages = vec![
2267 make_msg(Role::System, "system"),
2268 make_msg(Role::User, "user"),
2269 ];
2270 let cfg = FidelityConfig::default();
2271 let embed = |_text: &str| -> EmbedFuture {
2272 Box::pin(async {
2273 Err(zeph_llm::LlmError::EmbedUnsupported {
2274 provider: "mock".to_string(),
2275 })
2276 })
2277 };
2278 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2279 assert!(result.is_empty(), "errors must be silently skipped");
2280 }
2281
2282 #[tokio::test]
2283 async fn embed_prepass_truncates_content_when_cap_set() {
2284 let long_content = "a".repeat(100);
2286 let messages = vec![
2287 make_msg(Role::System, "system"),
2288 make_msg(Role::User, &long_content),
2289 ];
2290 let cfg = FidelityConfig {
2291 max_embed_input_tokens: Some(1), ..FidelityConfig::default()
2293 };
2294 let seen_len = std::sync::Arc::new(std::sync::Mutex::new(0usize));
2295 let seen_len_clone = seen_len.clone();
2296 let embed = move |text: &str| -> EmbedFuture {
2297 let len = text.len();
2298 let seen = seen_len_clone.clone();
2299 Box::pin(async move {
2300 *seen.lock().unwrap() = len;
2301 Ok(vec![1.0f32])
2302 })
2303 };
2304 embed_prepass(&messages, &embed, &cfg, 0).await;
2305 assert_eq!(
2306 *seen_len.lock().unwrap(),
2307 4,
2308 "content must be truncated to max_embed_input_tokens * 4 bytes"
2309 );
2310 }
2311
2312 #[tokio::test]
2313 async fn embed_prepass_concurrency_zero_clamped_to_one() {
2314 let messages = vec![
2316 make_msg(Role::System, "system"),
2317 make_msg(Role::User, "user message"),
2318 ];
2319 let cfg = FidelityConfig {
2320 embed_concurrency: 0,
2321 ..FidelityConfig::default()
2322 };
2323 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2324 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2325 assert!(
2326 result.contains_key(&1),
2327 "result must be produced even with concurrency=0"
2328 );
2329 }
2330
2331 #[tokio::test]
2332 async fn embed_prepass_skips_cached_embeddings() {
2333 let mut msg_with_cache = make_msg(Role::User, "already embedded");
2334 msg_with_cache.metadata.embedding = Some(vec![9.0f32]);
2335 let messages = vec![
2336 make_msg(Role::System, "system"),
2337 msg_with_cache,
2338 make_msg(Role::User, "needs embedding"),
2339 ];
2340 let cfg = FidelityConfig::default();
2341 let embed = |_text: &str| -> EmbedFuture { Box::pin(async { Ok(vec![1.0f32]) }) };
2342 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2343 assert!(
2344 !result.contains_key(&1),
2345 "message with cached embedding must be skipped"
2346 );
2347 assert!(
2348 result.contains_key(&2),
2349 "message without embedding must be processed"
2350 );
2351 }
2352
2353 #[tokio::test(start_paused = true)]
2354 async fn embed_prepass_timeout_skips_message() {
2355 let messages = vec![
2356 make_msg(Role::System, "system"),
2357 make_msg(Role::User, "user"),
2358 ];
2359 let cfg = FidelityConfig::default();
2360 let embed = |_text: &str| -> EmbedFuture {
2361 Box::pin(async {
2362 tokio::time::sleep(Duration::from_secs(45)).await;
2364 Ok(vec![1.0f32])
2365 })
2366 };
2367 let result = embed_prepass(&messages, &embed, &cfg, 0).await;
2369 assert!(result.is_empty(), "timed-out embed must be skipped");
2370 }
2371
2372 #[test]
2375 fn fidelity_config_new_fields_defaults() {
2376 let cfg = FidelityConfig::default();
2377 assert_eq!(cfg.embed_concurrency, 32);
2378 assert!(cfg.max_embed_input_tokens.is_none());
2379 assert!(cfg.max_compress_input_tokens.is_none());
2380 }
2381
2382 #[test]
2383 fn fidelity_config_new_fields_custom() {
2384 let cfg = FidelityConfig {
2385 embed_concurrency: 8,
2386 max_embed_input_tokens: Some(512),
2387 max_compress_input_tokens: Some(1024),
2388 ..FidelityConfig::default()
2389 };
2390 assert_eq!(cfg.embed_concurrency, 8);
2391 assert_eq!(cfg.max_embed_input_tokens, Some(512));
2392 assert_eq!(cfg.max_compress_input_tokens, Some(1024));
2393 }
2394
2395 #[tokio::test]
2397 async fn render_compressed_truncates_oversized_deferred_summary() {
2398 let scorer = FidelityScorer;
2399 let cfg = FidelityConfig {
2402 full_threshold: 2.0,
2403 compressed_threshold: 0.0,
2404 compressed_max_tokens: 3,
2405 ..make_cfg()
2406 };
2407 let tc = FixedTc(1);
2408 let mut msg = make_msg(Role::User, "original long content");
2409 msg.metadata.deferred_summary = Some("ten chars!".to_string());
2411 let mut messages = vec![make_msg(Role::System, "sys"), msg];
2412 scorer
2413 .score_and_apply(
2414 &mut messages,
2415 "query text here long",
2416 &[],
2417 &cfg,
2418 &tc,
2419 0,
2420 false,
2421 None,
2422 None,
2423 )
2424 .await;
2425 let compressed = &messages[1];
2426 assert_eq!(
2427 compressed.metadata.fidelity_tag,
2428 Some(ContextFidelity::Compressed)
2429 );
2430 assert!(
2431 tc.count_tokens(&compressed.content) <= 3,
2432 "deferred_summary result must be truncated to compressed_max_tokens"
2433 );
2434 }
2435
2436 #[tokio::test]
2438 async fn render_compressed_applies_max_compress_input_tokens() {
2439 let scorer = FidelityScorer;
2440 let cfg = FidelityConfig {
2443 full_threshold: 2.0,
2444 compressed_threshold: 0.0,
2445 compressed_max_tokens: 100,
2446 max_compress_input_tokens: Some(2), ..make_cfg()
2448 };
2449 let tc = FixedTc(1);
2450 let content_20 = "a".repeat(20); let mut msg = make_msg(Role::User, &content_20);
2452 let mut messages = vec![make_msg(Role::System, "sys"), msg.clone()];
2454 scorer
2455 .score_and_apply(
2456 &mut messages,
2457 "query text here long",
2458 &[],
2459 &cfg,
2460 &tc,
2461 0,
2462 false,
2463 None,
2464 None,
2465 )
2466 .await;
2467 let compressed = &messages[1];
2468 assert_eq!(
2469 compressed.metadata.fidelity_tag,
2470 Some(ContextFidelity::Compressed)
2471 );
2472 assert_eq!(
2474 compressed.content.len(),
2475 8,
2476 "content must be capped to max_compress_input_tokens * 4 bytes"
2477 );
2478 msg.metadata.deferred_summary = Some("short".to_string());
2480 let mut messages2 = vec![make_msg(Role::System, "sys"), msg];
2481 scorer
2482 .score_and_apply(
2483 &mut messages2,
2484 "query text here long",
2485 &[],
2486 &cfg,
2487 &tc,
2488 0,
2489 false,
2490 None,
2491 None,
2492 )
2493 .await;
2494 assert_eq!(
2495 messages2[1].content, "short",
2496 "deferred_summary must bypass input cap"
2497 );
2498 }
2499}