1use std::borrow::Cow;
7use std::cmp::Ordering;
8use std::collections::{BTreeMap, BTreeSet, HashSet};
9
10use once_cell::sync::Lazy;
11use regex::{Captures, Regex};
12use serde::{Deserialize, Serialize};
13use sha2::{Digest, Sha256};
14
15use crate::indexer::redact_secrets::redact_text;
16
17use super::query::{MatchType, SearchHit};
18
/// Characters-per-token ratio for size estimates.
/// NOTE(review): the estimator that consumes this lives outside this section — confirm usage.
const TOKEN_ESTIMATE_CHARS_PER_TOKEN: usize = 4;
/// Default freshness window: 30 days expressed in seconds. Used as the default
/// `freshness_window_seconds` of `PackPlanRequest` and `PackRenderRequest`.
const DEFAULT_FRESHNESS_WINDOW_SECONDS: i64 = 30 * 24 * 60 * 60;
/// Upper clamp bound applied by `pack_candidate_fetch_limit`.
const PACK_CANDIDATE_LIMIT_CAP: usize = 2_048;
// Marker strings for redacted output. Their use sites are outside this section;
// presumably one marker per redaction category — confirm against the redaction code.
const REDACTED_VALUE_MARKER: &str = "[REDACTED]";
const REDACTED_PATH_PREFIX: &str = "[REDACTED_PATH]";
const REDACTED_REMOTE_HOST_MARKER: &str = "[REDACTED_REMOTE_HOST]";
const REDACTED_SOURCE_MARKER: &str = "[REDACTED_SOURCE]";
const REDACTED_ENCRYPTED_PAYLOAD_MARKER: &str = "[REDACTED_ENCRYPTED_PAYLOAD]";
27
/// Matches user-home style filesystem paths.
///
/// The pattern is written in verbose mode (`(?x)`), so whitespace inside it is
/// ignored. It has two alternatives:
/// - `/home/<name>`, `/Users/<name>` or a bare `~`, followed by any number of
///   `/segment` components;
/// - `<drive letter>:\Users\<name>`, followed by any number of `\segment`
///   components.
///
/// Segments stop at whitespace, quotes, backticks, angle brackets, square
/// brackets, parentheses, braces, `:`, `,` and `;`.
///
/// Compiled lazily on first access; `expect` panics if the pattern is invalid.
static PRIVATE_PATH_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r#"(?x)
 (?:
 (?:/home/[^/\s"'`<>\[\](){}:,;]+|/Users/[^/\s"'`<>\[\](){}:,;]+|~)
 (?:/[^\s"'`<>\[\](){}:,;]+)*
 |
 [A-Za-z]:\\Users\\[^\\\s"'`<>\[\](){}:,;]+
 (?:\\[^\s"'`<>\[\](){}:,;]+)*
 )
 "#,
    )
    .expect("private path redaction regex")
});
42
/// Matches `key: value` / `key = value` pairs that look like encrypted material.
///
/// Case-insensitive. The key is one of `encrypted_payload`,
/// `encrypted_payload_material`, `encrypted_content`, `encrypted_blob`,
/// `encrypted_material` or `ciphertext`; the value is a run of at least 16
/// characters drawn from `A-Za-z0-9+/=_:.-`.
///
/// Compiled lazily on first access; `expect` panics if the pattern is invalid.
static ENCRYPTED_PAYLOAD_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r"(?i)\b(?:encrypted_(?:payload(?:_material)?|content|blob|material)|ciphertext)\b\s*[:=]\s*[A-Za-z0-9+/=_:.-]{16,}",
    )
    .expect("encrypted payload redaction regex")
});
49
/// Matches host names ending in `.internal` or `.local`, with an optional
/// leading `user@` part.
///
/// Case-insensitive. Compiled lazily on first access; `expect` panics if the
/// pattern is invalid.
static PRIVATE_HOST_LABEL_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r"(?i)\b(?:[A-Za-z0-9._%+-]+@)?[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*(?:\.internal|\.local)\b",
    )
    .expect("private host label redaction regex")
});
56
/// Caller-supplied size limits for planning an answer pack.
///
/// The accepted range of each field is enforced by `PackPlannerLimits::validate`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackPlannerLimits {
    /// Overall token budget; valid range 1_024..=200_000.
    pub max_tokens: usize,
    /// Maximum number of distinct sessions in a pack; valid range 1..=64.
    pub max_sessions: usize,
    /// Maximum number of selected evidence items; valid range 1..=256.
    pub max_evidence: usize,
    /// Context line count; valid range 0..=40. Not consumed in this section.
    pub context_lines: usize,
    /// Character cap handed to excerpt truncation; valid range 80..=8_000.
    pub max_excerpt_chars: usize,
}
65
66impl Default for PackPlannerLimits {
67 fn default() -> Self {
68 Self {
69 max_tokens: 12_000,
70 max_sessions: 8,
71 max_evidence: 24,
72 context_lines: 3,
73 max_excerpt_chars: 1_600,
74 }
75 }
76}
77
78impl PackPlannerLimits {
79 pub fn validate(&self) -> Result<(), PackPlannerLimitError> {
80 validate_range("max_tokens", self.max_tokens, 1_024, 200_000)?;
81 validate_range("max_sessions", self.max_sessions, 1, 64)?;
82 validate_range("max_evidence", self.max_evidence, 1, 256)?;
83 validate_range("context_lines", self.context_lines, 0, 40)?;
84 validate_range("max_excerpt_chars", self.max_excerpt_chars, 80, 8_000)?;
85 Ok(())
86 }
87}
88
89fn validate_range(
90 field: &'static str,
91 value: usize,
92 min: usize,
93 max: usize,
94) -> Result<(), PackPlannerLimitError> {
95 if (min..=max).contains(&value) {
96 Ok(())
97 } else {
98 Err(PackPlannerLimitError {
99 field,
100 value,
101 min,
102 max,
103 })
104 }
105}
106
/// Error describing a planner limit that fell outside its allowed range.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackPlannerLimitError {
    /// Name of the rejected `PackPlannerLimits` field.
    pub field: &'static str,
    /// The rejected value.
    pub value: usize,
    /// Inclusive lower bound the value was checked against.
    pub min: usize,
    /// Inclusive upper bound the value was checked against.
    pub max: usize,
}
114
/// How evidence age influences planning. Serialized in kebab-case
/// (`prefer-recent`, `strict`, `allow-stale`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum PackFreshnessPolicy {
    /// Default. Candidates without a timestamp get a freshness score of 0.25.
    #[default]
    PreferRecent,
    /// Candidates without a timestamp, or older than the freshness window, are
    /// omitted as stale.
    Strict,
    /// Candidates without a timestamp get a freshness score of 1.0.
    AllowStale,
}
123
/// Conversational role attached to a candidate; feeds the role component of the
/// selection score. Serialized in snake_case.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PackEvidenceRole {
    AssistantConclusion,
    ToolResult,
    UserRequirement,
    ToolCallArgument,
    /// Default; also what `PackCandidate::from_search_hit` assigns.
    #[default]
    Unknown,
}
134
/// Readiness of the source a candidate came from. Serialized in snake_case.
///
/// Drives `source_authority_score`, and `Unavailable` candidates are omitted
/// outright by `hard_omission_reason`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PackSourceReadiness {
    /// Default; authority 1.0 for explicitly requested or `"local"` sources, else 0.9.
    #[default]
    Healthy,
    /// Authority 0.6.
    StaleReadable,
    /// Authority 0.4.
    IncompleteMetadata,
    /// Authority 0.0; candidates are omitted as `SourceUnavailable`.
    Unavailable,
}
144
/// Reported state of the lexical index. Serialized in snake_case; defaults to `Ready`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PackLexicalReadiness {
    #[default]
    Ready,
    Stale,
    Missing,
    Rebuilding,
    Unknown,
}
155
/// Reported state of semantic search. Serialized in snake_case; defaults to
/// `NotReported`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PackSemanticReadiness {
    #[default]
    NotReported,
    Joined,
    FallbackLexical,
    Unavailable,
    Disabled,
}
166
/// Category of a source synchronisation gap. Serialized in snake_case; defaults
/// to `Unknown`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PackSourceSyncGapKind {
    RemoteStale,
    SourcePruned,
    SyncDeferred,
    #[default]
    Unknown,
}
176
/// One reported synchronisation gap for a source; carried in
/// `PackReadinessSnapshot::source_sync_gaps`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackSourceSyncGap {
    /// Identifier of the affected source.
    pub source_id: String,
    /// Origin kind of the affected source.
    pub origin_kind: String,
    /// Category of the gap.
    pub kind: PackSourceSyncGapKind,
    /// Lag in seconds, when known.
    pub lag_seconds: Option<i64>,
    /// Last sync time in milliseconds, when known.
    /// NOTE(review): epoch base is not shown in this section — confirm.
    pub last_synced_at_ms: Option<i64>,
    /// Optional remediation hint.
    pub recommended_action: Option<String>,
}
186
/// Index and source readiness information supplied alongside a render request
/// (`PackRenderRequest::readiness`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackReadinessSnapshot {
    /// Index generation label, when reported.
    pub index_generation: Option<String>,
    /// State of the lexical index.
    pub lexical_readiness: PackLexicalReadiness,
    /// State of semantic search.
    pub semantic_readiness: PackSemanticReadiness,
    /// Whether a rebuild is in progress.
    pub active_rebuild: bool,
    /// Lock state label, when reported.
    pub lock_state: Option<String>,
    /// Whether the database is missing.
    pub missing_database: bool,
    /// Reported per-source synchronisation gaps.
    pub source_sync_gaps: Vec<PackSourceSyncGap>,
    /// Optional remediation hint.
    pub recommended_action: Option<String>,
}
198
199impl Default for PackReadinessSnapshot {
200 fn default() -> Self {
201 Self {
202 index_generation: None,
203 lexical_readiness: PackLexicalReadiness::Ready,
204 semantic_readiness: PackSemanticReadiness::NotReported,
205 active_rebuild: false,
206 lock_state: None,
207 missing_database: false,
208 source_sync_gaps: Vec::new(),
209 recommended_action: None,
210 }
211 }
212}
213
/// One piece of candidate evidence considered by the pack planner.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PackCandidate {
    /// Identifier; `from_search_hit` builds it as `source_id:source_path:line`.
    pub candidate_id: String,
    /// Path of the source; second half of the session key.
    pub source_path: String,
    /// Source identifier; first half of the session key.
    pub source_id: String,
    /// Origin kind; `"local"` healthy sources get full authority.
    pub origin_kind: String,
    pub origin_host: Option<String>,
    pub workspace: String,
    pub workspace_original: Option<String>,
    pub agent: String,
    /// First line of the cited span; used for overlap de-duplication.
    pub line_start: Option<usize>,
    /// Last line of the cited span; treated as `line_start` when absent.
    pub line_end: Option<usize>,
    pub conversation_id: Option<i64>,
    pub message_index: Option<usize>,
    /// Content hash; a candidate whose hash is already selected is a duplicate.
    pub content_hash: String,
    /// Span hash; a candidate whose hash is already selected is a duplicate.
    pub span_hash: String,
    /// Creation time in milliseconds; compared against `PackPlanRequest::now_ms`.
    pub created_at_ms: Option<i64>,
    pub indexed_at_ms: Option<i64>,
    pub match_type: String,
    /// Raw excerpt text; truncated to `max_excerpt_chars` during planning.
    pub excerpt: String,
    pub role: PackEvidenceRole,
    /// Lexical score; min-max normalised across all candidates when finite.
    pub lexical_score: Option<f64>,
    /// Semantic score; min-max normalised across all candidates when finite.
    pub semantic_score: Option<f64>,
    /// Hybrid rank; contributes `1 / max(rank, 1)` to relevance.
    pub hybrid_rank: Option<usize>,
    /// Matched query terms; each counts once in the coverage numerator.
    pub matched_terms: Vec<String>,
    /// Matched query phrases; each counts twice in the coverage numerator.
    pub matched_phrases: Vec<String>,
    /// Number of query terms; coverage denominator weight 1 each.
    pub query_term_count: usize,
    /// Number of query phrases; coverage denominator weight 2 each.
    pub query_phrase_count: usize,
    pub source_readiness: PackSourceReadiness,
    /// Gives a healthy source full authority regardless of origin kind.
    pub source_explicitly_requested: bool,
}
245
246impl PackCandidate {
247 pub fn from_search_hit(
248 hit: &SearchHit,
249 query_term_count: usize,
250 query_phrase_count: usize,
251 ) -> Self {
252 let line_start = hit.line_number;
253 let source_id = if hit.source_id.trim().is_empty() {
254 "local".to_string()
255 } else {
256 hit.source_id.trim().to_string()
257 };
258 let origin_kind = if hit.origin_kind.trim().is_empty() {
259 "local".to_string()
260 } else {
261 hit.origin_kind.trim().to_string()
262 };
263 let content_hash = format!("{:016x}", hit.content_hash);
264 let candidate_id = format!(
265 "{}:{}:{}",
266 source_id,
267 hit.source_path,
268 line_start.unwrap_or_default()
269 );
270 Self {
271 candidate_id,
272 source_path: hit.source_path.clone(),
273 source_id,
274 origin_kind,
275 origin_host: hit.origin_host.clone(),
276 workspace: hit.workspace.clone(),
277 workspace_original: hit.workspace_original.clone(),
278 agent: hit.agent.clone(),
279 line_start,
280 line_end: line_start,
281 conversation_id: hit.conversation_id,
282 message_index: None,
283 content_hash: content_hash.clone(),
284 span_hash: content_hash,
285 created_at_ms: hit.created_at,
286 indexed_at_ms: None,
287 match_type: match_type_robot_name(hit.match_type).to_string(),
288 excerpt: if hit.content.is_empty() {
289 hit.snippet.clone()
290 } else {
291 hit.content.clone()
292 },
293 role: PackEvidenceRole::Unknown,
294 lexical_score: Some(hit.score as f64),
295 semantic_score: None,
296 hybrid_rank: None,
297 matched_terms: Vec::new(),
298 matched_phrases: Vec::new(),
299 query_term_count,
300 query_phrase_count,
301 source_readiness: PackSourceReadiness::Healthy,
302 source_explicitly_requested: false,
303 }
304 }
305
306 fn session_key(&self) -> (&str, &str) {
307 (&self.source_id, &self.source_path)
308 }
309}
310
311fn match_type_robot_name(match_type: MatchType) -> &'static str {
312 match match_type {
313 MatchType::Exact => "exact",
314 MatchType::Prefix => "prefix",
315 MatchType::Suffix => "suffix",
316 MatchType::Substring => "substring",
317 MatchType::Wildcard => "wildcard",
318 MatchType::ImplicitWildcard => "implicit_wildcard",
319 }
320}
321
/// Input to `plan_answer_pack`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PackPlanRequest {
    /// Reference "now" in milliseconds, on the same clock as
    /// `PackCandidate::created_at_ms`.
    pub now_ms: i64,
    /// Size limits; validated before planning starts.
    pub limits: PackPlannerLimits,
    /// How evidence age is treated.
    pub freshness_policy: PackFreshnessPolicy,
    /// Freshness window in seconds.
    pub freshness_window_seconds: i64,
    /// Candidates to choose from.
    pub candidates: Vec<PackCandidate>,
    /// Not read by the planning code in this section.
    pub explain_selection: bool,
}
331
332impl Default for PackPlanRequest {
333 fn default() -> Self {
334 Self {
335 now_ms: 0,
336 limits: PackPlannerLimits::default(),
337 freshness_policy: PackFreshnessPolicy::PreferRecent,
338 freshness_window_seconds: DEFAULT_FRESHNESS_WINDOW_SECONDS,
339 candidates: Vec::new(),
340 explain_selection: false,
341 }
342 }
343}
344
/// Result of `plan_answer_pack`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PlannedAnswerPack {
    /// Number of candidates in the request.
    pub candidate_count: usize,
    /// Number of entries in `evidence`.
    pub selected_evidence_count: usize,
    /// Number of distinct `(source_id, source_path)` sessions among the selection.
    pub selected_session_count: usize,
    /// Sum of the token costs of the selected evidence.
    pub estimated_tokens: usize,
    /// Fetch limit and budget split derived from the request limits.
    pub diagnostics: PackPlannerDiagnostics,
    /// Selected evidence in selection order (rank 1 first).
    pub evidence: Vec<PlannedPackEvidence>,
    /// Candidates that were not selected, each with a reason.
    pub omitted: Vec<OmittedPackCandidate>,
}
355
/// Planner diagnostics derived from the request limits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackPlannerDiagnostics {
    /// Value returned by `pack_candidate_fetch_limit` for the limits.
    pub candidate_fetch_limit: usize,
    /// Token budget split for `max_tokens`.
    pub budget: PackPlannerBudget,
}
361
/// Split of the overall token budget, as computed by
/// `pack_planner_budget_unchecked`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackPlannerBudget {
    /// The overall budget the split was computed from.
    pub max_tokens: usize,
    /// 15% of `max_tokens`.
    pub metadata_tokens: usize,
    /// 15% of `max_tokens`.
    pub outline_tokens: usize,
    /// 60% of `max_tokens`; the cap enforced while selecting evidence.
    pub evidence_tokens: usize,
    /// Whatever remains after the three shares above.
    pub omitted_tokens: usize,
    /// `max_tokens` plus one twentieth of it (saturating).
    pub max_output_tokens_with_overflow: usize,
}
371
/// One selected piece of evidence.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PlannedPackEvidence {
    /// Evidence identifier produced by `evidence_id`.
    pub id: String,
    /// 1-based position in selection order.
    pub rank: usize,
    /// Excerpt after truncation to `max_excerpt_chars`.
    pub excerpt: String,
    /// Truncation flag reported by `truncate_excerpt`.
    pub excerpt_truncated: bool,
    /// Token cost of `excerpt`; equals `selection.token_cost`.
    pub estimated_tokens: usize,
    /// Copy of the originating candidate.
    pub candidate: PackCandidate,
    /// Score breakdown at the moment of selection.
    pub selection: PackSelectionScore,
}
382
/// Summary of a candidate that was left out of the pack.
/// NOTE(review): values are filled in by `omitted_candidate`, defined outside
/// this section — confirm field provenance there.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OmittedPackCandidate {
    pub candidate_id: String,
    pub source_path: String,
    pub line_start: Option<usize>,
    pub agent: String,
    /// Why the candidate was left out.
    pub reason: PackOmittedReason,
    pub score: f64,
    pub estimated_tokens: usize,
}
393
/// Why a candidate was left out of the pack. Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PackOmittedReason {
    /// Adding the candidate would exceed the evidence token budget.
    TokenBudgetExhausted,
    /// The candidate belongs to a new session and the session cap is reached.
    MaxSessionsReached,
    /// Still unprocessed when the evidence cap was reached.
    MaxEvidenceReached,
    /// Span hash, content hash or line range collides with selected evidence.
    DuplicateContent,
    /// Not produced by the planning code in this section.
    SameSessionLowerRank,
    /// Missing timestamp or too old under `PackFreshnessPolicy::Strict`.
    StaleUnderStrictPolicy,
    /// Source readiness is `Unavailable`.
    SourceUnavailable,
    /// The truncated excerpt is empty or whitespace only.
    RedactedToEmpty,
    /// Not produced by the planning code in this section.
    FieldMaskExcluded,
}
407
/// Score breakdown for one candidate, as produced by `score_candidate`.
///
/// `score` is `0.35 * relevance + 0.20 * coverage + 0.15 * freshness
/// + 0.10 * source_diversity + 0.10 * role + 0.05 * source_authority
/// + 0.05 * citation_quality - duplicate_penalty`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct PackSelectionScore {
    /// Weighted total described above.
    pub score: f64,
    pub relevance_score: f64,
    pub coverage_score: f64,
    pub freshness_score: f64,
    pub source_diversity_score: f64,
    pub source_authority_score: f64,
    pub role_score: f64,
    pub citation_quality_score: f64,
    /// Subtracted from the weighted sum at full weight.
    pub duplicate_penalty: f64,
    /// Token cost the candidate was scored with (not part of `score`).
    pub token_cost: usize,
    pub selected_reason: PackSelectedReason,
}
422
/// Label attached to a score breakdown, derived by `selected_reason` (defined
/// outside this section) from the relevance, freshness, source-diversity and
/// citation-quality components. Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PackSelectedReason {
    HighRelevance,
    FreshEvidence,
    SourceDiversity,
    StrongCitation,
    BudgetFit,
}
432
/// Output format requested for a rendered pack. Each variant has a short text
/// label, see `PackRenderFormat::label`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackRenderFormat {
    /// Label `"json"`.
    Json,
    /// Label `"compact"`.
    CompactJson,
    /// Label `"jsonl"`.
    Jsonl,
    /// Label `"toon"`.
    Toon,
    /// Label `"markdown"`.
    Markdown,
}
441
442impl PackRenderFormat {
443 fn label(self) -> &'static str {
444 match self {
445 Self::Json => "json",
446 Self::CompactJson => "compact",
447 Self::Jsonl => "jsonl",
448 Self::Toon => "toon",
449 Self::Markdown => "markdown",
450 }
451 }
452}
453
/// Parameters describing how a planned pack should be rendered.
/// NOTE(review): the renderer that consumes these fields is outside this
/// section; field notes below are limited to what the types show.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackRenderRequest {
    pub query_text: String,
    pub normalized_query: String,
    pub generated_at_ms: i64,
    pub elapsed_ms: u64,
    pub request_id: Option<String>,
    /// Requested output format.
    pub format: PackRenderFormat,
    pub limits: PackPlannerLimits,
    /// Defaults to `"hybrid"`.
    pub search_mode: String,
    pub fallback_mode: Option<String>,
    pub semantic_joined: bool,
    pub freshness_policy: PackFreshnessPolicy,
    pub freshness_window_seconds: i64,
    /// Defaults to `"strict"`.
    pub redaction_policy: String,
    pub sensitive_output: bool,
    pub skill_content_included: bool,
    pub explain_selection: bool,
    pub readiness: PackReadinessSnapshot,
}
474
475impl Default for PackRenderRequest {
476 fn default() -> Self {
477 Self {
478 query_text: String::new(),
479 normalized_query: String::new(),
480 generated_at_ms: 0,
481 elapsed_ms: 0,
482 request_id: None,
483 format: PackRenderFormat::Json,
484 limits: PackPlannerLimits::default(),
485 search_mode: "hybrid".to_string(),
486 fallback_mode: None,
487 semantic_joined: false,
488 freshness_policy: PackFreshnessPolicy::PreferRecent,
489 freshness_window_seconds: DEFAULT_FRESHNESS_WINDOW_SECONDS,
490 redaction_policy: "strict".to_string(),
491 sensitive_output: false,
492 skill_content_included: false,
493 explain_selection: false,
494 readiness: PackReadinessSnapshot::default(),
495 }
496 }
497}
498
/// Error raised while rendering a pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackRenderError {
    /// Static label of the format involved.
    /// NOTE(review): presumably `PackRenderFormat::label` — confirm at the construction site.
    pub format: &'static str,
    /// Human-readable description of the failure.
    pub message: String,
}
504
/// Top-level serialization-only view of a rendered answer pack (derives
/// `Serialize` but not `Deserialize`).
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedAnswerPack {
    schema_version: &'static str,
    query: RenderedQuery,
    /// Serialized under the key `_meta`.
    #[serde(rename = "_meta")]
    meta: RenderedMeta,
    limits: RenderedLimits,
    realized: RenderedRealized,
    health: RenderedHealth,
    freshness: RenderedFreshness,
    pack: RenderedPack,
    evidence: Vec<RenderedEvidence>,
    omitted: RenderedOmitted,
    privacy: RenderedPrivacy,
    warnings: Vec<String>,
}
521
/// `query` section of `RenderedAnswerPack`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedQuery {
    text: String,
    normalized: String,
    /// Filter name/value pairs; a `BTreeMap` keeps keys in sorted order.
    filters: BTreeMap<String, String>,
}
528
/// `_meta` section of `RenderedAnswerPack`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedMeta {
    request_id: Option<String>,
    generated_at_ms: i64,
    elapsed_ms: u64,
    partial: bool,
    /// Static format label.
    format: &'static str,
    warnings: Vec<String>,
}
538
/// `limits` section of `RenderedAnswerPack`: the `PackPlannerLimits` field
/// names plus `estimated_tokens` and `field_mask`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedLimits {
    max_tokens: usize,
    estimated_tokens: usize,
    max_sessions: usize,
    max_evidence: usize,
    context_lines: usize,
    max_excerpt_chars: usize,
    field_mask: &'static str,
}
549
/// `realized` section of `RenderedAnswerPack`: search-mode fields alongside
/// candidate and selection counts.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedRealized {
    search_mode: String,
    fallback_mode: Option<String>,
    semantic_joined: bool,
    candidate_count: usize,
    selected_evidence_count: usize,
    selected_session_count: usize,
}
559
/// `health` section of `RenderedAnswerPack`. Readiness states are carried as
/// static text labels rather than as the planner enums.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedHealth {
    healthy: bool,
    recommended_action: Option<String>,
    index_state: &'static str,
    index_generation: Option<String>,
    lexical_readiness: &'static str,
    semantic_state: &'static str,
    active_rebuild: bool,
    lock_state: Option<String>,
    missing_database: bool,
    source_sync_gaps: Vec<RenderedSourceSyncGap>,
    source_readiness: Vec<RenderedSourceReadiness>,
}
574
/// One entry of `RenderedHealth::source_readiness`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedSourceReadiness {
    source_id: String,
    origin_kind: String,
    /// Readiness as a static text label.
    readiness: &'static str,
    healthy: bool,
    evidence_count: usize,
}
583
/// One entry of `RenderedHealth::source_sync_gaps`. Has the same fields as
/// `PackSourceSyncGap`, except that `kind` is a static text label.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedSourceSyncGap {
    source_id: String,
    origin_kind: String,
    kind: &'static str,
    lag_seconds: Option<i64>,
    last_synced_at_ms: Option<i64>,
    recommended_action: Option<String>,
}
593
/// `freshness` section of `RenderedAnswerPack`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedFreshness {
    /// Freshness policy as a static text label.
    policy: &'static str,
    window_seconds: i64,
    newest_evidence_at_ms: Option<i64>,
    oldest_evidence_at_ms: Option<i64>,
    stale_evidence_count: usize,
}
602
/// `pack` section of `RenderedAnswerPack`: title, outline, per-source summary
/// and handoff items.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedPack {
    title: String,
    answer_outline: Vec<RenderedOutlineItem>,
    source_summary: Vec<RenderedSourceSummary>,
    handoff: Vec<RenderedHandoffItem>,
}
610
/// One entry of `RenderedPack::answer_outline`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedOutlineItem {
    rank: usize,
    heading: String,
    /// Identifiers of the evidence this item refers to.
    evidence_ids: Vec<String>,
}
617
/// One entry of `RenderedPack::source_summary`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedSourceSummary {
    source_id: String,
    origin_kind: String,
    session_count: usize,
    evidence_count: usize,
    newest_evidence_at_ms: Option<i64>,
    healthy: bool,
}
627
/// One entry of `RenderedPack::handoff`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedHandoffItem {
    rank: usize,
    /// Static label for the kind of handoff item.
    kind: &'static str,
    text: String,
    /// Identifiers of the evidence this item refers to.
    evidence_ids: Vec<String>,
}
635
/// One entry of `RenderedAnswerPack::evidence`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedEvidence {
    id: String,
    rank: usize,
    excerpt: String,
    excerpt_truncated: bool,
    estimated_tokens: usize,
    citation: RenderedCitation,
    selection: RenderedSelection,
    roles: Vec<&'static str>,
    matched_terms: Vec<String>,
    redactions: Vec<RenderedRedaction>,
    /// Kept in memory only; `#[serde(skip)]` leaves it out of serialized output.
    #[serde(skip)]
    source_readiness: PackSourceReadiness,
}
651
/// `citation` part of a `RenderedEvidence`. Repeats the provenance field names
/// of `PackCandidate` and adds `excerpt_sha256`, `freshness_age_seconds` and
/// `verified`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedCitation {
    source_path: String,
    source_id: String,
    origin_kind: String,
    origin_host: Option<String>,
    workspace: String,
    workspace_original: Option<String>,
    agent: String,
    line_start: Option<usize>,
    line_end: Option<usize>,
    message_index: Option<usize>,
    conversation_id: Option<i64>,
    content_hash: String,
    span_hash: String,
    excerpt_sha256: String,
    created_at_ms: Option<i64>,
    indexed_at_ms: Option<i64>,
    freshness_age_seconds: Option<i64>,
    match_type: String,
    verified: bool,
}
674
/// `selection` part of a `RenderedEvidence`.
///
/// `score`, `token_cost` and `selected_reason` are always serialized; each
/// component score is optional and left out of the output when `None`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedSelection {
    score: f64,
    token_cost: usize,
    selected_reason: PackSelectedReason,
    #[serde(skip_serializing_if = "Option::is_none")]
    relevance_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    coverage_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    freshness_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_diversity_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    source_authority_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    role_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    citation_quality_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    duplicate_penalty: Option<f64>,
}
697
/// One entry of `RenderedEvidence::redactions`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedRedaction {
    kind: String,
    // NOTE(review): presumably character (not byte) offsets into the excerpt — confirm.
    start_char: usize,
    end_char: usize,
    /// Text that stands in for the redacted span.
    replacement: String,
}
705
/// `omitted` section of `RenderedAnswerPack`: a count together with a list of
/// omitted candidates.
/// NOTE(review): whether `count` always equals `items.len()` is decided by the
/// renderer, which is outside this section.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedOmitted {
    count: usize,
    items: Vec<OmittedPackCandidate>,
}
711
/// `privacy` section of `RenderedAnswerPack`.
#[derive(Debug, Clone, PartialEq, Serialize)]
struct RenderedPrivacy {
    redaction_policy: String,
    redaction_applied: bool,
    sensitive_output: bool,
    skill_content_included: bool,
    /// Redaction counts keyed by a string; a `BTreeMap` keeps keys in sorted order.
    redaction_counts: BTreeMap<String, usize>,
}
720
/// Per-source running totals.
/// NOTE(review): the code that fills this in is outside this section. Note that
/// the derived `Default` starts `healthy` at `false` and `worst_readiness` at
/// `PackSourceReadiness::Healthy`.
#[derive(Debug, Default)]
struct SourceAccumulator {
    origin_kind: String,
    /// Distinct session keys seen for the source (sorted set).
    sessions: BTreeSet<String>,
    evidence_count: usize,
    newest_evidence_at_ms: Option<i64>,
    healthy: bool,
    worst_readiness: PackSourceReadiness,
}
730
731pub fn pack_candidate_fetch_limit(
732 limits: &PackPlannerLimits,
733) -> Result<usize, PackPlannerLimitError> {
734 limits.validate()?;
735 Ok(limits
736 .max_evidence
737 .saturating_mul(8)
738 .max(limits.max_sessions.saturating_mul(16))
739 .clamp(64, PACK_CANDIDATE_LIMIT_CAP))
740}
741
742pub fn pack_planner_budget(
743 limits: &PackPlannerLimits,
744) -> Result<PackPlannerBudget, PackPlannerLimitError> {
745 limits.validate()?;
746 Ok(pack_planner_budget_unchecked(limits.max_tokens))
747}
748
749fn pack_planner_budget_unchecked(max_tokens: usize) -> PackPlannerBudget {
750 let metadata_tokens = percent_tokens(max_tokens, 15);
751 let outline_tokens = percent_tokens(max_tokens, 15);
752 let evidence_tokens = percent_tokens(max_tokens, 60);
753 let omitted_tokens = max_tokens
754 .saturating_sub(metadata_tokens)
755 .saturating_sub(outline_tokens)
756 .saturating_sub(evidence_tokens);
757 PackPlannerBudget {
758 max_tokens,
759 metadata_tokens,
760 outline_tokens,
761 evidence_tokens,
762 omitted_tokens,
763 max_output_tokens_with_overflow: max_tokens.saturating_add(max_tokens / 20),
764 }
765}
766
/// Returns `floor(max_tokens * percent / 100)`.
///
/// The product is never formed directly: `max_tokens` is split into its
/// quotient and remainder by 100, so the result is exact for every `max_tokens`
/// when `percent <= 100`. Multiplying first and saturating would cap the
/// product and return a far-too-small share for very large budgets. For
/// `percent > 100` the arithmetic saturates instead of wrapping.
fn percent_tokens(max_tokens: usize, percent: usize) -> usize {
    let hundreds = max_tokens / 100;
    let rest = max_tokens % 100;
    // (100 * hundreds + rest) * percent / 100
    //   == hundreds * percent + floor(rest * percent / 100)
    hundreds
        .saturating_mul(percent)
        .saturating_add(rest.saturating_mul(percent) / 100)
}
770
/// A candidate scored during one selection round of `plan_answer_pack`.
#[derive(Debug, Clone)]
struct ScoredCandidate {
    /// Index into `PackPlanRequest::candidates`.
    index: usize,
    /// Score computed against the selection state of the current round.
    score: PackSelectionScore,
    /// Excerpt after truncation to `max_excerpt_chars`.
    excerpt: String,
    /// Truncation flag reported by `truncate_excerpt`.
    excerpt_truncated: bool,
}
778
/// Bookkeeping for what has been selected so far; consulted for diversity
/// scoring and duplicate detection.
#[derive(Debug, Default)]
struct SelectedState {
    /// Source ids with at least one selected evidence item.
    source_ids: HashSet<String>,
    /// `(source_id, source_path)` pairs with at least one selected item.
    sessions: HashSet<(String, String)>,
    /// Span hashes of the selected evidence.
    span_hashes: HashSet<String>,
    /// Content hashes of the selected evidence.
    content_hashes: HashSet<String>,
    /// `(source_path, line_start, line_end)` of the selected evidence.
    ranges: Vec<(String, Option<usize>, Option<usize>)>,
}
787
/// Greedily selects evidence from `request.candidates` within the configured limits.
///
/// Each round re-scores every remaining candidate against what has already
/// been selected (diversity and duplicate checks depend on it), picks the best
/// one according to `candidate_ordering`, and either selects it or records
/// why it was omitted. Rounds continue until no candidates remain or
/// `max_evidence` items have been selected.
///
/// # Errors
///
/// Returns a [`PackPlannerLimitError`] when `request.limits` fails validation.
pub fn plan_answer_pack(
    request: PackPlanRequest,
) -> Result<PlannedAnswerPack, PackPlannerLimitError> {
    request.limits.validate()?;

    let candidate_count = request.candidates.len();
    let diagnostics = PackPlannerDiagnostics {
        candidate_fetch_limit: pack_candidate_fetch_limit(&request.limits)?,
        budget: pack_planner_budget_unchecked(request.limits.max_tokens),
    };
    // Min/max of the finite lexical and semantic scores across ALL candidates;
    // used to normalise individual scores.
    let lexical_range = ScoreRange::from_values(
        request
            .candidates
            .iter()
            .filter_map(|candidate| finite_score(candidate.lexical_score)),
    );
    let semantic_range = ScoreRange::from_values(
        request
            .candidates
            .iter()
            .filter_map(|candidate| finite_score(candidate.semantic_score)),
    );

    // `remaining` holds indices into `request.candidates` that are still undecided.
    let mut remaining: Vec<usize> = (0..request.candidates.len()).collect();
    let mut selected = Vec::new();
    let mut omitted = Vec::new();
    let mut selected_state = SelectedState::default();
    let mut used_tokens = 0usize;

    while !remaining.is_empty() && selected.len() < request.limits.max_evidence {
        let mut best: Option<ScoredCandidate> = None;
        let mut next_remaining = Vec::with_capacity(remaining.len());

        for candidate_index in remaining.iter().copied() {
            let candidate = &request.candidates[candidate_index];
            // Hard omissions (unavailable source, stale under strict policy,
            // duplicate of selected evidence) are dropped for good and scored
            // with a token cost of zero.
            if let Some(reason) = hard_omission_reason(candidate, &request, &selected_state) {
                let score = score_candidate(
                    candidate,
                    &request,
                    &selected_state,
                    lexical_range,
                    semantic_range,
                    0,
                );
                omitted.push(omitted_candidate(candidate, reason, score));
                continue;
            }

            let (excerpt, excerpt_truncated) =
                truncate_excerpt(&candidate.excerpt, request.limits.max_excerpt_chars);
            // An excerpt that is empty or whitespace-only has nothing to show.
            if excerpt.trim().is_empty() {
                let score = score_candidate(
                    candidate,
                    &request,
                    &selected_state,
                    lexical_range,
                    semantic_range,
                    0,
                );
                omitted.push(omitted_candidate(
                    candidate,
                    PackOmittedReason::RedactedToEmpty,
                    score,
                ));
                continue;
            }

            next_remaining.push(candidate_index);
            let token_cost = estimated_tokens(&excerpt);
            let score = score_candidate(
                candidate,
                &request,
                &selected_state,
                lexical_range,
                semantic_range,
                token_cost,
            );
            let scored = ScoredCandidate {
                index: candidate_index,
                score,
                excerpt,
                excerpt_truncated,
            };

            // Replace the current best only when this candidate orders strictly
            // before it; otherwise the earlier candidate stays.
            if best.as_ref().is_none_or(|current| {
                candidate_ordering(
                    &scored,
                    &request.candidates[scored.index],
                    current,
                    &request.candidates[current.index],
                )
                .is_lt()
            }) {
                best = Some(scored);
            }
        }

        // Every remaining candidate was omitted this round: nothing left to pick.
        let Some(best_candidate) = best else {
            remaining = next_remaining;
            break;
        };

        // The winner leaves the pool whether or not it ends up selected.
        next_remaining.retain(|candidate_index| *candidate_index != best_candidate.index);
        remaining = next_remaining;
        let candidate = &request.candidates[best_candidate.index];

        // Evidence may only consume the evidence share of the token budget.
        if used_tokens.saturating_add(best_candidate.score.token_cost)
            > diagnostics.budget.evidence_tokens
        {
            omitted.push(omitted_candidate(
                candidate,
                PackOmittedReason::TokenBudgetExhausted,
                best_candidate.score,
            ));
            continue;
        }

        // Opening a new session is refused once the session cap is reached;
        // candidates from already-selected sessions are still allowed.
        let session_key = candidate.session_key();
        if !selected_state
            .sessions
            .contains(&(session_key.0.to_string(), session_key.1.to_string()))
            && selected_state.sessions.len() >= request.limits.max_sessions
        {
            omitted.push(omitted_candidate(
                candidate,
                PackOmittedReason::MaxSessionsReached,
                best_candidate.score,
            ));
            continue;
        }

        // Commit the selection and record it for later diversity/duplicate checks.
        used_tokens = used_tokens.saturating_add(best_candidate.score.token_cost);
        selected_state
            .source_ids
            .insert(candidate.source_id.clone());
        selected_state
            .sessions
            .insert((candidate.source_id.clone(), candidate.source_path.clone()));
        selected_state
            .span_hashes
            .insert(candidate.span_hash.clone());
        selected_state
            .content_hashes
            .insert(candidate.content_hash.clone());
        selected_state.ranges.push((
            candidate.source_path.clone(),
            candidate.line_start,
            candidate.line_end,
        ));

        selected.push(PlannedPackEvidence {
            id: evidence_id(candidate),
            rank: selected.len() + 1,
            excerpt: best_candidate.excerpt,
            excerpt_truncated: best_candidate.excerpt_truncated,
            estimated_tokens: best_candidate.score.token_cost,
            candidate: candidate.clone(),
            selection: best_candidate.score,
        });
    }

    // Anything still undecided was cut off by the evidence cap.
    // NOTE(review): the token cost here is estimated from the untruncated
    // excerpt, unlike the selection loop above — confirm this is intended.
    for candidate_index in remaining {
        let candidate = &request.candidates[candidate_index];
        let score = score_candidate(
            candidate,
            &request,
            &selected_state,
            lexical_range,
            semantic_range,
            estimated_tokens(&candidate.excerpt),
        );
        omitted.push(omitted_candidate(
            candidate,
            PackOmittedReason::MaxEvidenceReached,
            score,
        ));
    }

    Ok(PlannedAnswerPack {
        candidate_count,
        selected_evidence_count: selected.len(),
        selected_session_count: selected_state.sessions.len(),
        estimated_tokens: used_tokens,
        diagnostics,
        evidence: selected,
        omitted,
    })
}
976
977fn hard_omission_reason(
978 candidate: &PackCandidate,
979 request: &PackPlanRequest,
980 selected_state: &SelectedState,
981) -> Option<PackOmittedReason> {
982 if matches!(candidate.source_readiness, PackSourceReadiness::Unavailable) {
983 return Some(PackOmittedReason::SourceUnavailable);
984 }
985 if is_stale_under_strict_policy(candidate, request) {
986 return Some(PackOmittedReason::StaleUnderStrictPolicy);
987 }
988 if selected_state.span_hashes.contains(&candidate.span_hash)
989 || selected_state
990 .content_hashes
991 .contains(&candidate.content_hash)
992 || selected_state
993 .ranges
994 .iter()
995 .any(|(source_path, start, end)| {
996 source_path == &candidate.source_path
997 && line_ranges_overlap(*start, *end, candidate.line_start, candidate.line_end)
998 })
999 {
1000 return Some(PackOmittedReason::DuplicateContent);
1001 }
1002 None
1003}
1004
1005fn is_stale_under_strict_policy(candidate: &PackCandidate, request: &PackPlanRequest) -> bool {
1006 if !matches!(request.freshness_policy, PackFreshnessPolicy::Strict) {
1007 return false;
1008 }
1009 let Some(created_at_ms) = candidate.created_at_ms else {
1010 return true;
1011 };
1012 let max_age_ms = request.freshness_window_seconds.saturating_mul(1_000);
1013 request.now_ms.saturating_sub(created_at_ms) > max_age_ms
1014}
1015
/// Whether two inclusive line ranges share at least one line.
///
/// A range without a start never overlaps anything; a missing end makes the
/// range a single line at its start.
fn line_ranges_overlap(
    left_start: Option<usize>,
    left_end: Option<usize>,
    right_start: Option<usize>,
    right_end: Option<usize>,
) -> bool {
    match (left_start, right_start) {
        (Some(left_first), Some(right_first)) => {
            let left_last = left_end.unwrap_or(left_first);
            let right_last = right_end.unwrap_or(right_first);
            // Disjoint exactly when one range begins after the other ends.
            !(left_first > right_last || right_first > left_last)
        }
        _ => false,
    }
}
1029
1030fn score_candidate(
1031 candidate: &PackCandidate,
1032 request: &PackPlanRequest,
1033 selected_state: &SelectedState,
1034 lexical_range: ScoreRange,
1035 semantic_range: ScoreRange,
1036 token_cost: usize,
1037) -> PackSelectionScore {
1038 let relevance_score = relevance_score(candidate, lexical_range, semantic_range);
1039 let coverage_score = coverage_score(candidate);
1040 let freshness_score = freshness_score(candidate, request);
1041 let source_diversity_score = source_diversity_score(candidate, selected_state);
1042 let source_authority_score = source_authority_score(candidate);
1043 let role_score = role_score(candidate.role);
1044 let citation_quality_score = citation_quality_score(candidate);
1045 let duplicate_penalty = duplicate_penalty(candidate, selected_state);
1046 let score = 0.35 * relevance_score
1047 + 0.20 * coverage_score
1048 + 0.15 * freshness_score
1049 + 0.10 * source_diversity_score
1050 + 0.10 * role_score
1051 + 0.05 * source_authority_score
1052 + 0.05 * citation_quality_score
1053 - duplicate_penalty;
1054
1055 PackSelectionScore {
1056 score,
1057 relevance_score,
1058 coverage_score,
1059 freshness_score,
1060 source_diversity_score,
1061 source_authority_score,
1062 role_score,
1063 citation_quality_score,
1064 duplicate_penalty,
1065 token_cost,
1066 selected_reason: selected_reason(
1067 relevance_score,
1068 freshness_score,
1069 source_diversity_score,
1070 citation_quality_score,
1071 ),
1072 }
1073}
1074
/// Observed minimum and maximum of a set of scores, used for min-max normalisation.
#[derive(Debug, Clone, Copy)]
struct ScoreRange {
    /// Smallest value seen; `f64::INFINITY` while no value has been seen.
    min: f64,
    /// Largest value seen; `f64::NEG_INFINITY` while no value has been seen.
    max: f64,
    /// Whether at least one value has been folded in.
    has_value: bool,
}
1081
1082impl ScoreRange {
1083 fn from_values(values: impl Iterator<Item = f64>) -> Self {
1084 let mut range = Self {
1085 min: f64::INFINITY,
1086 max: f64::NEG_INFINITY,
1087 has_value: false,
1088 };
1089 for value in values {
1090 range.has_value = true;
1091 range.min = range.min.min(value);
1092 range.max = range.max.max(value);
1093 }
1094 range
1095 }
1096
1097 fn normalize(self, value: Option<f64>) -> f64 {
1098 let Some(value) = finite_score(value) else {
1099 return 0.0;
1100 };
1101 if !self.has_value {
1102 return 0.0;
1103 }
1104 if (self.max - self.min).abs() < f64::EPSILON {
1105 return if value > 0.0 { 1.0 } else { 0.0 };
1106 }
1107 ((value - self.min) / (self.max - self.min)).clamp(0.0, 1.0)
1108 }
1109}
1110
/// Passes a score through only when it is present and finite (neither NaN nor
/// infinite).
fn finite_score(score: Option<f64>) -> Option<f64> {
    match score {
        Some(value) if value.is_finite() => Some(value),
        _ => None,
    }
}
1114
1115fn relevance_score(
1116 candidate: &PackCandidate,
1117 lexical_range: ScoreRange,
1118 semantic_range: ScoreRange,
1119) -> f64 {
1120 let lexical = lexical_range.normalize(candidate.lexical_score);
1121 let semantic = semantic_range.normalize(candidate.semantic_score);
1122 let hybrid = candidate
1123 .hybrid_rank
1124 .map(|rank| 1.0 / rank.max(1) as f64)
1125 .unwrap_or(0.0);
1126 lexical.max(semantic).max(hybrid).clamp(0.0, 1.0)
1127}
1128
1129fn coverage_score(candidate: &PackCandidate) -> f64 {
1130 let denominator = candidate
1131 .query_term_count
1132 .saturating_add(candidate.query_phrase_count.saturating_mul(2));
1133 if denominator == 0 {
1134 return 0.0;
1135 }
1136 let numerator = candidate
1137 .matched_terms
1138 .len()
1139 .saturating_add(candidate.matched_phrases.len().saturating_mul(2));
1140 (numerator as f64 / denominator as f64).clamp(0.0, 1.0)
1141}
1142
1143fn freshness_score(candidate: &PackCandidate, request: &PackPlanRequest) -> f64 {
1144 let Some(created_at_ms) = candidate.created_at_ms else {
1145 return match request.freshness_policy {
1146 PackFreshnessPolicy::PreferRecent => 0.25,
1147 PackFreshnessPolicy::Strict => 0.0,
1148 PackFreshnessPolicy::AllowStale => 1.0,
1149 };
1150 };
1151 let age_ms = request.now_ms.saturating_sub(created_at_ms).max(0);
1152 let window_ms = request
1153 .freshness_window_seconds
1154 .max(1)
1155 .saturating_mul(1_000);
1156 if age_ms <= window_ms {
1157 return 1.0;
1158 }
1159 let max_decay_ms = window_ms.saturating_mul(4);
1160 if age_ms >= max_decay_ms {
1161 0.0
1162 } else {
1163 1.0 - ((age_ms - window_ms) as f64 / (max_decay_ms - window_ms) as f64)
1164 }
1165}
1166
1167fn source_diversity_score(candidate: &PackCandidate, selected_state: &SelectedState) -> f64 {
1168 let session_key = (candidate.source_id.clone(), candidate.source_path.clone());
1169 if selected_state.sessions.contains(&session_key) {
1170 0.0
1171 } else if selected_state.source_ids.contains(&candidate.source_id) {
1172 0.5
1173 } else {
1174 1.0
1175 }
1176}
1177
1178fn source_authority_score(candidate: &PackCandidate) -> f64 {
1179 match (
1180 candidate.source_explicitly_requested,
1181 candidate.origin_kind.as_str(),
1182 candidate.source_readiness,
1183 ) {
1184 (true, _, PackSourceReadiness::Healthy) => 1.0,
1185 (_, "local", PackSourceReadiness::Healthy) => 1.0,
1186 (_, _, PackSourceReadiness::Healthy) => 0.9,
1187 (_, _, PackSourceReadiness::StaleReadable) => 0.6,
1188 (_, _, PackSourceReadiness::IncompleteMetadata) => 0.4,
1189 (_, _, PackSourceReadiness::Unavailable) => 0.0,
1190 }
1191}
1192
1193fn role_score(role: PackEvidenceRole) -> f64 {
1194 match role {
1195 PackEvidenceRole::AssistantConclusion | PackEvidenceRole::ToolResult => 1.0,
1196 PackEvidenceRole::UserRequirement => 0.85,
1197 PackEvidenceRole::ToolCallArgument => 0.65,
1198 PackEvidenceRole::Unknown => 0.5,
1199 }
1200}
1201
1202fn citation_quality_score(candidate: &PackCandidate) -> f64 {
1203 let has_path = !candidate.source_path.trim().is_empty();
1204 let has_source = !candidate.source_id.trim().is_empty();
1205 let has_agent = !candidate.agent.trim().is_empty();
1206 let has_line_span = candidate.line_start.is_some() && candidate.line_end.is_some();
1207 if has_path && has_source && has_agent && has_line_span {
1208 1.0
1209 } else if has_path && has_source && has_agent {
1210 0.75
1211 } else if has_path && has_agent {
1212 0.5
1213 } else {
1214 0.0
1215 }
1216}
1217
1218fn duplicate_penalty(candidate: &PackCandidate, selected_state: &SelectedState) -> f64 {
1219 if selected_state.span_hashes.contains(&candidate.span_hash) {
1220 return 1.0;
1221 }
1222 if selected_state
1223 .content_hashes
1224 .contains(&candidate.content_hash)
1225 {
1226 return 0.5;
1227 }
1228 if selected_state
1229 .ranges
1230 .iter()
1231 .any(|(source_path, start, end)| {
1232 source_path == &candidate.source_path
1233 && line_ranges_overlap(*start, *end, candidate.line_start, candidate.line_end)
1234 })
1235 {
1236 return 0.25;
1237 }
1238 0.0
1239}
1240
1241fn selected_reason(
1242 relevance_score: f64,
1243 freshness_score: f64,
1244 source_diversity_score: f64,
1245 citation_quality_score: f64,
1246) -> PackSelectedReason {
1247 let scores = [
1248 (relevance_score, PackSelectedReason::HighRelevance),
1249 (freshness_score, PackSelectedReason::FreshEvidence),
1250 (source_diversity_score, PackSelectedReason::SourceDiversity),
1251 (citation_quality_score, PackSelectedReason::StrongCitation),
1252 (0.0, PackSelectedReason::BudgetFit),
1253 ];
1254 scores
1255 .into_iter()
1256 .max_by(|(left, _), (right, _)| left.total_cmp(right))
1257 .map(|(_, reason)| reason)
1258 .unwrap_or(PackSelectedReason::BudgetFit)
1259}
1260
1261fn candidate_ordering(
1262 left: &ScoredCandidate,
1263 left_candidate: &PackCandidate,
1264 right: &ScoredCandidate,
1265 right_candidate: &PackCandidate,
1266) -> Ordering {
1267 right
1268 .score
1269 .score
1270 .total_cmp(&left.score.score)
1271 .then_with(|| {
1272 right
1273 .score
1274 .relevance_score
1275 .total_cmp(&left.score.relevance_score)
1276 })
1277 .then_with(|| {
1278 compare_newer_first(left_candidate.created_at_ms, right_candidate.created_at_ms)
1279 })
1280 .then_with(|| left_candidate.source_id.cmp(&right_candidate.source_id))
1281 .then_with(|| left_candidate.source_path.cmp(&right_candidate.source_path))
1282 .then_with(|| {
1283 compare_optional_usize_low_first(left_candidate.line_start, right_candidate.line_start)
1284 })
1285 .then_with(|| {
1286 left_candidate
1287 .content_hash
1288 .cmp(&right_candidate.content_hash)
1289 })
1290}
1291
/// Orders optional timestamps so that newer values sort first and missing
/// values sort last.
fn compare_newer_first(left: Option<i64>, right: Option<i64>) -> Ordering {
    // Presence decides first (a present value sorts ahead of a missing one);
    // when both are present the reversed comparison puts the larger value first.
    right
        .is_some()
        .cmp(&left.is_some())
        .then_with(|| right.cmp(&left))
}
1300
/// Orders optional values ascending, with missing values sorted last.
fn compare_optional_usize_low_first(left: Option<usize>, right: Option<usize>) -> Ordering {
    match left {
        Some(left_value) => right.map_or(Ordering::Less, |right_value| {
            left_value.cmp(&right_value)
        }),
        None => {
            if right.is_some() {
                Ordering::Greater
            } else {
                Ordering::Equal
            }
        }
    }
}
1309
1310fn omitted_candidate(
1311 candidate: &PackCandidate,
1312 reason: PackOmittedReason,
1313 score: PackSelectionScore,
1314) -> OmittedPackCandidate {
1315 OmittedPackCandidate {
1316 candidate_id: candidate.candidate_id.clone(),
1317 source_path: candidate.source_path.clone(),
1318 line_start: candidate.line_start,
1319 agent: candidate.agent.clone(),
1320 reason,
1321 score: score.score,
1322 estimated_tokens: score.token_cost,
1323 }
1324}
1325
/// Shortens `excerpt` to at most `max_chars` characters (Unicode scalar values).
///
/// Returns the resulting text and whether truncation happened. A truncated
/// excerpt ends in `"..."`, and the ellipsis counts toward the limit. When the
/// limit is too small to hold the ellipsis (`max_chars < 3`), the first
/// `max_chars` characters are kept without an ellipsis so the result never
/// exceeds the limit.
fn truncate_excerpt(excerpt: &str, max_chars: usize) -> (String, bool) {
    const ELLIPSIS: &str = "...";

    // The excerpt fits when it has no character at index `max_chars`; this
    // stops scanning as soon as the limit is known to be exceeded.
    if excerpt.char_indices().nth(max_chars).is_none() {
        return (excerpt.to_string(), false);
    }

    if max_chars < ELLIPSIS.len() {
        // No room for the ellipsis: a hard cut still respects the limit.
        return (excerpt.chars().take(max_chars).collect(), true);
    }

    let keep_chars = max_chars - ELLIPSIS.len();
    let mut out: String = excerpt.chars().take(keep_chars).collect();
    out.push_str(ELLIPSIS);
    (out, true)
}
1335
1336fn estimated_tokens(text: &str) -> usize {
1337 text.chars()
1338 .count()
1339 .div_ceil(TOKEN_ESTIMATE_CHARS_PER_TOKEN)
1340}
1341
1342fn evidence_id(candidate: &PackCandidate) -> String {
1343 let mut hasher_input = String::new();
1344 hasher_input.push_str(&candidate.source_id);
1345 hasher_input.push('\n');
1346 hasher_input.push_str(&candidate.source_path);
1347 hasher_input.push('\n');
1348 hasher_input.push_str(&candidate.line_start.unwrap_or_default().to_string());
1349 hasher_input.push('\n');
1350 hasher_input.push_str(&candidate.line_end.unwrap_or_default().to_string());
1351 hasher_input.push('\n');
1352 hasher_input.push_str(&candidate.span_hash);
1353 let hash = blake3::hash(hasher_input.as_bytes());
1354 format!("ev_{}", &hash.to_hex()[..16])
1355}
1356
1357pub fn render_answer_pack(
1358 plan: &PlannedAnswerPack,
1359 request: &PackRenderRequest,
1360) -> Result<String, PackRenderError> {
1361 let envelope = rendered_answer_pack(plan, request);
1362 match request.format {
1363 PackRenderFormat::Json => {
1364 serde_json::to_string_pretty(&envelope).map_err(|err| render_error(request, err))
1365 }
1366 PackRenderFormat::CompactJson => {
1367 serde_json::to_string(&envelope).map_err(|err| render_error(request, err))
1368 }
1369 PackRenderFormat::Jsonl => render_answer_pack_jsonl(&envelope, request),
1370 PackRenderFormat::Toon => {
1371 let value =
1372 serde_json::to_value(&envelope).map_err(|err| render_error(request, err))?;
1373 Ok(toon::encode(value, Some(pack_toon_encode_options())))
1374 }
1375 PackRenderFormat::Markdown => Ok(render_answer_pack_markdown(&envelope)),
1376 }
1377}
1378
1379pub fn render_answer_pack_value(
1380 plan: &PlannedAnswerPack,
1381 request: &PackRenderRequest,
1382) -> Result<serde_json::Value, PackRenderError> {
1383 serde_json::to_value(rendered_answer_pack(plan, request))
1384 .map_err(|err| render_error(request, err))
1385}
1386
1387fn render_error(error: &PackRenderRequest, err: serde_json::Error) -> PackRenderError {
1388 PackRenderError {
1389 format: error.format.label(),
1390 message: err.to_string(),
1391 }
1392}
1393
/// Builds the `cass.pack.v1` envelope that every output format is rendered from.
///
/// Evidence items are rendered (and redacted) first; source summaries and
/// source readiness are then derived from that rendered evidence. Free-text
/// envelope fields (query text, title, recommended action, index generation,
/// lock state, sync-gap labels) pass through `redact_pack_output_text` or
/// `redacted_source_label`, and every redaction they record is collected in
/// `envelope_redactions` so it shows up in the privacy counts.
///
/// NOTE(review): the title is derived from the normalized query, and sync-gap
/// source ids are redacted both for `health.source_sync_gaps` and for the
/// warning strings, so one underlying value can contribute more than one
/// entry to `redaction_counts` — confirm that per-occurrence counting is
/// intended.
fn rendered_answer_pack(
    plan: &PlannedAnswerPack,
    request: &PackRenderRequest,
) -> RenderedAnswerPack {
    // Per-item redactions are stored on each rendered evidence item.
    let evidence = plan
        .evidence
        .iter()
        .map(|item| rendered_evidence(item, request))
        .collect::<Vec<_>>();
    // Redactions applied to envelope-level strings (not tied to one evidence item).
    let mut envelope_redactions = Vec::new();
    let query_text = redact_pack_output_text(&request.query_text, &mut envelope_redactions);
    let normalized_query =
        redact_pack_output_text(&normalized_query(request), &mut envelope_redactions);
    let pack_title = redact_pack_output_text(&pack_title(request), &mut envelope_redactions);
    // Both aggregates are computed from the already-redacted evidence.
    let source_summary = rendered_source_summary(&evidence);
    let source_readiness = rendered_source_readiness(&evidence);
    let source_sync_gaps = rendered_source_sync_gaps(
        &request.readiness.source_sync_gaps,
        &mut envelope_redactions,
    );
    let stale_evidence_count = stale_evidence_count(&evidence, request);
    // Candidates dropped because redaction left nothing to show.
    let redacted_count = plan
        .omitted
        .iter()
        .filter(|omitted| omitted.reason == PackOmittedReason::RedactedToEmpty)
        .count();
    let (omitted_items, omitted_redactions) = rendered_omitted_items(&plan.omitted);
    let semantic_readiness = effective_semantic_readiness(request);
    let health_is_healthy = health_is_healthy(request, &source_readiness);
    let mut warnings = readiness_warnings(
        request,
        semantic_readiness,
        &source_readiness,
        evidence.is_empty(),
        &mut envelope_redactions,
    );
    let recommended_action =
        readiness_recommended_action(request, semantic_readiness, health_is_healthy)
            .map(|action| redact_pack_output_text(&action, &mut envelope_redactions));
    let index_generation = request
        .readiness
        .index_generation
        .as_deref()
        .map(|generation| redact_pack_output_text(generation, &mut envelope_redactions));
    let lock_state = request
        .readiness
        .lock_state
        .as_deref()
        .map(|state| redact_pack_output_text(state, &mut envelope_redactions));
    // Counted only after every redacting call above has run.
    let redaction_counts = redaction_counts(
        redacted_count,
        &evidence,
        &omitted_redactions,
        &envelope_redactions,
    );
    let redaction_applied = !redaction_counts.is_empty();
    if redaction_applied {
        warnings.push("privacy_redactions_applied".to_string());
    }

    RenderedAnswerPack {
        schema_version: "cass.pack.v1",
        query: RenderedQuery {
            text: query_text,
            normalized: normalized_query,
            // Always rendered as an empty map here.
            filters: BTreeMap::new(),
        },
        meta: RenderedMeta {
            request_id: request.request_id.clone(),
            generated_at_ms: request.generated_at_ms,
            elapsed_ms: request.elapsed_ms,
            // Always reported as a complete (non-partial) pack here.
            partial: false,
            format: request.format.label(),
            // The same warning list is also emitted at the top level below.
            warnings: warnings.clone(),
        },
        limits: RenderedLimits {
            max_tokens: request.limits.max_tokens,
            estimated_tokens: plan.estimated_tokens,
            max_sessions: request.limits.max_sessions,
            max_evidence: request.limits.max_evidence,
            context_lines: request.limits.context_lines,
            max_excerpt_chars: request.limits.max_excerpt_chars,
            field_mask: "standard",
        },
        realized: RenderedRealized {
            search_mode: request.search_mode.clone(),
            fallback_mode: request.fallback_mode.clone(),
            semantic_joined: request.semantic_joined,
            candidate_count: plan.candidate_count,
            selected_evidence_count: plan.selected_evidence_count,
            selected_session_count: plan.selected_session_count,
        },
        health: RenderedHealth {
            healthy: health_is_healthy,
            recommended_action,
            // `index_state` and `lexical_readiness` carry the same label.
            index_state: lexical_readiness_label(request.readiness.lexical_readiness),
            index_generation,
            lexical_readiness: lexical_readiness_label(request.readiness.lexical_readiness),
            semantic_state: semantic_readiness_label(semantic_readiness),
            active_rebuild: request.readiness.active_rebuild,
            lock_state,
            missing_database: request.readiness.missing_database,
            source_sync_gaps,
            source_readiness,
        },
        freshness: RenderedFreshness {
            policy: freshness_policy_label(request.freshness_policy),
            window_seconds: request.freshness_window_seconds,
            // Evidence without a creation timestamp is ignored for both bounds.
            newest_evidence_at_ms: evidence
                .iter()
                .filter_map(|item| item.citation.created_at_ms)
                .max(),
            oldest_evidence_at_ms: evidence
                .iter()
                .filter_map(|item| item.citation.created_at_ms)
                .min(),
            stale_evidence_count,
        },
        pack: RenderedPack {
            title: pack_title,
            answer_outline: rendered_outline(&evidence),
            source_summary,
            handoff: rendered_handoff(&evidence),
        },
        evidence,
        omitted: RenderedOmitted {
            count: plan.omitted.len(),
            items: omitted_items,
        },
        privacy: RenderedPrivacy {
            redaction_policy: request.redaction_policy.clone(),
            redaction_applied,
            sensitive_output: request.sensitive_output,
            skill_content_included: request.skill_content_included,
            redaction_counts,
        },
        warnings,
    }
}
1533
1534fn health_is_healthy(
1535 request: &PackRenderRequest,
1536 source_readiness: &[RenderedSourceReadiness],
1537) -> bool {
1538 matches!(
1539 request.readiness.lexical_readiness,
1540 PackLexicalReadiness::Ready
1541 ) && !request.readiness.active_rebuild
1542 && !request.readiness.missing_database
1543 && request.readiness.lock_state.is_none()
1544 && request.readiness.source_sync_gaps.is_empty()
1545 && source_readiness.iter().all(|source| source.healthy)
1546}
1547
1548fn readiness_warnings(
1549 request: &PackRenderRequest,
1550 semantic_readiness: PackSemanticReadiness,
1551 source_readiness: &[RenderedSourceReadiness],
1552 no_evidence: bool,
1553 redactions: &mut Vec<RenderedRedaction>,
1554) -> Vec<String> {
1555 let mut warnings = Vec::new();
1556 if no_evidence {
1557 warnings.push("no_evidence_found".to_string());
1558 }
1559 match request.readiness.lexical_readiness {
1560 PackLexicalReadiness::Ready => {}
1561 PackLexicalReadiness::Stale => warnings.push("lexical_index_stale".to_string()),
1562 PackLexicalReadiness::Missing => warnings.push("lexical_index_missing".to_string()),
1563 PackLexicalReadiness::Rebuilding => warnings.push("lexical_index_rebuilding".to_string()),
1564 PackLexicalReadiness::Unknown => warnings.push("lexical_index_unknown".to_string()),
1565 }
1566 match semantic_readiness {
1567 PackSemanticReadiness::FallbackLexical => {
1568 warnings.push("semantic_fallback_lexical".to_string());
1569 }
1570 PackSemanticReadiness::Unavailable => {
1571 warnings.push("semantic_unavailable_lexical_fallback".to_string());
1572 }
1573 PackSemanticReadiness::Disabled => warnings.push("semantic_disabled".to_string()),
1574 PackSemanticReadiness::Joined | PackSemanticReadiness::NotReported => {}
1575 }
1576 if request.readiness.active_rebuild {
1577 warnings.push("active_rebuild".to_string());
1578 }
1579 if request.readiness.lock_state.is_some() {
1580 warnings.push("index_lock_active".to_string());
1581 }
1582 if request.readiness.missing_database {
1583 warnings.push("missing_database".to_string());
1584 }
1585 for gap in &request.readiness.source_sync_gaps {
1586 let source_id = redacted_source_label(&gap.source_id, &gap.origin_kind, redactions);
1587 warnings.push(format!(
1588 "source_sync_gap:{}:{}",
1589 source_id,
1590 source_sync_gap_kind_label(gap.kind)
1591 ));
1592 }
1593 for source in source_readiness.iter().filter(|source| !source.healthy) {
1594 warnings.push(format!(
1595 "source_readiness:{}:{}",
1596 source.source_id, source.readiness
1597 ));
1598 }
1599 warnings
1600}
1601
1602fn readiness_recommended_action(
1603 request: &PackRenderRequest,
1604 semantic_readiness: PackSemanticReadiness,
1605 health_is_healthy: bool,
1606) -> Option<String> {
1607 if let Some(action) = trimmed_optional_string(request.readiness.recommended_action.as_deref()) {
1608 return Some(action);
1609 }
1610 if request.readiness.missing_database {
1611 return Some("run cass index --full".to_string());
1612 }
1613 match request.readiness.lexical_readiness {
1614 PackLexicalReadiness::Ready => {}
1615 PackLexicalReadiness::Stale => {
1616 return Some("refresh lexical index with cass index --full".to_string());
1617 }
1618 PackLexicalReadiness::Missing => {
1619 return Some("build lexical index with cass index --full".to_string());
1620 }
1621 PackLexicalReadiness::Rebuilding => {
1622 return Some("wait for active rebuild or inspect cass status --json".to_string());
1623 }
1624 PackLexicalReadiness::Unknown => {
1625 return Some("inspect cass health --json".to_string());
1626 }
1627 }
1628 if request.readiness.active_rebuild {
1629 return Some("wait for active rebuild or inspect cass status --json".to_string());
1630 }
1631 if request.readiness.lock_state.is_some() {
1632 return Some("inspect cass status --json for active locks".to_string());
1633 }
1634 if !request.readiness.source_sync_gaps.is_empty() {
1635 return Some("inspect cass sources sync --json and source status".to_string());
1636 }
1637 if !health_is_healthy {
1638 return Some("inspect cass health --json and source sync status".to_string());
1639 }
1640 if matches!(
1641 semantic_readiness,
1642 PackSemanticReadiness::FallbackLexical | PackSemanticReadiness::Unavailable
1643 ) {
1644 return Some(
1645 "continue with lexical evidence or install semantic model explicitly".to_string(),
1646 );
1647 }
1648 None
1649}
1650
/// Returns the trimmed value as an owned string, or `None` when the input is
/// absent or blank.
fn trimmed_optional_string(value: Option<&str>) -> Option<String> {
    value
        .map(str::trim)
        .filter(|trimmed| !trimmed.is_empty())
        .map(str::to_string)
}
1655
1656fn normalized_query(request: &PackRenderRequest) -> String {
1657 if request.normalized_query.trim().is_empty() {
1658 request.query_text.trim().to_string()
1659 } else {
1660 request.normalized_query.trim().to_string()
1661 }
1662}
1663
1664fn pack_title(request: &PackRenderRequest) -> String {
1665 let normalized = normalized_query(request);
1666 if normalized.is_empty() {
1667 "answer pack".to_string()
1668 } else {
1669 normalized
1670 }
1671}
1672
1673fn redact_pack_output_text(input: &str, redactions: &mut Vec<RenderedRedaction>) -> String {
1674 let mut output = input.to_string();
1675
1676 let encrypted_redacted = ENCRYPTED_PAYLOAD_RE.replace_all(&output, |_: &Captures<'_>| {
1677 REDACTED_ENCRYPTED_PAYLOAD_MARKER
1678 });
1679 if let Cow::Owned(redacted) = encrypted_redacted {
1680 push_full_redaction(
1681 redactions,
1682 "encrypted_payload",
1683 &output,
1684 REDACTED_ENCRYPTED_PAYLOAD_MARKER,
1685 );
1686 output = redacted;
1687 }
1688
1689 if let Cow::Owned(redacted) = redact_text(&output) {
1690 push_full_redaction(redactions, "secret", &output, REDACTED_VALUE_MARKER);
1691 output = redacted;
1692 }
1693
1694 let host_redacted =
1695 PRIVATE_HOST_LABEL_RE.replace_all(&output, |_: &Captures<'_>| REDACTED_REMOTE_HOST_MARKER);
1696 if let Cow::Owned(redacted) = host_redacted {
1697 push_full_redaction(
1698 redactions,
1699 "remote_host",
1700 &output,
1701 REDACTED_REMOTE_HOST_MARKER,
1702 );
1703 output = redacted;
1704 }
1705
1706 let path_redacted = PRIVATE_PATH_RE.replace_all(&output, |captures: &Captures<'_>| {
1707 redacted_private_path_marker(&captures[0])
1708 });
1709 if let Cow::Owned(redacted) = path_redacted {
1710 push_full_redaction(
1711 redactions,
1712 "private_path",
1713 &output,
1714 &format!("{REDACTED_PATH_PREFIX}/<name>"),
1715 );
1716 output = redacted;
1717 }
1718
1719 output
1720}
1721
1722fn redacted_source_label(
1723 source_id: &str,
1724 origin_kind: &str,
1725 redactions: &mut Vec<RenderedRedaction>,
1726) -> String {
1727 if is_remote_origin(origin_kind) && !source_id.trim().is_empty() {
1728 push_full_redaction(redactions, "remote_host", source_id, REDACTED_SOURCE_MARKER);
1729 REDACTED_SOURCE_MARKER.to_string()
1730 } else {
1731 redact_pack_output_text(source_id, redactions)
1732 }
1733}
1734
1735fn rendered_origin_host(
1736 origin_host: Option<&str>,
1737 redactions: &mut Vec<RenderedRedaction>,
1738) -> Option<String> {
1739 let host = origin_host?;
1740 let trimmed = host.trim();
1741 if trimmed.is_empty() {
1742 return Some(String::new());
1743 }
1744 push_full_redaction(redactions, "remote_host", host, REDACTED_REMOTE_HOST_MARKER);
1745 Some(REDACTED_REMOTE_HOST_MARKER.to_string())
1746}
1747
/// Treats every origin kind other than `local` (ignoring ASCII case and
/// surrounding whitespace) as remote.
fn is_remote_origin(origin_kind: &str) -> bool {
    let is_local = origin_kind.trim().eq_ignore_ascii_case("local");
    !is_local
}
1751
1752fn redacted_private_path_marker(path: &str) -> String {
1753 let trimmed = path.trim_end_matches(['/', '\\']);
1754 let components = trimmed
1755 .split(['/', '\\'])
1756 .filter(|component| !component.is_empty())
1757 .collect::<Vec<_>>();
1758 let basename = match components.as_slice() {
1759 ["home", _] | ["Users", _] | [_, "Users", _] | ["~"] => "home",
1760 _ => components
1761 .last()
1762 .copied()
1763 .filter(|component| *component != "~")
1764 .unwrap_or("path"),
1765 };
1766 format!("{REDACTED_PATH_PREFIX}/{basename}")
1767}
1768
1769fn push_full_redaction(
1770 redactions: &mut Vec<RenderedRedaction>,
1771 kind: &str,
1772 original: &str,
1773 replacement: &str,
1774) {
1775 if original.is_empty() {
1776 return;
1777 }
1778 redactions.push(RenderedRedaction {
1779 kind: kind.to_string(),
1780 start_char: 0,
1781 end_char: original.chars().count(),
1782 replacement: replacement.to_string(),
1783 });
1784}
1785
1786fn rendered_evidence(item: &PlannedPackEvidence, request: &PackRenderRequest) -> RenderedEvidence {
1787 let candidate = &item.candidate;
1788 let mut redactions = Vec::new();
1789 let excerpt = redact_pack_output_text(&item.excerpt, &mut redactions);
1790 let source_id = redacted_source_label(
1791 &candidate.source_id,
1792 &candidate.origin_kind,
1793 &mut redactions,
1794 );
1795 let source_path = redact_pack_output_text(&candidate.source_path, &mut redactions);
1796 let workspace = redact_pack_output_text(&candidate.workspace, &mut redactions);
1797 let workspace_original = candidate
1798 .workspace_original
1799 .as_deref()
1800 .map(|workspace| redact_pack_output_text(workspace, &mut redactions));
1801 let origin_host = rendered_origin_host(candidate.origin_host.as_deref(), &mut redactions);
1802 let citation = RenderedCitation {
1803 source_path,
1804 source_id,
1805 origin_kind: candidate.origin_kind.clone(),
1806 origin_host,
1807 workspace,
1808 workspace_original,
1809 agent: redact_pack_output_text(&candidate.agent, &mut redactions),
1810 line_start: candidate.line_start,
1811 line_end: candidate.line_end,
1812 message_index: candidate.message_index,
1813 conversation_id: candidate.conversation_id,
1814 content_hash: candidate.content_hash.clone(),
1815 span_hash: candidate.span_hash.clone(),
1816 excerpt_sha256: sha256_hex(&item.excerpt),
1817 created_at_ms: candidate.created_at_ms,
1818 indexed_at_ms: candidate.indexed_at_ms,
1819 freshness_age_seconds: candidate
1820 .created_at_ms
1821 .map(|created| request.generated_at_ms.saturating_sub(created).max(0) / 1_000),
1822 match_type: candidate.match_type.clone(),
1823 verified: candidate.line_start.is_some() && !candidate.source_path.trim().is_empty(),
1824 };
1825 RenderedEvidence {
1826 id: item.id.clone(),
1827 rank: item.rank,
1828 excerpt,
1829 excerpt_truncated: item.excerpt_truncated,
1830 estimated_tokens: item.estimated_tokens,
1831 citation,
1832 selection: rendered_selection(item.selection, request.explain_selection),
1833 roles: rendered_roles(candidate.role),
1834 matched_terms: candidate
1835 .matched_terms
1836 .iter()
1837 .map(|term| redact_pack_output_text(term, &mut redactions))
1838 .collect(),
1839 redactions,
1840 source_readiness: candidate.source_readiness,
1841 }
1842}
1843
1844fn rendered_selection(selection: PackSelectionScore, explain: bool) -> RenderedSelection {
1845 RenderedSelection {
1846 score: selection.score,
1847 token_cost: selection.token_cost,
1848 selected_reason: selection.selected_reason,
1849 relevance_score: explain.then_some(selection.relevance_score),
1850 coverage_score: explain.then_some(selection.coverage_score),
1851 freshness_score: explain.then_some(selection.freshness_score),
1852 source_diversity_score: explain.then_some(selection.source_diversity_score),
1853 source_authority_score: explain.then_some(selection.source_authority_score),
1854 role_score: explain.then_some(selection.role_score),
1855 citation_quality_score: explain.then_some(selection.citation_quality_score),
1856 duplicate_penalty: explain.then_some(selection.duplicate_penalty),
1857 }
1858}
1859
1860fn rendered_roles(role: PackEvidenceRole) -> Vec<&'static str> {
1861 if matches!(role, PackEvidenceRole::Unknown) {
1862 Vec::new()
1863 } else {
1864 vec![evidence_role_label(role)]
1865 }
1866}
1867
1868fn rendered_outline(evidence: &[RenderedEvidence]) -> Vec<RenderedOutlineItem> {
1869 evidence
1870 .iter()
1871 .map(|item| RenderedOutlineItem {
1872 rank: item.rank,
1873 heading: outline_heading(item),
1874 evidence_ids: vec![item.id.clone()],
1875 })
1876 .collect()
1877}
1878
1879fn outline_heading(item: &RenderedEvidence) -> String {
1880 item.matched_terms
1881 .first()
1882 .map(|term| format!("Evidence for {term}"))
1883 .unwrap_or_else(|| {
1884 format!(
1885 "{} evidence from {}",
1886 item.citation.agent, item.citation.source_id
1887 )
1888 })
1889}
1890
1891fn rendered_handoff(evidence: &[RenderedEvidence]) -> Vec<RenderedHandoffItem> {
1892 evidence
1893 .iter()
1894 .map(|item| RenderedHandoffItem {
1895 rank: item.rank,
1896 kind: handoff_kind(item),
1897 text: compact_excerpt(&item.excerpt, 220),
1898 evidence_ids: vec![item.id.clone()],
1899 })
1900 .collect()
1901}
1902
1903fn handoff_kind(item: &RenderedEvidence) -> &'static str {
1904 match item.roles.first().copied() {
1905 Some("assistant_conclusion") => "decision",
1906 Some("tool_result") => "fact",
1907 Some("user_requirement") => "next_step",
1908 Some("tool_call_argument") => "fact",
1909 _ => "fact",
1910 }
1911}
1912
1913fn rendered_source_summary(evidence: &[RenderedEvidence]) -> Vec<RenderedSourceSummary> {
1914 let mut sources: BTreeMap<(String, String), SourceAccumulator> = BTreeMap::new();
1915 for item in evidence {
1916 let key = (
1917 item.citation.source_id.clone(),
1918 item.citation.origin_kind.clone(),
1919 );
1920 let entry = sources.entry(key).or_insert_with(|| SourceAccumulator {
1921 origin_kind: item.citation.origin_kind.clone(),
1922 healthy: true,
1923 ..SourceAccumulator::default()
1924 });
1925 entry.sessions.insert(item.citation.source_path.clone());
1926 entry.evidence_count += 1;
1927 entry.newest_evidence_at_ms =
1928 newer_timestamp(entry.newest_evidence_at_ms, item.citation.created_at_ms);
1929 let readiness = item.citation_source_readiness();
1930 entry.healthy &= matches!(readiness, PackSourceReadiness::Healthy);
1931 if source_readiness_rank(readiness) > source_readiness_rank(entry.worst_readiness) {
1932 entry.worst_readiness = readiness;
1933 }
1934 }
1935
1936 sources
1937 .into_iter()
1938 .map(|((source_id, _), source)| RenderedSourceSummary {
1939 source_id,
1940 origin_kind: source.origin_kind,
1941 session_count: source.sessions.len(),
1942 evidence_count: source.evidence_count,
1943 newest_evidence_at_ms: source.newest_evidence_at_ms,
1944 healthy: source.healthy,
1945 })
1946 .collect()
1947}
1948
1949fn rendered_source_readiness(evidence: &[RenderedEvidence]) -> Vec<RenderedSourceReadiness> {
1950 let mut sources: BTreeMap<(String, String), SourceAccumulator> = BTreeMap::new();
1951 for item in evidence {
1952 let key = (
1953 item.citation.source_id.clone(),
1954 item.citation.origin_kind.clone(),
1955 );
1956 let entry = sources.entry(key).or_insert_with(|| SourceAccumulator {
1957 origin_kind: item.citation.origin_kind.clone(),
1958 healthy: true,
1959 ..SourceAccumulator::default()
1960 });
1961 entry.evidence_count += 1;
1962 let readiness = item.citation_source_readiness();
1963 entry.healthy &= matches!(readiness, PackSourceReadiness::Healthy);
1964 if source_readiness_rank(readiness) > source_readiness_rank(entry.worst_readiness) {
1965 entry.worst_readiness = readiness;
1966 }
1967 }
1968
1969 sources
1970 .into_iter()
1971 .map(|((source_id, _), source)| RenderedSourceReadiness {
1972 source_id,
1973 origin_kind: source.origin_kind,
1974 readiness: source_readiness_label(source.worst_readiness),
1975 healthy: source.healthy,
1976 evidence_count: source.evidence_count,
1977 })
1978 .collect()
1979}
1980
1981fn rendered_source_sync_gaps(
1982 gaps: &[PackSourceSyncGap],
1983 redactions: &mut Vec<RenderedRedaction>,
1984) -> Vec<RenderedSourceSyncGap> {
1985 gaps.iter()
1986 .map(|gap| RenderedSourceSyncGap {
1987 source_id: redacted_source_label(&gap.source_id, &gap.origin_kind, redactions),
1988 origin_kind: gap.origin_kind.clone(),
1989 kind: source_sync_gap_kind_label(gap.kind),
1990 lag_seconds: gap.lag_seconds,
1991 last_synced_at_ms: gap.last_synced_at_ms,
1992 recommended_action: gap
1993 .recommended_action
1994 .as_deref()
1995 .map(|action| redact_pack_output_text(action, redactions)),
1996 })
1997 .collect()
1998}
1999
2000fn stale_evidence_count(evidence: &[RenderedEvidence], request: &PackRenderRequest) -> usize {
2001 evidence
2002 .iter()
2003 .filter(|item| evidence_is_stale(item, request))
2004 .count()
2005}
2006
2007fn evidence_is_stale(item: &RenderedEvidence, request: &PackRenderRequest) -> bool {
2008 let Some(created_at_ms) = item.citation.created_at_ms else {
2009 return !matches!(request.freshness_policy, PackFreshnessPolicy::AllowStale);
2010 };
2011 let window_ms = request
2012 .freshness_window_seconds
2013 .max(1)
2014 .saturating_mul(1_000);
2015 request.generated_at_ms.saturating_sub(created_at_ms).max(0) > window_ms
2016}
2017
2018impl RenderedEvidence {
2019 fn citation_source_readiness(&self) -> PackSourceReadiness {
2020 if !self.citation.verified {
2021 PackSourceReadiness::IncompleteMetadata
2022 } else {
2023 self.source_readiness
2024 }
2025 }
2026}
2027
/// Returns the larger of the timestamps that are present, or `None` when both
/// are missing.
fn newer_timestamp(left: Option<i64>, right: Option<i64>) -> Option<i64> {
    left.into_iter().chain(right).max()
}
2036
2037fn rendered_omitted_items(
2038 omitted: &[OmittedPackCandidate],
2039) -> (Vec<OmittedPackCandidate>, Vec<RenderedRedaction>) {
2040 let mut redactions = Vec::new();
2041 let items = omitted
2042 .iter()
2043 .map(|item| {
2044 let mut rendered = item.clone();
2045 rendered.candidate_id =
2046 rendered_omitted_candidate_id(&rendered.candidate_id, &mut redactions);
2047 rendered.source_path = redact_pack_output_text(&rendered.source_path, &mut redactions);
2048 rendered.agent = redact_pack_output_text(&rendered.agent, &mut redactions);
2049 rendered
2050 })
2051 .collect();
2052 (items, redactions)
2053}
2054
2055fn rendered_omitted_candidate_id(
2056 candidate_id: &str,
2057 redactions: &mut Vec<RenderedRedaction>,
2058) -> String {
2059 if candidate_id_contains_source_material(candidate_id) {
2060 let hash = sha256_hex(candidate_id);
2061 let replacement = format!("omitted_{}", &hash[..16]);
2062 push_full_redaction(redactions, "candidate_id", candidate_id, &replacement);
2063 return replacement;
2064 }
2065 redact_pack_output_text(candidate_id, redactions)
2066}
2067
/// Reports whether the id contains a character (`:`, `/`, `\` or `~`) that
/// marks it as possibly embedding a path or source reference.
fn candidate_id_contains_source_material(candidate_id: &str) -> bool {
    candidate_id
        .chars()
        .any(|ch| matches!(ch, ':' | '/' | '\\' | '~'))
}
2074
2075fn redaction_counts(
2076 redacted_count: usize,
2077 evidence: &[RenderedEvidence],
2078 omitted_redactions: &[RenderedRedaction],
2079 envelope_redactions: &[RenderedRedaction],
2080) -> BTreeMap<String, usize> {
2081 let mut counts = BTreeMap::new();
2082 if redacted_count > 0 {
2083 counts.insert("redacted_to_empty".to_string(), redacted_count);
2084 }
2085 for redaction in evidence
2086 .iter()
2087 .flat_map(|item| item.redactions.iter())
2088 .chain(omitted_redactions.iter())
2089 .chain(envelope_redactions.iter())
2090 {
2091 *counts.entry(redaction.kind.clone()).or_default() += 1;
2092 }
2093 counts
2094}
2095
2096fn render_answer_pack_jsonl(
2097 envelope: &RenderedAnswerPack,
2098 request: &PackRenderRequest,
2099) -> Result<String, PackRenderError> {
2100 let mut lines = Vec::with_capacity(envelope.evidence.len() + 4);
2101 lines.push(json_line(
2102 serde_json::json!({ "_meta": &envelope.meta }),
2103 request,
2104 )?);
2105 lines.push(json_line(
2106 serde_json::json!({ "pack": &envelope.pack }),
2107 request,
2108 )?);
2109 for evidence in &envelope.evidence {
2110 lines.push(json_line(
2111 serde_json::json!({ "evidence": evidence }),
2112 request,
2113 )?);
2114 }
2115 lines.push(json_line(
2116 serde_json::json!({ "omitted": &envelope.omitted }),
2117 request,
2118 )?);
2119 lines.push(json_line(
2120 serde_json::json!({ "privacy": &envelope.privacy }),
2121 request,
2122 )?);
2123 Ok(lines.join("\n"))
2124}
2125
2126fn json_line(
2127 value: serde_json::Value,
2128 request: &PackRenderRequest,
2129) -> Result<String, PackRenderError> {
2130 serde_json::to_string(&value).map_err(|err| render_error(request, err))
2131}
2132
2133fn render_answer_pack_markdown(envelope: &RenderedAnswerPack) -> String {
2134 let mut out = String::new();
2135 out.push_str("# ");
2136 out.push_str(&markdown_line(&envelope.pack.title));
2137 if !envelope.warnings.is_empty() {
2138 out.push_str("\n\n## Warnings\n");
2139 for warning in &envelope.warnings {
2140 out.push_str("- ");
2141 out.push_str(&markdown_line(warning));
2142 out.push('\n');
2143 }
2144 }
2145 out.push_str("\n## Handoff\n");
2146 if envelope.pack.handoff.is_empty() {
2147 out.push_str("- No evidence selected.\n");
2148 } else {
2149 for item in &envelope.pack.handoff {
2150 out.push_str("- ");
2151 out.push_str(&markdown_line(&item.text));
2152 out.push_str(" [");
2153 out.push_str(&item.evidence_ids.join(", "));
2154 out.push_str("]\n");
2155 }
2156 }
2157
2158 out.push_str("\n## Evidence\n");
2159 if envelope.evidence.is_empty() {
2160 out.push_str("- No cited evidence.\n");
2161 } else {
2162 for item in &envelope.evidence {
2163 out.push('[');
2164 out.push_str(&item.id);
2165 out.push_str("] ");
2166 out.push_str(&markdown_line(&item.citation.agent));
2167 out.push(' ');
2168 out.push_str(&markdown_line(&item.citation.source_id));
2169 out.push(' ');
2170 out.push_str(&markdown_line(&item.citation.source_path));
2171 if let Some(line_start) = item.citation.line_start {
2172 out.push(':');
2173 out.push_str(&line_start.to_string());
2174 if item.citation.line_end != item.citation.line_start
2175 && let Some(line_end) = item.citation.line_end
2176 {
2177 out.push('-');
2178 out.push_str(&line_end.to_string());
2179 }
2180 }
2181 out.push('\n');
2182 }
2183 }
2184
2185 if envelope.omitted.count > 0 {
2186 out.push_str("\n## Omitted\n");
2187 for item in &envelope.omitted.items {
2188 out.push_str("- ");
2189 out.push_str(omitted_reason_label(item.reason));
2190 out.push_str(": ");
2191 out.push_str(&markdown_line(&item.source_path));
2192 if let Some(line_start) = item.line_start {
2193 out.push(':');
2194 out.push_str(&line_start.to_string());
2195 }
2196 out.push('\n');
2197 }
2198 }
2199 out
2200}
2201
2202fn compact_excerpt(excerpt: &str, max_chars: usize) -> String {
2203 let line = markdown_line(excerpt);
2204 if line.chars().count() <= max_chars {
2205 return line;
2206 }
2207 let mut compact = line
2208 .chars()
2209 .take(max_chars.saturating_sub(3))
2210 .collect::<String>();
2211 compact.push_str("...");
2212 compact
2213}
2214
/// Collapses text onto a single line: every run of whitespace (including
/// newlines and tabs) becomes one space, and leading/trailing whitespace is
/// dropped.
fn markdown_line(text: &str) -> String {
    let mut line = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so `line` is empty only before the first one.
        if !line.is_empty() {
            line.push(' ');
        }
        line.push_str(word);
    }
    line
}
2218
2219fn sha256_hex(text: &str) -> String {
2220 let digest = Sha256::digest(text.as_bytes());
2221 digest.iter().map(|byte| format!("{byte:02x}")).collect()
2222}
2223
2224fn pack_toon_encode_options() -> toon::EncodeOptions {
2225 toon::EncodeOptions {
2226 indent: None,
2227 delimiter: None,
2228 key_folding: Some(toon::options::KeyFoldingMode::Off),
2229 flatten_depth: None,
2230 replacer: None,
2231 }
2232}
2233
2234fn freshness_policy_label(policy: PackFreshnessPolicy) -> &'static str {
2235 match policy {
2236 PackFreshnessPolicy::PreferRecent => "prefer-recent",
2237 PackFreshnessPolicy::Strict => "strict",
2238 PackFreshnessPolicy::AllowStale => "allow-stale",
2239 }
2240}
2241
2242fn evidence_role_label(role: PackEvidenceRole) -> &'static str {
2243 match role {
2244 PackEvidenceRole::AssistantConclusion => "assistant_conclusion",
2245 PackEvidenceRole::ToolResult => "tool_result",
2246 PackEvidenceRole::UserRequirement => "user_requirement",
2247 PackEvidenceRole::ToolCallArgument => "tool_call_argument",
2248 PackEvidenceRole::Unknown => "unknown",
2249 }
2250}
2251
2252fn omitted_reason_label(reason: PackOmittedReason) -> &'static str {
2253 match reason {
2254 PackOmittedReason::TokenBudgetExhausted => "token_budget_exhausted",
2255 PackOmittedReason::MaxSessionsReached => "max_sessions_reached",
2256 PackOmittedReason::MaxEvidenceReached => "max_evidence_reached",
2257 PackOmittedReason::DuplicateContent => "duplicate_content",
2258 PackOmittedReason::SameSessionLowerRank => "same_session_lower_rank",
2259 PackOmittedReason::StaleUnderStrictPolicy => "stale_under_strict_policy",
2260 PackOmittedReason::SourceUnavailable => "source_unavailable",
2261 PackOmittedReason::RedactedToEmpty => "redacted_to_empty",
2262 PackOmittedReason::FieldMaskExcluded => "field_mask_excluded",
2263 }
2264}
2265
2266fn source_readiness_rank(readiness: PackSourceReadiness) -> usize {
2267 match readiness {
2268 PackSourceReadiness::Healthy => 0,
2269 PackSourceReadiness::StaleReadable => 1,
2270 PackSourceReadiness::IncompleteMetadata => 2,
2271 PackSourceReadiness::Unavailable => 3,
2272 }
2273}
2274
2275fn source_readiness_label(readiness: PackSourceReadiness) -> &'static str {
2276 match readiness {
2277 PackSourceReadiness::Healthy => "healthy",
2278 PackSourceReadiness::StaleReadable => "stale_readable",
2279 PackSourceReadiness::IncompleteMetadata => "incomplete_metadata",
2280 PackSourceReadiness::Unavailable => "unavailable",
2281 }
2282}
2283
2284fn lexical_readiness_label(readiness: PackLexicalReadiness) -> &'static str {
2285 match readiness {
2286 PackLexicalReadiness::Ready => "ready",
2287 PackLexicalReadiness::Stale => "stale",
2288 PackLexicalReadiness::Missing => "missing",
2289 PackLexicalReadiness::Rebuilding => "rebuilding",
2290 PackLexicalReadiness::Unknown => "unknown",
2291 }
2292}
2293
2294fn semantic_readiness_label(readiness: PackSemanticReadiness) -> &'static str {
2295 match readiness {
2296 PackSemanticReadiness::NotReported => "not_reported",
2297 PackSemanticReadiness::Joined => "joined",
2298 PackSemanticReadiness::FallbackLexical => "fallback_lexical",
2299 PackSemanticReadiness::Unavailable => "unavailable",
2300 PackSemanticReadiness::Disabled => "disabled",
2301 }
2302}
2303
2304fn source_sync_gap_kind_label(kind: PackSourceSyncGapKind) -> &'static str {
2305 match kind {
2306 PackSourceSyncGapKind::RemoteStale => "remote_stale",
2307 PackSourceSyncGapKind::SourcePruned => "source_pruned",
2308 PackSourceSyncGapKind::SyncDeferred => "sync_deferred",
2309 PackSourceSyncGapKind::Unknown => "unknown",
2310 }
2311}
2312
2313fn effective_semantic_readiness(request: &PackRenderRequest) -> PackSemanticReadiness {
2314 if !matches!(
2315 request.readiness.semantic_readiness,
2316 PackSemanticReadiness::NotReported
2317 ) {
2318 return request.readiness.semantic_readiness;
2319 }
2320 if request.semantic_joined {
2321 return PackSemanticReadiness::Joined;
2322 }
2323 if request
2324 .fallback_mode
2325 .as_deref()
2326 .is_some_and(|mode| mode.eq_ignore_ascii_case("lexical"))
2327 {
2328 return PackSemanticReadiness::FallbackLexical;
2329 }
2330 PackSemanticReadiness::NotReported
2331}
2332
2333#[cfg(test)]
2334mod tests {
2335 use super::*;
2336
2337 fn candidate(id: &str, source_id: &str, source_path: &str, score: f64) -> PackCandidate {
2338 PackCandidate {
2339 candidate_id: id.to_string(),
2340 source_path: source_path.to_string(),
2341 source_id: source_id.to_string(),
2342 origin_kind: if source_id == "local" {
2343 "local".to_string()
2344 } else {
2345 "ssh".to_string()
2346 },
2347 origin_host: None,
2348 workspace: "/work".to_string(),
2349 workspace_original: None,
2350 agent: "codex".to_string(),
2351 line_start: Some(10),
2352 line_end: Some(12),
2353 conversation_id: None,
2354 message_index: None,
2355 content_hash: format!("{id}_content"),
2356 span_hash: format!("{id}_span"),
2357 created_at_ms: Some(1_000_000),
2358 indexed_at_ms: Some(1_000_000),
2359 match_type: "exact".to_string(),
2360 excerpt: "0123456789abcdef".to_string(),
2361 role: PackEvidenceRole::AssistantConclusion,
2362 lexical_score: Some(score),
2363 semantic_score: None,
2364 hybrid_rank: None,
2365 matched_terms: vec!["pack".to_string()],
2366 matched_phrases: Vec::new(),
2367 query_term_count: 1,
2368 query_phrase_count: 0,
2369 source_readiness: PackSourceReadiness::Healthy,
2370 source_explicitly_requested: false,
2371 }
2372 }
2373
2374 fn request(candidates: Vec<PackCandidate>) -> PackPlanRequest {
2375 PackPlanRequest {
2376 now_ms: 1_000_000,
2377 limits: PackPlannerLimits {
2378 max_tokens: 1_024,
2379 max_sessions: 8,
2380 max_evidence: 24,
2381 context_lines: 3,
2382 max_excerpt_chars: 80,
2383 },
2384 freshness_policy: PackFreshnessPolicy::PreferRecent,
2385 freshness_window_seconds: 60,
2386 candidates,
2387 explain_selection: false,
2388 }
2389 }
2390
2391 fn render_request(format: PackRenderFormat) -> PackRenderRequest {
2392 PackRenderRequest {
2393 query_text: "pack handoff".to_string(),
2394 normalized_query: "pack handoff".to_string(),
2395 generated_at_ms: 1_060_000,
2396 elapsed_ms: 7,
2397 request_id: Some("req-1".to_string()),
2398 format,
2399 limits: PackPlannerLimits {
2400 max_tokens: 1_024,
2401 max_sessions: 8,
2402 max_evidence: 24,
2403 context_lines: 3,
2404 max_excerpt_chars: 80,
2405 },
2406 search_mode: "hybrid".to_string(),
2407 fallback_mode: Some("lexical".to_string()),
2408 semantic_joined: false,
2409 freshness_policy: PackFreshnessPolicy::PreferRecent,
2410 freshness_window_seconds: 60,
2411 redaction_policy: "strict".to_string(),
2412 sensitive_output: false,
2413 skill_content_included: false,
2414 explain_selection: false,
2415 readiness: PackReadinessSnapshot::default(),
2416 }
2417 }
2418
    /// A candidate built from a search hit with `MatchType::ImplicitWildcard`
    /// must carry the snake_case spelling `"implicit_wildcard"`.
    #[test]
    fn from_search_hit_uses_robot_match_type_spelling() {
        let hit = SearchHit {
            title: "session".to_string(),
            snippet: "fallback".to_string(),
            content: "fallback content".to_string(),
            content_hash: 42,
            conversation_id: Some(7),
            score: 3.5,
            source_path: "/s/fallback.jsonl".to_string(),
            agent: "codex".to_string(),
            workspace: "/work".to_string(),
            workspace_original: None,
            created_at: Some(1_000_000),
            line_number: Some(12),
            match_type: MatchType::ImplicitWildcard,
            source_id: "local".to_string(),
            origin_kind: "local".to_string(),
            origin_host: None,
        };

        let candidate = PackCandidate::from_search_hit(&hit, 1, 0);

        assert_eq!(candidate.match_type, "implicit_wildcard");
    }
2444
    /// Compact JSON output must be a single line, parse to the same value as
    /// `render_answer_pack_value`, and expose the expected top-level fields.
    #[test]
    fn render_compact_json_base_pack_matches_golden_shape() {
        let mut item = candidate("base", "local", "/s/base.jsonl", 10.0);
        item.excerpt = "Planner output cites existing evidence.".to_string();
        item.match_type = "implicit_wildcard".to_string();
        let plan = plan_answer_pack(request(vec![item])).unwrap();
        let req = render_request(PackRenderFormat::CompactJson);

        let rendered = render_answer_pack(&plan, &req).unwrap();
        let value: serde_json::Value = serde_json::from_str(&rendered).unwrap();

        // Compact output must not be pretty-printed.
        assert!(!rendered.contains('\n'));
        assert_eq!(value, render_answer_pack_value(&plan, &req).unwrap());
        assert_eq!(value["schema_version"], "cass.pack.v1");
        assert_eq!(value["_meta"]["format"], "compact");
        assert_eq!(value["query"]["text"], "pack handoff");
        assert_eq!(value["realized"]["fallback_mode"], "lexical");
        assert_eq!(
            value["evidence"][0]["citation"]["source_path"],
            "/s/base.jsonl"
        );
        assert_eq!(
            value["evidence"][0]["citation"]["match_type"],
            "implicit_wildcard"
        );
        // The handoff entry must cite the id of the rendered evidence item.
        assert_eq!(
            value["pack"]["handoff"][0]["evidence_ids"][0],
            value["evidence"][0]["id"]
        );
    }
2475
    /// With no candidates, JSONL output has exactly four lines in the order
    /// `_meta`, `pack`, `omitted`, `privacy` (no evidence lines).
    #[test]
    fn render_jsonl_empty_pack_matches_golden_line_order() {
        let plan = plan_answer_pack(request(Vec::new())).unwrap();
        let req = render_request(PackRenderFormat::Jsonl);

        let rendered = render_answer_pack(&plan, &req).unwrap();
        let lines: Vec<_> = rendered.lines().collect();

        assert_eq!(lines.len(), 4);
        assert!(lines[0].starts_with("{\"_meta\":"));
        assert!(lines[1].starts_with("{\"pack\":"));
        assert!(lines[2].starts_with("{\"omitted\":"));
        assert!(lines[3].starts_with("{\"privacy\":"));
        let meta: serde_json::Value = serde_json::from_str(lines[0]).unwrap();
        let omitted: serde_json::Value = serde_json::from_str(lines[2]).unwrap();
        assert_eq!(
            meta["_meta"]["warnings"],
            serde_json::json!(["no_evidence_found", "semantic_fallback_lexical"])
        );
        assert_eq!(omitted["omitted"]["count"], 0);
    }
2497
    /// Markdown output for one selected candidate plus one duplicate (same
    /// content hash) must match the golden text byte for byte.
    #[test]
    fn render_markdown_duplicate_omission_matches_golden_text() {
        let first = candidate("a", "local", "/s/a.jsonl", 10.0);
        let mut duplicate = candidate("b", "local", "/s/b.jsonl", 9.0);
        // Sharing the content hash is what makes the second candidate a duplicate.
        duplicate.content_hash = first.content_hash.clone();
        let plan = plan_answer_pack(request(vec![first, duplicate])).unwrap();
        let req = render_request(PackRenderFormat::Markdown);
        let evidence_id = &plan.evidence[0].id;

        let rendered = render_answer_pack(&plan, &req).unwrap();

        assert_eq!(
            rendered,
            format!(
                "# pack handoff\n\n\
                 ## Warnings\n\
                 - semantic_fallback_lexical\n\n\
                 ## Handoff\n\
                 - 0123456789abcdef [{evidence_id}]\n\n\
                 ## Evidence\n\
                 [{evidence_id}] codex local /s/a.jsonl:10-12\n\n\
                 ## Omitted\n\
                 - duplicate_content: /s/b.jsonl:10\n"
            )
        );
    }
2524
    /// A candidate from a `StaleReadable` source makes the pack unhealthy and
    /// is reported per source, without being counted as stale evidence.
    #[test]
    fn render_stale_source_pack_marks_health_and_freshness() {
        let mut stale = candidate("stale", "remote", "/s/stale.jsonl", 10.0);
        stale.source_readiness = PackSourceReadiness::StaleReadable;
        let plan = plan_answer_pack(request(vec![stale])).unwrap();
        let req = render_request(PackRenderFormat::Json);

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["health"]["healthy"], false);
        assert_eq!(
            value["health"]["recommended_action"],
            "inspect cass health --json and source sync status"
        );
        assert_eq!(
            value["health"]["source_readiness"][0]["readiness"],
            "stale_readable"
        );
        assert_eq!(value["freshness"]["stale_evidence_count"], 0);
        assert_eq!(value["pack"]["source_summary"][0]["healthy"], false);
    }
2546
    /// With one older candidate from a healthy source and one newer candidate
    /// from a `StaleReadable` source, exactly one evidence item is counted as
    /// stale while the stale source is still reported under health.
    #[test]
    fn render_stale_evidence_count_uses_age_not_source_readiness() {
        let mut old_healthy = candidate("old-healthy", "local", "/s/old.jsonl", 10.0);
        old_healthy.created_at_ms = Some(999_999);
        let mut recent_stale_source =
            candidate("recent-stale-source", "remote", "/s/recent.jsonl", 9.0);
        // Matches the `generated_at_ms` used by `render_request`.
        recent_stale_source.created_at_ms = Some(1_060_000);
        recent_stale_source.source_readiness = PackSourceReadiness::StaleReadable;

        let plan = plan_answer_pack(request(vec![old_healthy, recent_stale_source])).unwrap();
        let req = render_request(PackRenderFormat::Json);

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["freshness"]["window_seconds"], 60);
        assert_eq!(value["freshness"]["stale_evidence_count"], 1);
        assert_eq!(value["health"]["healthy"], false);
        assert!(
            value["health"]["source_readiness"]
                .as_array()
                .unwrap()
                .iter()
                .any(|source| source["readiness"] == "stale_readable")
        );
    }
2572
    /// With a semantic join, no fallback mode and a reported index generation,
    /// the pack is healthy, carries no recommended action and no warnings.
    #[test]
    fn render_healthy_readiness_reports_generation_and_ready_states() {
        let plan = plan_answer_pack(request(vec![candidate(
            "healthy",
            "local",
            "/s/healthy.jsonl",
            10.0,
        )]))
        .unwrap();
        let mut req = render_request(PackRenderFormat::Json);
        // Override the fixture's lexical-fallback defaults.
        req.fallback_mode = None;
        req.semantic_joined = true;
        req.readiness.index_generation = Some("lexical-generation-42".to_string());
        req.readiness.semantic_readiness = PackSemanticReadiness::Joined;

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["health"]["healthy"], true);
        assert_eq!(value["health"]["index_state"], "ready");
        assert_eq!(value["health"]["index_generation"], "lexical-generation-42");
        assert_eq!(value["health"]["lexical_readiness"], "ready");
        assert_eq!(value["health"]["semantic_state"], "joined");
        assert_eq!(
            value["health"]["recommended_action"],
            serde_json::Value::Null
        );
        assert_eq!(value["warnings"], serde_json::json!([]));
    }
2601
    /// A stale lexical index marks the pack unhealthy, recommends a full
    /// reindex and puts `lexical_index_stale` first in the warnings.
    #[test]
    fn render_stale_lexical_readiness_reports_action() {
        let plan = plan_answer_pack(request(vec![candidate(
            "stale-index",
            "local",
            "/s/stale-index.jsonl",
            10.0,
        )]))
        .unwrap();
        let mut req = render_request(PackRenderFormat::Json);
        req.readiness.lexical_readiness = PackLexicalReadiness::Stale;

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["health"]["healthy"], false);
        assert_eq!(value["health"]["index_state"], "stale");
        assert_eq!(value["health"]["lexical_readiness"], "stale");
        assert_eq!(
            value["health"]["recommended_action"],
            "refresh lexical index with cass index --full"
        );
        assert_eq!(value["warnings"][0], "lexical_index_stale");
    }
2625
    /// Unavailable semantic search alone does not make the pack unhealthy; it
    /// yields a recommended action and a single fallback warning.
    #[test]
    fn render_semantic_unavailable_keeps_lexical_health_truthful() {
        let plan = plan_answer_pack(request(vec![candidate(
            "semantic",
            "local",
            "/s/semantic.jsonl",
            10.0,
        )]))
        .unwrap();
        let mut req = render_request(PackRenderFormat::Json);
        req.readiness.semantic_readiness = PackSemanticReadiness::Unavailable;

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["health"]["healthy"], true);
        assert_eq!(value["health"]["semantic_state"], "unavailable");
        assert_eq!(
            value["health"]["recommended_action"],
            "continue with lexical evidence or install semantic model explicitly"
        );
        assert_eq!(
            value["warnings"],
            serde_json::json!(["semantic_unavailable_lexical_fallback"])
        );
    }
2651
    /// An active rebuild marks the pack unhealthy, echoes the lock state and
    /// adds an `active_rebuild` warning.
    #[test]
    fn render_active_rebuild_marks_pack_not_fresh() {
        let plan = plan_answer_pack(request(vec![candidate(
            "rebuild",
            "local",
            "/s/rebuild.jsonl",
            10.0,
        )]))
        .unwrap();
        let mut req = render_request(PackRenderFormat::Json);
        req.readiness.active_rebuild = true;
        req.readiness.lock_state = Some("writer-lock".to_string());

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["health"]["healthy"], false);
        assert_eq!(value["health"]["active_rebuild"], true);
        assert_eq!(value["health"]["lock_state"], "writer-lock");
        assert_eq!(
            value["health"]["recommended_action"],
            "wait for active rebuild or inspect cass status --json"
        );
        assert!(
            value["warnings"]
                .as_array()
                .unwrap()
                .iter()
                .any(|warning| warning == "active_rebuild")
        );
    }
2682
    /// A missing database with a missing lexical index marks the pack
    /// unhealthy, recommends `cass index --full` and warns `missing_database`.
    #[test]
    fn render_missing_db_reports_reindex_action() {
        let plan = plan_answer_pack(request(Vec::new())).unwrap();
        let mut req = render_request(PackRenderFormat::Json);
        req.readiness.missing_database = true;
        req.readiness.lexical_readiness = PackLexicalReadiness::Missing;

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["health"]["healthy"], false);
        assert_eq!(value["health"]["missing_database"], true);
        assert_eq!(value["health"]["index_state"], "missing");
        assert_eq!(
            value["health"]["recommended_action"],
            "run cass index --full"
        );
        assert!(
            value["warnings"]
                .as_array()
                .unwrap()
                .iter()
                .any(|warning| warning == "missing_database")
        );
    }
2707
    /// Source sync gaps are rendered in request order with their kind labels,
    /// make the pack unhealthy, and the `old-laptop` source id is rendered as
    /// the redacted-source marker in both the health section and the warning.
    #[test]
    fn render_source_sync_gap_and_pruned_source_metadata() {
        let plan = plan_answer_pack(request(vec![candidate(
            "remote-gap",
            "remote",
            "/s/remote-gap.jsonl",
            10.0,
        )]))
        .unwrap();
        let mut req = render_request(PackRenderFormat::Json);
        req.readiness.source_sync_gaps = vec![
            PackSourceSyncGap {
                source_id: "remote".to_string(),
                origin_kind: "ssh".to_string(),
                kind: PackSourceSyncGapKind::RemoteStale,
                lag_seconds: Some(3_600),
                last_synced_at_ms: Some(1_000_000),
                recommended_action: Some("run cass sources sync --json".to_string()),
            },
            PackSourceSyncGap {
                source_id: "old-laptop".to_string(),
                origin_kind: "ssh".to_string(),
                kind: PackSourceSyncGapKind::SourcePruned,
                lag_seconds: None,
                last_synced_at_ms: None,
                recommended_action: Some("remove or refresh pruned source".to_string()),
            },
        ];

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["health"]["healthy"], false);
        assert_eq!(
            value["health"]["source_sync_gaps"][0]["kind"],
            "remote_stale"
        );
        assert_eq!(
            value["health"]["source_sync_gaps"][1]["kind"],
            "source_pruned"
        );
        assert_eq!(
            value["health"]["recommended_action"],
            "inspect cass sources sync --json and source status"
        );
        assert!(
            value["warnings"]
                .as_array()
                .unwrap()
                .iter()
                .any(|warning| warning == "source_sync_gap:[REDACTED_SOURCE]:source_pruned")
        );
        assert_eq!(
            value["health"]["source_sync_gaps"][1]["source_id"],
            REDACTED_SOURCE_MARKER
        );
    }
2764
    /// Secrets, home-directory paths and internal host names placed in the
    /// query text and in free-form readiness strings must not appear anywhere
    /// in the rendered JSON, and the privacy section must account for them.
    #[test]
    fn render_pack_redacts_freeform_query_and_health_strings() {
        let home_path = "/home/alice/projects/private";
        let host = "alice-workstation.internal";
        let token = format!("sk-{}", "abcdefghijklmnopqrstuv");
        let plan = plan_answer_pack(request(vec![candidate(
            "health-redaction",
            "local",
            "/s/health.jsonl",
            10.0,
        )]))
        .unwrap();
        let mut req = render_request(PackRenderFormat::Json);
        req.query_text = format!("investigate {token} at {home_path}");
        req.normalized_query = req.query_text.clone();
        req.readiness.lexical_readiness = PackLexicalReadiness::Stale;
        req.readiness.index_generation = Some(format!("generation from {home_path}"));
        req.readiness.lock_state = Some(format!("writer lock held by {host}"));
        req.readiness.recommended_action = Some(format!("inspect {home_path} on {host}"));
        req.readiness.source_sync_gaps = vec![PackSourceSyncGap {
            source_id: host.to_string(),
            origin_kind: "ssh".to_string(),
            kind: PackSourceSyncGapKind::RemoteStale,
            lag_seconds: Some(60),
            last_synced_at_ms: Some(1_000_000),
            recommended_action: Some(format!("sync {home_path} from {host}")),
        }];

        let rendered = render_answer_pack(&plan, &req).unwrap();
        let value: serde_json::Value = serde_json::from_str(&rendered).unwrap();

        // None of the raw sensitive strings may survive in the serialized output.
        for raw in [&token, home_path, host] {
            assert!(!rendered.contains(raw));
        }
        assert_eq!(
            value["query"]["text"],
            "investigate [REDACTED] at [REDACTED_PATH]/private"
        );
        assert_eq!(value["pack"]["title"], value["query"]["normalized"]);
        assert_eq!(
            value["health"]["source_sync_gaps"][0]["source_id"],
            REDACTED_SOURCE_MARKER
        );
        assert!(
            value["health"]["recommended_action"]
                .as_str()
                .unwrap()
                .contains("[REDACTED_PATH]/private")
        );
        assert!(
            value["warnings"]
                .as_array()
                .unwrap()
                .iter()
                .any(|warning| warning == "privacy_redactions_applied")
        );
        assert!(
            value["privacy"]["redaction_counts"]["private_path"]
                .as_u64()
                .unwrap()
                >= 3
        );
    }
2828
    /// A candidate whose excerpt is whitespace only is omitted as
    /// `redacted_to_empty`, counted in the privacy section, and produces the
    /// expected warning list.
    #[test]
    fn render_redacted_empty_pack_reports_privacy_counts() {
        let mut redacted = candidate("redacted", "local", "/s/redacted.jsonl", 9.0);
        redacted.excerpt = " \n\t ".to_string();
        let plan = plan_answer_pack(request(vec![redacted])).unwrap();
        let req = render_request(PackRenderFormat::Json);

        let value = render_answer_pack_value(&plan, &req).unwrap();

        assert_eq!(value["privacy"]["redaction_applied"], true);
        assert_eq!(value["privacy"]["redaction_counts"]["redacted_to_empty"], 1);
        assert_eq!(value["omitted"]["items"][0]["reason"], "redacted_to_empty");
        assert_eq!(
            value["warnings"],
            serde_json::json!([
                "no_evidence_found",
                "semantic_fallback_lexical",
                "privacy_redactions_applied"
            ])
        );
    }
2850
    /// An API key and a bearer token in an excerpt must be replaced by the
    /// redaction marker in both JSON and Markdown output, and counted as one
    /// `secret` redaction.
    #[test]
    fn render_pack_redacts_api_keys_and_bearer_tokens_in_json_and_markdown() {
        let api_key = format!("sk-{}", "12345678901234567890");
        let bearer = "Bearer abcdefghijklmnopqrst";
        let mut secret = candidate("secret", "local", "/s/secret.jsonl", 10.0);
        secret.excerpt = format!("Use {api_key} and Authorization: {bearer}.");
        let plan = plan_answer_pack(request(vec![secret])).unwrap();
        let json_req = render_request(PackRenderFormat::Json);
        let markdown_req = render_request(PackRenderFormat::Markdown);

        let json_rendered = render_answer_pack(&plan, &json_req).unwrap();
        let markdown_rendered = render_answer_pack(&plan, &markdown_req).unwrap();
        let value: serde_json::Value = serde_json::from_str(&json_rendered).unwrap();

        assert!(!json_rendered.contains(&api_key));
        assert!(!json_rendered.contains(bearer));
        assert!(!markdown_rendered.contains(&api_key));
        assert!(!markdown_rendered.contains(bearer));
        assert!(json_rendered.contains(REDACTED_VALUE_MARKER));
        assert!(markdown_rendered.contains(REDACTED_VALUE_MARKER));
        assert_eq!(value["privacy"]["redaction_applied"], true);
        assert_eq!(value["privacy"]["redaction_counts"]["secret"], 1);
        assert_eq!(value["evidence"][0]["redactions"][0]["kind"], "secret");
    }
2875
    /// Home-directory paths (`/home/...`, `/Users/...`, `~/...`) and a
    /// path-bearing candidate id must be redacted in both the selected
    /// evidence and the omitted duplicate, in JSON and Markdown alike.
    #[test]
    fn render_pack_redacts_home_directory_paths_in_evidence_and_omitted_output() {
        let source_path = "/home/alice/projects/private/session.jsonl";
        let duplicate_path = "/Users/alice/projects/private/duplicate.jsonl";
        let mut first = candidate("private-path", "local", source_path, 10.0);
        first.workspace = "/home/alice/projects/private".to_string();
        first.workspace_original = Some("/home/alice/projects/private".to_string());
        first.excerpt = format!("Open {source_path} before reading ~/notes/private.md");
        let mut duplicate = candidate("duplicate-private-path", "local", duplicate_path, 9.0);
        // An id embedding a host and a path; expected to be replaced by an `omitted_` id.
        duplicate.candidate_id = format!("old-laptop:{duplicate_path}:10");
        duplicate.content_hash = first.content_hash.clone();
        let plan = plan_answer_pack(request(vec![first, duplicate])).unwrap();
        let json_req = render_request(PackRenderFormat::Json);
        let markdown_req = render_request(PackRenderFormat::Markdown);

        let json_rendered = render_answer_pack(&plan, &json_req).unwrap();
        let markdown_rendered = render_answer_pack(&plan, &markdown_req).unwrap();
        let value: serde_json::Value = serde_json::from_str(&json_rendered).unwrap();

        for raw in ["/home/alice", "/Users/alice", "~/notes", "old-laptop"] {
            assert!(!json_rendered.contains(raw));
            assert!(!markdown_rendered.contains(raw));
        }
        assert_eq!(
            value["evidence"][0]["citation"]["source_path"],
            "[REDACTED_PATH]/session.jsonl"
        );
        assert_eq!(
            value["omitted"]["items"][0]["source_path"],
            "[REDACTED_PATH]/duplicate.jsonl"
        );
        assert!(
            value["omitted"]["items"][0]["candidate_id"]
                .as_str()
                .unwrap()
                .starts_with("omitted_")
        );
        assert!(markdown_rendered.contains("[REDACTED_PATH]/session.jsonl"));
        assert!(markdown_rendered.contains("[REDACTED_PATH]/duplicate.jsonl"));
        assert_eq!(value["privacy"]["redaction_applied"], true);
        assert!(
            value["privacy"]["redaction_counts"]["private_path"]
                .as_u64()
                .unwrap()
                >= 2
        );
        assert_eq!(value["privacy"]["redaction_counts"]["candidate_id"], 1);
    }
2924
    /// A `.internal` source id and a `user@host` origin host must be replaced
    /// by their markers in the citation and counted as two `remote_host`
    /// redactions.
    #[test]
    fn render_pack_redacts_remote_host_details_from_citation_contract() {
        let mut remote = candidate(
            "remote-host",
            "alice-workstation.internal",
            "/s/remote.jsonl",
            10.0,
        );
        remote.origin_kind = "ssh".to_string();
        remote.origin_host = Some("alice@workstation.internal".to_string());
        let plan = plan_answer_pack(request(vec![remote])).unwrap();
        let req = render_request(PackRenderFormat::Json);

        let rendered = render_answer_pack(&plan, &req).unwrap();
        let value: serde_json::Value = serde_json::from_str(&rendered).unwrap();

        assert!(!rendered.contains("alice-workstation.internal"));
        assert!(!rendered.contains("alice@workstation.internal"));
        assert_eq!(
            value["evidence"][0]["citation"]["source_id"],
            REDACTED_SOURCE_MARKER
        );
        assert_eq!(
            value["evidence"][0]["citation"]["origin_host"],
            REDACTED_REMOTE_HOST_MARKER
        );
        assert_eq!(value["privacy"]["redaction_counts"]["remote_host"], 2);
    }
2953
    /// An `encrypted_payload_material=<blob>` fragment in an excerpt must be
    /// replaced by the encrypted-payload marker in JSON and Markdown, and
    /// counted as one `encrypted_payload` redaction.
    #[test]
    fn render_pack_redacts_encrypted_payload_material() {
        let payload = "encrypted_payload_material=abcdef0123456789abcdef0123456789";
        let mut encrypted = candidate("encrypted", "local", "/s/chatgpt.jsonl", 10.0);
        encrypted.agent = "chatgpt".to_string();
        encrypted.excerpt = format!("Skipped encrypted ChatGPT block: {payload}.");
        let plan = plan_answer_pack(request(vec![encrypted])).unwrap();
        let json_req = render_request(PackRenderFormat::Json);
        let markdown_req = render_request(PackRenderFormat::Markdown);

        let json_rendered = render_answer_pack(&plan, &json_req).unwrap();
        let markdown_rendered = render_answer_pack(&plan, &markdown_req).unwrap();
        let value: serde_json::Value = serde_json::from_str(&json_rendered).unwrap();

        assert!(!json_rendered.contains(payload));
        assert!(!markdown_rendered.contains(payload));
        assert!(json_rendered.contains(REDACTED_ENCRYPTED_PAYLOAD_MARKER));
        assert!(markdown_rendered.contains(REDACTED_ENCRYPTED_PAYLOAD_MARKER));
        assert_eq!(value["privacy"]["redaction_counts"]["encrypted_payload"], 1);
        assert_eq!(
            value["evidence"][0]["redactions"][0]["replacement"],
            REDACTED_ENCRYPTED_PAYLOAD_MARKER
        );
    }
2978
    /// TOON output must equal `toon::encode` applied to the rendered JSON
    /// value with `pack_toon_encode_options()`.
    #[test]
    fn render_toon_matches_existing_toon_encoder() {
        let plan = plan_answer_pack(request(vec![candidate(
            "toon",
            "local",
            "/s/toon.jsonl",
            10.0,
        )]))
        .unwrap();
        let req = render_request(PackRenderFormat::Toon);
        let value = render_answer_pack_value(&plan, &req).unwrap();

        let rendered = render_answer_pack(&plan, &req).unwrap();

        assert_eq!(
            rendered,
            toon::encode(value, Some(pack_toon_encode_options()))
        );
    }
2998
    /// Planning with no candidates yields an empty plan while still reporting
    /// the candidate fetch limit (192 for the fixture limits).
    #[test]
    fn empty_corpus_returns_empty_plan() {
        let plan = plan_answer_pack(request(Vec::new())).unwrap();

        assert_eq!(plan.candidate_count, 0);
        assert_eq!(plan.selected_evidence_count, 0);
        assert_eq!(plan.diagnostics.candidate_fetch_limit, 192);
        assert!(plan.evidence.is_empty());
        assert!(plan.omitted.is_empty());
    }
3009
/// Pins `pack_candidate_fetch_limit` at three points: (2 sessions, 3 evidence)
/// gives 64, (20, 3) gives 320, and (64, 256) gives
/// `PACK_CANDIDATE_LIMIT_CAP`.
#[test]
fn candidate_fetch_limit_matches_contract_formula() {
    // Only the session and evidence caps vary between the checked points.
    let fetch_limit = |max_sessions: usize, max_evidence: usize| {
        let limits = PackPlannerLimits {
            max_tokens: 12_000,
            max_sessions,
            max_evidence,
            context_lines: 3,
            max_excerpt_chars: 1_600,
        };
        pack_candidate_fetch_limit(&limits).unwrap()
    };

    assert_eq!(fetch_limit(2, 3), 64);
    assert_eq!(fetch_limit(20, 3), 320);
    assert_eq!(fetch_limit(64, 256), PACK_CANDIDATE_LIMIT_CAP);
}
3032
/// For a 12,000-token limit the budget must split into 1,800 metadata, 1,800
/// outline, 7,200 evidence and 1,200 omitted tokens, allow 12,600 tokens with
/// overflow, and the four sections must sum to `max_tokens`.
#[test]
fn token_budget_reserves_documented_sections() {
    let limits = PackPlannerLimits {
        max_tokens: 12_000,
        max_sessions: 8,
        max_evidence: 24,
        context_lines: 3,
        max_excerpt_chars: 1_600,
    };
    let budget = pack_planner_budget(&limits).unwrap();

    let sections = [
        (budget.metadata_tokens, 1_800),
        (budget.outline_tokens, 1_800),
        (budget.evidence_tokens, 7_200),
        (budget.omitted_tokens, 1_200),
    ];
    for (actual, expected) in sections {
        assert_eq!(actual, expected);
    }
    assert_eq!(budget.max_output_tokens_with_overflow, 12_600);

    let reserved_total = budget.metadata_tokens
        + budget.outline_tokens
        + budget.evidence_tokens
        + budget.omitted_tokens;
    assert_eq!(reserved_total, budget.max_tokens);
}
3057
/// Two candidates sharing a `content_hash`: one is selected and the other is
/// the single omission, with reason `DuplicateContent`.
#[test]
fn duplicate_content_is_omitted_after_first_selection() {
    let original = candidate("a", "local", "/s/a.jsonl", 10.0);
    let mut copy = candidate("b", "local", "/s/b.jsonl", 9.0);
    copy.content_hash.clone_from(&original.content_hash);

    let plan = plan_answer_pack(request(vec![original, copy])).unwrap();

    assert_eq!(plan.selected_evidence_count, 1);
    let omitted_reasons: Vec<_> = plan.omitted.iter().map(|entry| entry.reason).collect();
    assert_eq!(omitted_reasons, [PackOmittedReason::DuplicateContent]);
}
3070
/// Two duplicate pairs are planned together: `span-dup` copies the
/// `span_hash` of `span-source`, and `range-dup` shares the path of
/// `range-source` with its lines set to 11–14 (per the test name, expected to
/// overlap the helper's default range). Each duplicate must be omitted exactly
/// once, in that order, as `DuplicateContent`.
#[test]
fn duplicate_span_and_overlapping_ranges_are_omitted_once() {
    let span_origin = candidate("span-source", "local", "/s/span.jsonl", 10.0);
    let mut span_twin = candidate("span-dup", "remote", "/s/other.jsonl", 9.0);
    span_twin.span_hash.clone_from(&span_origin.span_hash);

    let range_origin = candidate("range-source", "local", "/s/range.jsonl", 8.0);
    let mut range_twin = candidate("range-dup", "remote", "/s/range.jsonl", 7.0);
    range_twin.line_start = Some(11);
    range_twin.line_end = Some(14);

    let corpus = vec![span_origin, span_twin, range_origin, range_twin];
    let plan = plan_answer_pack(request(corpus)).unwrap();

    let omitted: Vec<_> = plan
        .omitted
        .iter()
        .map(|entry| (entry.candidate_id.as_str(), entry.reason))
        .collect();
    assert_eq!(
        omitted,
        [
            ("span-dup", PackOmittedReason::DuplicateContent),
            ("range-dup", PackOmittedReason::DuplicateContent),
        ]
    );
}
3103
/// A candidate whose source is `Unavailable` and a candidate whose excerpt is
/// whitespace only must both be left out of the evidence, each omitted once
/// as `SourceUnavailable` and `RedactedToEmpty` respectively.
#[test]
fn unavailable_and_redacted_empty_candidates_are_omitted_once() {
    let mut offline = candidate("unavailable", "remote", "/s/down.jsonl", 10.0);
    offline.source_readiness = PackSourceReadiness::Unavailable;

    let mut blank = candidate("redacted", "local", "/s/redacted.jsonl", 9.0);
    blank.excerpt = String::from(" \n\t ");

    let plan = plan_answer_pack(request(vec![offline, blank])).unwrap();

    assert!(plan.evidence.is_empty());

    let omitted: Vec<_> = plan
        .omitted
        .iter()
        .map(|entry| (entry.candidate_id.as_str(), entry.reason))
        .collect();
    assert_eq!(
        omitted,
        [
            ("unavailable", PackOmittedReason::SourceUnavailable),
            ("redacted", PackOmittedReason::RedactedToEmpty),
        ]
    );
}
3127
/// The first excerpt is sized to consume the evidence token budget exactly
/// and the second is one token long. The plan must select one piece of
/// evidence, report `estimated_tokens` equal to the evidence budget, and omit
/// the other candidate as `TokenBudgetExhausted`.
#[test]
fn exact_token_budget_boundary_selects_until_budget_exhausted() {
    let first = candidate("a", "local", "/s/a.jsonl", 10.0);
    let second = candidate("b", "remote", "/s/b.jsonl", 9.0);

    let mut req = request(vec![first, second]);
    req.limits.max_tokens = 1_024;
    req.limits.max_excerpt_chars = 4_096;
    let evidence_budget = pack_planner_budget(&req.limits).unwrap().evidence_tokens;

    // Excerpts are assigned only here, once the budget for the adjusted limits
    // is known; earlier placeholder excerpts would be overwritten unused.
    req.candidates[0].excerpt = "x".repeat(evidence_budget * TOKEN_ESTIMATE_CHARS_PER_TOKEN);
    // Exactly one estimated token, expressed via the shared constant rather
    // than a literal so the two excerpts stay in the same unit.
    req.candidates[1].excerpt = "y".repeat(TOKEN_ESTIMATE_CHARS_PER_TOKEN);

    let plan = plan_answer_pack(req).unwrap();

    assert_eq!(plan.selected_evidence_count, 1);
    assert_eq!(plan.estimated_tokens, evidence_budget);
    // `first()` reports an empty omission list as an assertion diff instead of
    // an index-out-of-bounds panic.
    assert_eq!(
        plan.omitted.first().map(|entry| entry.reason),
        Some(PackOmittedReason::TokenBudgetExhausted)
    );
}
3151
/// The higher-scored candidate gets an excerpt one token larger than the
/// evidence budget, the lower-scored one a single-token excerpt. The plan
/// must lead with `fit` and record exactly one omission, with reason
/// `TokenBudgetExhausted`.
#[test]
fn oversized_high_score_candidate_can_be_skipped_for_budget_fit() {
    let mut too_big = candidate("oversized", "local", "/s/oversized.jsonl", 10.0);
    let mut small = candidate("fit", "remote", "/s/fit.jsonl", 9.0);

    // The request is built first so the evidence budget reflects the adjusted
    // limits; the final candidates are swapped in afterwards.
    let mut req = request(vec![too_big.clone(), small.clone()]);
    req.limits.max_excerpt_chars = 8_000;
    let evidence_budget = pack_planner_budget(&req.limits).unwrap().evidence_tokens;

    let oversized_chars = (evidence_budget + 1) * TOKEN_ESTIMATE_CHARS_PER_TOKEN;
    too_big.excerpt = "x".repeat(oversized_chars);
    small.excerpt = "y".repeat(TOKEN_ESTIMATE_CHARS_PER_TOKEN);
    req.candidates = vec![too_big, small];

    let plan = plan_answer_pack(req).unwrap();

    let first_pick = &plan.evidence[0].candidate;
    assert_eq!(first_pick.candidate_id, "fit");
    assert_eq!(plan.omitted.len(), 1);
    let only_omission = &plan.omitted[0];
    assert_eq!(only_omission.reason, PackOmittedReason::TokenBudgetExhausted);
}
3173
/// With two evidence slots and two equally scored runners-up, the second slot
/// must go to the candidate from a different source (`c`, "remote") rather
/// than the one sharing the winner's source (`b`, "local").
#[test]
fn source_diversity_changes_second_pick() {
    let leader = candidate("a", "local", "/s/a.jsonl", 10.0);
    let same_origin = candidate("b", "local", "/s/b.jsonl", 9.9);
    let other_origin = candidate("c", "remote", "/s/c.jsonl", 9.9);

    let mut req = request(vec![leader, same_origin, other_origin]);
    req.limits.max_evidence = 2;

    let plan = plan_answer_pack(req).unwrap();

    let first_two: Vec<_> = plan
        .evidence
        .iter()
        .take(2)
        .map(|picked| picked.candidate.candidate_id.as_str())
        .collect();
    assert_eq!(first_two, ["a", "c"]);
}
3188
/// With `max_sessions = 1`, a second candidate from the already-selected
/// source path (`b`, lines 20–22 of `/s/a.jsonl`) is still selected, while
/// the candidate from a new path (`c`) is the single omission, with reason
/// `MaxSessionsReached`.
#[test]
fn session_cap_omits_new_sessions_but_allows_existing_session_evidence() {
    let opener = candidate("a", "local", "/s/a.jsonl", 10.0);

    let mut follow_up = candidate("b", "local", "/s/a.jsonl", 9.0);
    follow_up.line_start = Some(20);
    follow_up.line_end = Some(22);

    let outsider = candidate("c", "remote", "/s/c.jsonl", 8.0);

    let mut req = request(vec![opener, follow_up, outsider]);
    req.limits.max_sessions = 1;
    req.limits.max_evidence = 3;

    let plan = plan_answer_pack(req).unwrap();

    let selected: Vec<_> = plan
        .evidence
        .iter()
        .map(|picked| picked.candidate.candidate_id.as_str())
        .collect();
    assert_eq!(selected, ["a", "b"]);

    let omitted: Vec<_> = plan
        .omitted
        .iter()
        .map(|entry| (entry.candidate_id.as_str(), entry.reason))
        .collect();
    assert_eq!(omitted, [("c", PackOmittedReason::MaxSessionsReached)]);
}
3216
/// With `max_evidence = 1` only the top-scored candidate is selected; the
/// other two are each omitted once, in score order, as `MaxEvidenceReached`.
#[test]
fn evidence_cap_omits_remaining_candidates_once() {
    let corpus = vec![
        candidate("a", "local", "/s/a.jsonl", 10.0),
        candidate("b", "remote", "/s/b.jsonl", 9.0),
        candidate("c", "remote", "/s/c.jsonl", 8.0),
    ];

    let mut req = request(corpus);
    req.limits.max_evidence = 1;

    let plan = plan_answer_pack(req).unwrap();

    let selected: Vec<_> = plan
        .evidence
        .iter()
        .map(|picked| picked.candidate.candidate_id.as_str())
        .collect();
    assert_eq!(selected, ["a"]);

    let omitted: Vec<_> = plan
        .omitted
        .iter()
        .map(|entry| (entry.candidate_id.as_str(), entry.reason))
        .collect();
    assert_eq!(
        omitted,
        [
            ("b", PackOmittedReason::MaxEvidenceReached),
            ("c", PackOmittedReason::MaxEvidenceReached),
        ]
    );
}
3243
/// Under the `Strict` freshness policy with a 60-second window, a candidate
/// created at timestamp 0 and a candidate with no timestamp are both omitted
/// as `StaleUnderStrictPolicy`, leaving no evidence.
#[test]
fn strict_freshness_omits_stale_or_unknown_timestamps() {
    let mut ancient = candidate("old", "local", "/s/old.jsonl", 10.0);
    ancient.created_at_ms = Some(0);

    let mut undated = candidate("unknown", "remote", "/s/unknown.jsonl", 9.0);
    undated.created_at_ms = None;

    let mut req = request(vec![ancient, undated]);
    req.freshness_policy = PackFreshnessPolicy::Strict;
    req.freshness_window_seconds = 60;

    let plan = plan_answer_pack(req).unwrap();

    assert!(plan.evidence.is_empty());

    let omitted_reasons: Vec<_> = plan.omitted.iter().map(|entry| entry.reason).collect();
    assert_eq!(
        omitted_reasons,
        [PackOmittedReason::StaleUnderStrictPolicy; 2]
    );
}
3265
/// With equal scores under `AllowStale`, the candidate that has a timestamp
/// must be selected ahead of the one whose `created_at_ms` is `None`, even
/// though the latter comes first in the input.
#[test]
fn null_timestamps_sort_last_when_scores_tie() {
    let mut undated = candidate("unknown", "a", "/a.jsonl", 1.0);
    undated.created_at_ms = None;

    let mut dated = candidate("timestamped", "z", "/z.jsonl", 1.0);
    dated.created_at_ms = Some(1_000_000);

    let mut req = request(vec![undated, dated]);
    req.freshness_policy = PackFreshnessPolicy::AllowStale;

    let plan = plan_answer_pack(req).unwrap();

    let first_pick = plan
        .evidence
        .first()
        .map(|picked| picked.candidate.candidate_id.as_str());
    assert_eq!(first_pick, Some("timestamped"));
}
3280
/// For a candidate with no timestamp, `freshness_score` must be 0.25 under
/// `PreferRecent`, 1.0 under `AllowStale`, and 0.0 under `Strict`.
#[test]
fn freshness_policy_scores_unknown_timestamps_explicitly() {
    let mut undated = candidate("unknown", "local", "/s/unknown.jsonl", 1.0);
    undated.created_at_ms = None;
    let mut req = request(vec![undated.clone()]);

    let expectations = [
        (PackFreshnessPolicy::PreferRecent, 0.25),
        (PackFreshnessPolicy::AllowStale, 1.0),
        (PackFreshnessPolicy::Strict, 0.0),
    ];
    for (policy, expected_score) in expectations {
        req.freshness_policy = policy;
        assert_eq!(freshness_score(&undated, &req), expected_score);
    }
}
3296
/// A single candidate built by the `candidate` helper with a score of 7.0
/// must be selected with a strictly positive relevance score.
#[test]
fn lexical_score_drives_relevance_when_semantic_is_absent() {
    let lexical_only = candidate("a", "local", "/s/a.jsonl", 7.0);
    let plan = plan_answer_pack(request(vec![lexical_only])).unwrap();

    assert_eq!(plan.selected_evidence_count, 1);

    let top = &plan.evidence[0];
    assert!(top.selection.relevance_score > 0.0);
}
3305
/// Two equally scored candidates, both starting at line 50, are planned in
/// both input orders; either way the first piece of evidence must come from
/// `/a.jsonl`.
#[test]
fn stable_tie_breaks_do_not_depend_on_input_order() {
    let mut path_z = candidate("z", "remote", "/z.jsonl", 1.0);
    path_z.line_start = Some(50);
    let mut path_a = candidate("a", "local", "/a.jsonl", 1.0);
    path_a.line_start = Some(50);

    let z_first = plan_answer_pack(request(vec![path_z.clone(), path_a.clone()])).unwrap();
    let a_first = plan_answer_pack(request(vec![path_a, path_z])).unwrap();

    for plan in [&z_first, &a_first] {
        assert_eq!(plan.evidence[0].candidate.source_path, "/a.jsonl");
    }
}
3320
/// Five equally scored candidates are planned in one order and in the exact
/// reverse. Splitting the selected ids into pages of two must give the same
/// pages for both runs.
#[test]
fn stable_ordering_keeps_cursor_like_page_boundaries() {
    let forward = vec![
        candidate("e", "remote", "/e.jsonl", 1.0),
        candidate("b", "remote", "/b.jsonl", 1.0),
        candidate("d", "remote", "/d.jsonl", 1.0),
        candidate("a", "remote", "/a.jsonl", 1.0),
        candidate("c", "remote", "/c.jsonl", 1.0),
    ];
    let backward: Vec<_> = forward.iter().rev().cloned().collect();

    let forward_plan = plan_answer_pack(request(forward)).unwrap();
    let backward_plan = plan_answer_pack(request(backward)).unwrap();

    let forward_ids: Vec<_> = forward_plan
        .evidence
        .iter()
        .map(|picked| picked.candidate.candidate_id.as_str())
        .collect();
    let backward_ids: Vec<_> = backward_plan
        .evidence
        .iter()
        .map(|picked| picked.candidate.candidate_id.as_str())
        .collect();

    // Compare page by page (two ids per page), mirroring cursor-style paging.
    let forward_pages: Vec<_> = forward_ids.chunks(2).collect();
    let backward_pages: Vec<_> = backward_ids.chunks(2).collect();
    assert_eq!(forward_pages, backward_pages);
}
3351
/// Each listed `PackOmittedReason` variant must serialize through
/// `serde_json::to_value` to its snake_case string.
#[test]
fn omitted_reasons_serialize_to_documented_snake_case() {
    use PackOmittedReason as Reason;

    let wire_names = [
        (Reason::TokenBudgetExhausted, "token_budget_exhausted"),
        (Reason::MaxSessionsReached, "max_sessions_reached"),
        (Reason::MaxEvidenceReached, "max_evidence_reached"),
        (Reason::DuplicateContent, "duplicate_content"),
        (Reason::SameSessionLowerRank, "same_session_lower_rank"),
        (Reason::StaleUnderStrictPolicy, "stale_under_strict_policy"),
        (Reason::SourceUnavailable, "source_unavailable"),
        (Reason::RedactedToEmpty, "redacted_to_empty"),
        (Reason::FieldMaskExcluded, "field_mask_excluded"),
    ];

    for (reason, wire_name) in wire_names {
        let serialized = serde_json::to_value(reason).unwrap();
        assert_eq!(serialized, wire_name);
    }
}
3385}