1use serde::{Deserialize, Serialize};
8
/// Schema version stamped into [`QueryCostPlan::schema_version`] by
/// [`build_query_cost_plan`].
pub const QUERY_COST_PLAN_SCHEMA_VERSION: &str = "1";
10
/// Serializable report of how one query was planned and what actually ran,
/// as assembled by [`build_query_cost_plan`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct QueryCostPlan {
    /// Schema version of this record; the builder writes
    /// [`QUERY_COST_PLAN_SCHEMA_VERSION`].
    pub schema_version: String,
    /// Identifier of the planner that produced the record; the builder in
    /// this file writes `"query_cost.v1"`.
    pub planner_id: String,
    /// One entry per query phase; the builder sorts them by [`QueryPhase`]
    /// declaration order.
    pub phases: Vec<QueryPhasePlan>,
    /// The budget reported as exhausted, or `None` when no exhaustion
    /// condition was flagged on the input.
    pub budget_exhaustion: Option<BudgetExhaustion>,
    /// Paging window and cursor state of the response.
    pub result_identity: ResultIdentityContinuity,
    /// Cache eligibility and the cache counters copied from the input.
    pub cache: CachePlan,
    /// One-line human-readable summary (realized mode, counts, projection,
    /// query size).
    pub summary: String,
}
21
/// Planned-versus-realized record for a single [`QueryPhase`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct QueryPhasePlan {
    /// Which phase this entry describes.
    pub phase: QueryPhase,
    /// Whether the phase was expected to run for the request.
    pub planned: bool,
    /// Whether the phase is reported as having run.
    pub realized: bool,
    /// Limits attached to the phase.
    pub budget: PhaseBudget,
    /// Human-readable explanation of the outcome for this phase.
    pub reason: String,
}
30
/// Phases reported in a [`QueryCostPlan`]; serialized in `snake_case`.
///
/// Variant order is significant: [`build_query_cost_plan`] sorts its phase
/// list by `phase as u8`, i.e. by declaration order. The per-variant notes
/// describe how that builder fills the `planned` / `realized` flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum QueryPhase {
    /// Planned unless the requested mode is `"semantic"`.
    Lexical,
    /// Planned when the requested mode is `"semantic"` or `"hybrid"`.
    Semantic,
    /// Always planned; realized when at least one result was returned.
    Hydration,
    /// Always planned and always realized.
    Output,
    /// Planned when an input or next cursor is present; realized when a next
    /// cursor was emitted.
    Cursor,
}
40
/// Limits attached to one phase entry. A `None` field means no value was
/// attached for that limit on this phase.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PhaseBudget {
    /// Requested page size; `build_query_cost_plan` always fills this.
    pub limit: Option<usize>,
    /// Requested page offset; `build_query_cost_plan` always fills this.
    pub offset: Option<usize>,
    /// Token cap; the builder copies it from the input only for the
    /// hydration and output phases.
    pub max_tokens: Option<usize>,
    /// Timeout in milliseconds; the builder copies it from the input for
    /// every phase except the cursor phase.
    pub timeout_ms: Option<u64>,
}
48
/// Which budget was reported as exhausted for a query, and why.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BudgetExhaustion {
    /// Category of the exhausted budget.
    pub kind: BudgetExhaustionKind,
    /// Human-readable explanation, including the relevant budget values.
    pub reason: String,
}
54
/// Category of an exhausted budget; serialized in `snake_case`.
///
/// The per-variant notes describe when the builder in this file selects
/// each kind. When several conditions hold it reports only the first of
/// timeout, token budget, cursor page.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BudgetExhaustionKind {
    /// The input flagged `hits_clamped`.
    TokenBudget,
    /// The input flagged `timed_out`.
    Timeout,
    /// A next cursor was emitted, so more matches remain beyond this page.
    CursorPage,
}
62
/// Paging window and cursor state that identify which slice of the match
/// set a response covers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ResultIdentityContinuity {
    /// Whether the request carried a cursor.
    pub input_cursor_present: bool,
    /// Whether the response emitted a cursor for a following page.
    pub next_cursor_present: bool,
    /// `true` when either an input cursor or a next cursor is present.
    pub cursor_continuation: bool,
    /// Page offset of the request.
    pub offset: usize,
    /// Page size of the request.
    pub limit: usize,
    /// Number of results in this response.
    pub returned_count: usize,
    /// Total number of matches for the query.
    pub total_matches: usize,
    /// Key of the form `offset:<o>:limit:<l>:returned:<r>:total:<t>` built
    /// from the four counters above.
    pub continuity_key: String,
    /// Human-readable description of the cursor situation.
    pub reason: String,
}
75
/// Cache eligibility and counters reported for a query.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CachePlan {
    /// `true` only when the query has no aggregations and no `max_tokens`
    /// budget (as computed by the builder in this file).
    pub eligible: bool,
    /// Cache hit counter, copied from the input.
    pub hits: u64,
    /// Cache miss counter, copied from the input.
    pub misses: u64,
    /// Cache shortfall counter, copied from the input.
    pub shortfall: u64,
    /// Human-readable explanation of the eligibility / hit outcome.
    pub reason: String,
}
84
/// Facts about one executed query, consumed by [`build_query_cost_plan`].
///
/// Mode strings are compared verbatim against `"lexical"`, `"semantic"` and
/// `"hybrid"`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryCostPlanInput {
    /// Query size, echoed into the summary as `query_chars=`.
    pub query_chars: usize,
    /// Search mode the caller asked for.
    pub requested_mode: String,
    /// Search mode that actually ran.
    pub realized_mode: String,
    /// Tier named in the semantic fallback reason; rendered as `unknown`
    /// when absent.
    pub fallback_tier: Option<String>,
    /// Explanation used for the semantic phase when it was not realized.
    pub fallback_reason: Option<String>,
    /// Marks the semantic phase as realized regardless of `realized_mode`.
    pub semantic_refinement: bool,
    /// Marks the lexical phase as realized regardless of `realized_mode`.
    pub wildcard_fallback: bool,
    /// Requested page size.
    pub limit: usize,
    /// Requested page offset.
    pub offset: usize,
    /// Number of results returned in this response.
    pub returned_count: usize,
    /// Total number of matches for the query.
    pub total_matches: usize,
    /// Token cap on the output; when set, the cache is reported ineligible.
    pub max_tokens: Option<usize>,
    /// Token estimate quoted in the token-budget exhaustion reason.
    pub tokens_estimated: Option<usize>,
    /// Whether the output was clamped to fit `max_tokens`.
    pub hits_clamped: bool,
    /// Timeout budget in milliseconds, if any.
    pub timeout_ms: Option<u64>,
    /// Whether the search timed out; takes precedence over the other
    /// exhaustion conditions.
    pub timed_out: bool,
    /// Whether the request carried a cursor.
    pub input_cursor_present: bool,
    /// Whether the response emitted a cursor for a following page.
    pub next_cursor_present: bool,
    /// Name of the output projection, quoted in the output reason and the
    /// summary.
    pub output_projection: String,
    /// Cache hit counter, copied into the cache report.
    pub cache_hits: u64,
    /// Cache miss counter, copied into the cache report.
    pub cache_misses: u64,
    /// Cache shortfall counter, copied into the cache report.
    pub cache_shortfall: u64,
    /// Number of aggregations in the query; a non-zero value makes the cache
    /// ineligible.
    pub aggregation_count: usize,
}
111
112pub fn build_query_cost_plan(input: QueryCostPlanInput) -> QueryCostPlan {
113 let semantic_requested = input.requested_mode == "semantic" || input.requested_mode == "hybrid";
114 let semantic_realized = input.realized_mode == "semantic"
115 || input.realized_mode == "hybrid"
116 || input.semantic_refinement;
117 let lexical_realized = input.realized_mode == "lexical"
118 || input.realized_mode == "hybrid"
119 || input.wildcard_fallback;
120 let budget_exhaustion = budget_exhaustion(&input);
121 let result_identity = result_identity_continuity(&input);
122 let cache = cache_plan(&input);
123
124 let mut phases = vec![
125 QueryPhasePlan {
126 phase: QueryPhase::Lexical,
127 planned: input.requested_mode != "semantic",
128 realized: lexical_realized,
129 budget: PhaseBudget {
130 limit: Some(input.limit),
131 offset: Some(input.offset),
132 max_tokens: None,
133 timeout_ms: input.timeout_ms,
134 },
135 reason: lexical_reason(&input, lexical_realized),
136 },
137 QueryPhasePlan {
138 phase: QueryPhase::Semantic,
139 planned: semantic_requested,
140 realized: semantic_realized,
141 budget: PhaseBudget {
142 limit: Some(input.limit),
143 offset: Some(input.offset),
144 max_tokens: None,
145 timeout_ms: input.timeout_ms,
146 },
147 reason: semantic_reason(&input, semantic_realized),
148 },
149 QueryPhasePlan {
150 phase: QueryPhase::Hydration,
151 planned: true,
152 realized: input.returned_count > 0,
153 budget: PhaseBudget {
154 limit: Some(input.limit),
155 offset: Some(input.offset),
156 max_tokens: input.max_tokens,
157 timeout_ms: input.timeout_ms,
158 },
159 reason: format!(
160 "hydrated {} result(s) for {} total match(es)",
161 input.returned_count, input.total_matches
162 ),
163 },
164 QueryPhasePlan {
165 phase: QueryPhase::Output,
166 planned: true,
167 realized: true,
168 budget: PhaseBudget {
169 limit: Some(input.limit),
170 offset: Some(input.offset),
171 max_tokens: input.max_tokens,
172 timeout_ms: input.timeout_ms,
173 },
174 reason: output_reason(&input),
175 },
176 QueryPhasePlan {
177 phase: QueryPhase::Cursor,
178 planned: input.next_cursor_present || input.input_cursor_present,
179 realized: input.next_cursor_present,
180 budget: PhaseBudget {
181 limit: Some(input.limit),
182 offset: Some(input.offset),
183 max_tokens: None,
184 timeout_ms: None,
185 },
186 reason: result_identity.reason.clone(),
187 },
188 ];
189 phases.sort_by_key(|phase| phase.phase as u8);
190
191 QueryCostPlan {
192 schema_version: QUERY_COST_PLAN_SCHEMA_VERSION.to_string(),
193 planner_id: "query_cost.v1".to_string(),
194 phases,
195 budget_exhaustion,
196 result_identity,
197 cache,
198 summary: format!(
199 "{} mode realized with {} returned / {} total match(es), projection={}, query_chars={}",
200 input.realized_mode,
201 input.returned_count,
202 input.total_matches,
203 input.output_projection,
204 input.query_chars
205 ),
206 }
207}
208
209fn budget_exhaustion(input: &QueryCostPlanInput) -> Option<BudgetExhaustion> {
210 if input.timed_out {
211 return Some(BudgetExhaustion {
212 kind: BudgetExhaustionKind::Timeout,
213 reason: format!(
214 "search reported partial results after timeout budget {}",
215 format_timeout_budget(input.timeout_ms)
216 ),
217 });
218 }
219 if input.hits_clamped {
220 return Some(BudgetExhaustion {
221 kind: BudgetExhaustionKind::TokenBudget,
222 reason: format!(
223 "output was clamped to max_tokens={} after estimating {} tokens",
224 format_optional_usize(input.max_tokens),
225 format_optional_usize(input.tokens_estimated)
226 ),
227 });
228 }
229 if input.next_cursor_present {
230 return Some(BudgetExhaustion {
231 kind: BudgetExhaustionKind::CursorPage,
232 reason: "result window ended before the full match set; continue with next_cursor"
233 .to_string(),
234 });
235 }
236 None
237}
238
239fn result_identity_continuity(input: &QueryCostPlanInput) -> ResultIdentityContinuity {
240 let cursor_continuation = input.input_cursor_present || input.next_cursor_present;
241 let reason = if input.input_cursor_present && input.next_cursor_present {
242 "continued an existing cursor and emitted the next page cursor"
243 } else if input.input_cursor_present {
244 "continued an existing cursor and exhausted the visible result window"
245 } else if input.next_cursor_present {
246 "first page preserved continuity by emitting next_cursor"
247 } else {
248 "single response contains the visible result identity window"
249 };
250 ResultIdentityContinuity {
251 input_cursor_present: input.input_cursor_present,
252 next_cursor_present: input.next_cursor_present,
253 cursor_continuation,
254 offset: input.offset,
255 limit: input.limit,
256 returned_count: input.returned_count,
257 total_matches: input.total_matches,
258 continuity_key: format!(
259 "offset:{}:limit:{}:returned:{}:total:{}",
260 input.offset, input.limit, input.returned_count, input.total_matches
261 ),
262 reason: reason.to_string(),
263 }
264}
265
266fn cache_plan(input: &QueryCostPlanInput) -> CachePlan {
267 let eligible = input.aggregation_count == 0 && input.max_tokens.is_none();
268 let reason = if !eligible && input.aggregation_count > 0 {
269 "aggregation query bypasses reusable hit-cache admission"
270 } else if !eligible {
271 "token-budgeted output bypasses reusable hit-cache admission"
272 } else if input.cache_hits > 0 {
273 "cache supplied at least one hit"
274 } else if input.cache_misses > 0 {
275 "cache was eligible but missed"
276 } else {
277 "cache eligible; no cache event was reported"
278 };
279 CachePlan {
280 eligible,
281 hits: input.cache_hits,
282 misses: input.cache_misses,
283 shortfall: input.cache_shortfall,
284 reason: reason.to_string(),
285 }
286}
287
288fn lexical_reason(input: &QueryCostPlanInput, realized: bool) -> String {
289 if realized && input.wildcard_fallback {
290 "lexical phase realized with wildcard fallback".to_string()
291 } else if realized {
292 "lexical phase realized for the selected search mode".to_string()
293 } else {
294 "lexical phase skipped because semantic-only mode was realized".to_string()
295 }
296}
297
298fn semantic_reason(input: &QueryCostPlanInput, realized: bool) -> String {
299 if realized {
300 "semantic phase realized for semantic or hybrid search".to_string()
301 } else if let Some(reason) = &input.fallback_reason {
302 format!(
303 "semantic phase planned but fell back to {}: {reason}",
304 input.fallback_tier.as_deref().unwrap_or("unknown")
305 )
306 } else if input.requested_mode == "lexical" {
307 "semantic phase not planned for lexical mode".to_string()
308 } else {
309 "semantic phase was not realized".to_string()
310 }
311}
312
313fn output_reason(input: &QueryCostPlanInput) -> String {
314 if input.hits_clamped {
315 format!(
316 "projection {} was clamped by max_tokens={}",
317 input.output_projection,
318 format_optional_usize(input.max_tokens)
319 )
320 } else {
321 format!(
322 "projection {} emitted {} hit(s)",
323 input.output_projection, input.returned_count
324 )
325 }
326}
327
/// Renders a timeout budget as `<n>ms`, or `unspecified` when none was set.
fn format_timeout_budget(timeout_ms: Option<u64>) -> String {
    match timeout_ms {
        Some(timeout_ms) => format!("{timeout_ms}ms"),
        None => "unspecified".to_string(),
    }
}
333
/// Renders an optional count as its decimal digits, or `unspecified` when
/// absent, so no `Option` debug syntax appears in emitted text.
fn format_optional_usize(value: Option<usize>) -> String {
    match value {
        Some(number) => number.to_string(),
        None => "unspecified".to_string(),
    }
}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline input: a hybrid first page returning 10 of 25 matches, with a
    /// next cursor emitted and a single cache miss recorded.
    fn base_input() -> QueryCostPlanInput {
        QueryCostPlanInput {
            query_chars: 12,
            requested_mode: "hybrid".to_string(),
            realized_mode: "hybrid".to_string(),
            fallback_tier: None,
            fallback_reason: None,
            semantic_refinement: true,
            wildcard_fallback: false,
            limit: 10,
            offset: 0,
            returned_count: 10,
            total_matches: 25,
            max_tokens: None,
            tokens_estimated: Some(400),
            hits_clamped: false,
            timeout_ms: None,
            timed_out: false,
            input_cursor_present: false,
            next_cursor_present: true,
            output_projection: "all".to_string(),
            cache_hits: 0,
            cache_misses: 1,
            cache_shortfall: 0,
            aggregation_count: 0,
        }
    }

    /// Finds the plan entry for `wanted`, panicking with `label` when absent.
    #[track_caller]
    fn phase_entry<'a>(
        plan: &'a QueryCostPlan,
        wanted: QueryPhase,
        label: &str,
    ) -> &'a QueryPhasePlan {
        plan.phases
            .iter()
            .find(|entry| entry.phase == wanted)
            .expect(label)
    }

    /// Fails when `reason` contains Rust `Option` debug formatting.
    #[track_caller]
    fn assert_no_option_debug_syntax(reason: &str) {
        assert!(
            !reason.contains("Some(") && !reason.contains("None"),
            "robot metadata reason must not leak Rust Option debug syntax: {}",
            reason
        );
    }

    /// Kind of the reported budget exhaustion, if any.
    fn exhaustion_kind(plan: &QueryCostPlan) -> Option<BudgetExhaustionKind> {
        plan.budget_exhaustion
            .as_ref()
            .map(|exhaustion| exhaustion.kind)
    }

    #[test]
    fn no_limit_token_budget_reports_token_exhaustion() {
        let input = QueryCostPlanInput {
            limit: 0,
            max_tokens: Some(200),
            tokens_estimated: Some(450),
            hits_clamped: true,
            output_projection: "summary".to_string(),
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        assert_eq!(
            exhaustion_kind(&plan),
            Some(BudgetExhaustionKind::TokenBudget)
        );

        let output_mentions_clamp = plan
            .phases
            .iter()
            .any(|entry| entry.phase == QueryPhase::Output && entry.reason.contains("clamped"));
        assert!(output_mentions_clamp);
    }

    #[test]
    fn huge_snippet_projection_keeps_budget_reason_explicit() {
        let input = QueryCostPlanInput {
            max_tokens: Some(100),
            tokens_estimated: Some(2_000),
            hits_clamped: true,
            output_projection: "custom".to_string(),
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        let exhaustion = plan.budget_exhaustion.as_ref().expect("budget exhaustion");
        assert!(exhaustion.reason.contains("max_tokens"));
        assert_no_option_debug_syntax(&exhaustion.reason);
    }

    #[test]
    fn semantic_unavailable_records_planned_but_unrealized_semantic_phase() {
        let input = QueryCostPlanInput {
            realized_mode: "lexical".to_string(),
            semantic_refinement: false,
            fallback_tier: Some("lexical".to_string()),
            fallback_reason: Some("semantic assets unavailable".to_string()),
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        let semantic = phase_entry(&plan, QueryPhase::Semantic, "semantic phase");
        assert!(semantic.planned);
        assert!(!semantic.realized);
        assert!(semantic.reason.contains("fell back to lexical"));
        assert!(semantic.reason.contains("semantic assets unavailable"));
        assert_no_option_debug_syntax(&semantic.reason);
    }

    #[test]
    fn cache_hit_and_miss_stats_stay_truthful() {
        let input = QueryCostPlanInput {
            cache_hits: 3,
            cache_misses: 2,
            cache_shortfall: 1,
            ..base_input()
        };
        let cache = build_query_cost_plan(input).cache;

        assert!(cache.eligible);
        assert_eq!(cache.hits, 3);
        assert_eq!(cache.misses, 2);
        assert_eq!(cache.shortfall, 1);
    }

    #[test]
    fn cursor_continuation_preserves_identity_window() {
        let input = QueryCostPlanInput {
            input_cursor_present: true,
            next_cursor_present: true,
            offset: 10,
            limit: 10,
            returned_count: 10,
            total_matches: 31,
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        assert!(plan.result_identity.cursor_continuation);
        assert_eq!(
            plan.result_identity.continuity_key,
            "offset:10:limit:10:returned:10:total:31"
        );
        assert_eq!(
            exhaustion_kind(&plan),
            Some(BudgetExhaustionKind::CursorPage)
        );
    }

    #[test]
    fn empty_offset_page_still_realizes_output_phase() {
        let input = QueryCostPlanInput {
            offset: 100,
            returned_count: 0,
            total_matches: 12,
            next_cursor_present: false,
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        let output = phase_entry(&plan, QueryPhase::Output, "output phase");
        assert!(output.realized);
        assert_eq!(
            plan.result_identity.continuity_key,
            "offset:100:limit:10:returned:0:total:12"
        );
    }
}