Skip to main content

coding_agent_search/
query_cost_planner.rs

//! Data-only query cost planner for robot metadata.
//!
//! This module does not change search execution. It summarizes the plan that
//! cass already chose, together with the realized outcome, so operators can
//! reason about tail cost, fallback tiers, cache behavior, and cursor
//! continuity from `_meta`.
6
7use serde::{Deserialize, Serialize};
8
/// Schema version string that `build_query_cost_plan` copies into
/// `QueryCostPlan::schema_version`.
pub const QUERY_COST_PLAN_SCHEMA_VERSION: &str = "1";
10
11#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub struct QueryCostPlan {
13    pub schema_version: String,
14    pub planner_id: String,
15    pub phases: Vec<QueryPhasePlan>,
16    pub budget_exhaustion: Option<BudgetExhaustion>,
17    pub result_identity: ResultIdentityContinuity,
18    pub cache: CachePlan,
19    pub summary: String,
20}
21
22#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
23pub struct QueryPhasePlan {
24    pub phase: QueryPhase,
25    pub planned: bool,
26    pub realized: bool,
27    pub budget: PhaseBudget,
28    pub reason: String,
29}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
32#[serde(rename_all = "snake_case")]
33pub enum QueryPhase {
34    Lexical,
35    Semantic,
36    Hydration,
37    Output,
38    Cursor,
39}
40
41#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
42pub struct PhaseBudget {
43    pub limit: Option<usize>,
44    pub offset: Option<usize>,
45    pub max_tokens: Option<usize>,
46    pub timeout_ms: Option<u64>,
47}
48
49#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
50pub struct BudgetExhaustion {
51    pub kind: BudgetExhaustionKind,
52    pub reason: String,
53}
54
55#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
56#[serde(rename_all = "snake_case")]
57pub enum BudgetExhaustionKind {
58    TokenBudget,
59    Timeout,
60    CursorPage,
61}
62
63#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
64pub struct ResultIdentityContinuity {
65    pub input_cursor_present: bool,
66    pub next_cursor_present: bool,
67    pub cursor_continuation: bool,
68    pub offset: usize,
69    pub limit: usize,
70    pub returned_count: usize,
71    pub total_matches: usize,
72    pub continuity_key: String,
73    pub reason: String,
74}
75
76#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
77pub struct CachePlan {
78    pub eligible: bool,
79    pub hits: u64,
80    pub misses: u64,
81    pub shortfall: u64,
82    pub reason: String,
83}
84
/// Facts about an already-executed search, consumed by `build_query_cost_plan`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryCostPlanInput {
    /// Query size, echoed in the plan summary as `query_chars=`.
    pub query_chars: usize,
    /// Mode the caller asked for; the planner compares it against
    /// `"lexical"`, `"semantic"` and `"hybrid"`.
    pub requested_mode: String,
    /// Mode that actually ran; compared against the same three names.
    pub realized_mode: String,
    /// Tier the search fell back to; reported as `unknown` when absent.
    pub fallback_tier: Option<String>,
    /// Why a fallback happened, if one did.
    pub fallback_reason: Option<String>,
    /// Marks the semantic phase as realized regardless of `realized_mode`.
    pub semantic_refinement: bool,
    /// Marks the lexical phase as realized regardless of `realized_mode`.
    pub wildcard_fallback: bool,
    /// Requested window size.
    pub limit: usize,
    /// Starting offset of the window.
    pub offset: usize,
    /// Number of results returned; hydration counts as realized when non-zero.
    pub returned_count: usize,
    /// Total number of matches reported for the query.
    pub total_matches: usize,
    /// Token ceiling on the output; when set, the query is not cache-eligible.
    pub max_tokens: Option<usize>,
    /// Estimated token count, quoted only in the token-budget exhaustion reason.
    pub tokens_estimated: Option<usize>,
    /// The output was clamped to fit `max_tokens`.
    pub hits_clamped: bool,
    /// Timeout budget in milliseconds, if one was set.
    pub timeout_ms: Option<u64>,
    /// The search reported partial results because of the timeout.
    pub timed_out: bool,
    /// The request arrived with a cursor.
    pub input_cursor_present: bool,
    /// The response carries a cursor for a following page.
    pub next_cursor_present: bool,
    /// Projection label echoed in the output reason and the summary.
    pub output_projection: String,
    /// Cache hit counter, copied verbatim into the cache plan.
    pub cache_hits: u64,
    /// Cache miss counter, copied verbatim into the cache plan.
    pub cache_misses: u64,
    /// Cache shortfall counter, copied verbatim into the cache plan.
    pub cache_shortfall: u64,
    /// Number of aggregations requested; any non-zero value makes the query
    /// ineligible for the cache.
    pub aggregation_count: usize,
}
111
112pub fn build_query_cost_plan(input: QueryCostPlanInput) -> QueryCostPlan {
113    let semantic_requested = input.requested_mode == "semantic" || input.requested_mode == "hybrid";
114    let semantic_realized = input.realized_mode == "semantic"
115        || input.realized_mode == "hybrid"
116        || input.semantic_refinement;
117    let lexical_realized = input.realized_mode == "lexical"
118        || input.realized_mode == "hybrid"
119        || input.wildcard_fallback;
120    let budget_exhaustion = budget_exhaustion(&input);
121    let result_identity = result_identity_continuity(&input);
122    let cache = cache_plan(&input);
123
124    let mut phases = vec![
125        QueryPhasePlan {
126            phase: QueryPhase::Lexical,
127            planned: input.requested_mode != "semantic",
128            realized: lexical_realized,
129            budget: PhaseBudget {
130                limit: Some(input.limit),
131                offset: Some(input.offset),
132                max_tokens: None,
133                timeout_ms: input.timeout_ms,
134            },
135            reason: lexical_reason(&input, lexical_realized),
136        },
137        QueryPhasePlan {
138            phase: QueryPhase::Semantic,
139            planned: semantic_requested,
140            realized: semantic_realized,
141            budget: PhaseBudget {
142                limit: Some(input.limit),
143                offset: Some(input.offset),
144                max_tokens: None,
145                timeout_ms: input.timeout_ms,
146            },
147            reason: semantic_reason(&input, semantic_realized),
148        },
149        QueryPhasePlan {
150            phase: QueryPhase::Hydration,
151            planned: true,
152            realized: input.returned_count > 0,
153            budget: PhaseBudget {
154                limit: Some(input.limit),
155                offset: Some(input.offset),
156                max_tokens: input.max_tokens,
157                timeout_ms: input.timeout_ms,
158            },
159            reason: format!(
160                "hydrated {} result(s) for {} total match(es)",
161                input.returned_count, input.total_matches
162            ),
163        },
164        QueryPhasePlan {
165            phase: QueryPhase::Output,
166            planned: true,
167            realized: true,
168            budget: PhaseBudget {
169                limit: Some(input.limit),
170                offset: Some(input.offset),
171                max_tokens: input.max_tokens,
172                timeout_ms: input.timeout_ms,
173            },
174            reason: output_reason(&input),
175        },
176        QueryPhasePlan {
177            phase: QueryPhase::Cursor,
178            planned: input.next_cursor_present || input.input_cursor_present,
179            realized: input.next_cursor_present,
180            budget: PhaseBudget {
181                limit: Some(input.limit),
182                offset: Some(input.offset),
183                max_tokens: None,
184                timeout_ms: None,
185            },
186            reason: result_identity.reason.clone(),
187        },
188    ];
189    phases.sort_by_key(|phase| phase.phase as u8);
190
191    QueryCostPlan {
192        schema_version: QUERY_COST_PLAN_SCHEMA_VERSION.to_string(),
193        planner_id: "query_cost.v1".to_string(),
194        phases,
195        budget_exhaustion,
196        result_identity,
197        cache,
198        summary: format!(
199            "{} mode realized with {} returned / {} total match(es), projection={}, query_chars={}",
200            input.realized_mode,
201            input.returned_count,
202            input.total_matches,
203            input.output_projection,
204            input.query_chars
205        ),
206    }
207}
208
209fn budget_exhaustion(input: &QueryCostPlanInput) -> Option<BudgetExhaustion> {
210    if input.timed_out {
211        return Some(BudgetExhaustion {
212            kind: BudgetExhaustionKind::Timeout,
213            reason: format!(
214                "search reported partial results after timeout budget {}",
215                format_timeout_budget(input.timeout_ms)
216            ),
217        });
218    }
219    if input.hits_clamped {
220        return Some(BudgetExhaustion {
221            kind: BudgetExhaustionKind::TokenBudget,
222            reason: format!(
223                "output was clamped to max_tokens={} after estimating {} tokens",
224                format_optional_usize(input.max_tokens),
225                format_optional_usize(input.tokens_estimated)
226            ),
227        });
228    }
229    if input.next_cursor_present {
230        return Some(BudgetExhaustion {
231            kind: BudgetExhaustionKind::CursorPage,
232            reason: "result window ended before the full match set; continue with next_cursor"
233                .to_string(),
234        });
235    }
236    None
237}
238
239fn result_identity_continuity(input: &QueryCostPlanInput) -> ResultIdentityContinuity {
240    let cursor_continuation = input.input_cursor_present || input.next_cursor_present;
241    let reason = if input.input_cursor_present && input.next_cursor_present {
242        "continued an existing cursor and emitted the next page cursor"
243    } else if input.input_cursor_present {
244        "continued an existing cursor and exhausted the visible result window"
245    } else if input.next_cursor_present {
246        "first page preserved continuity by emitting next_cursor"
247    } else {
248        "single response contains the visible result identity window"
249    };
250    ResultIdentityContinuity {
251        input_cursor_present: input.input_cursor_present,
252        next_cursor_present: input.next_cursor_present,
253        cursor_continuation,
254        offset: input.offset,
255        limit: input.limit,
256        returned_count: input.returned_count,
257        total_matches: input.total_matches,
258        continuity_key: format!(
259            "offset:{}:limit:{}:returned:{}:total:{}",
260            input.offset, input.limit, input.returned_count, input.total_matches
261        ),
262        reason: reason.to_string(),
263    }
264}
265
266fn cache_plan(input: &QueryCostPlanInput) -> CachePlan {
267    let eligible = input.aggregation_count == 0 && input.max_tokens.is_none();
268    let reason = if !eligible && input.aggregation_count > 0 {
269        "aggregation query bypasses reusable hit-cache admission"
270    } else if !eligible {
271        "token-budgeted output bypasses reusable hit-cache admission"
272    } else if input.cache_hits > 0 {
273        "cache supplied at least one hit"
274    } else if input.cache_misses > 0 {
275        "cache was eligible but missed"
276    } else {
277        "cache eligible; no cache event was reported"
278    };
279    CachePlan {
280        eligible,
281        hits: input.cache_hits,
282        misses: input.cache_misses,
283        shortfall: input.cache_shortfall,
284        reason: reason.to_string(),
285    }
286}
287
288fn lexical_reason(input: &QueryCostPlanInput, realized: bool) -> String {
289    if realized && input.wildcard_fallback {
290        "lexical phase realized with wildcard fallback".to_string()
291    } else if realized {
292        "lexical phase realized for the selected search mode".to_string()
293    } else {
294        "lexical phase skipped because semantic-only mode was realized".to_string()
295    }
296}
297
298fn semantic_reason(input: &QueryCostPlanInput, realized: bool) -> String {
299    if realized {
300        "semantic phase realized for semantic or hybrid search".to_string()
301    } else if let Some(reason) = &input.fallback_reason {
302        format!(
303            "semantic phase planned but fell back to {}: {reason}",
304            input.fallback_tier.as_deref().unwrap_or("unknown")
305        )
306    } else if input.requested_mode == "lexical" {
307        "semantic phase not planned for lexical mode".to_string()
308    } else {
309        "semantic phase was not realized".to_string()
310    }
311}
312
313fn output_reason(input: &QueryCostPlanInput) -> String {
314    if input.hits_clamped {
315        format!(
316            "projection {} was clamped by max_tokens={}",
317            input.output_projection,
318            format_optional_usize(input.max_tokens)
319        )
320    } else {
321        format!(
322            "projection {} emitted {} hit(s)",
323            input.output_projection, input.returned_count
324        )
325    }
326}
327
/// Renders a timeout budget as `<n>ms`, or `unspecified` when none was set.
fn format_timeout_budget(timeout_ms: Option<u64>) -> String {
    match timeout_ms {
        Some(timeout_ms) => format!("{timeout_ms}ms"),
        None => String::from("unspecified"),
    }
}
333
/// Renders an optional count as its decimal digits, or `unspecified` when
/// absent, so reason strings never contain `Some(..)` / `None` debug syntax.
fn format_optional_usize(value: Option<usize>) -> String {
    value.map_or_else(|| String::from("unspecified"), |count| count.to_string())
}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline input: a hybrid search that returned the first 10 of 25
    /// matches, emitted a next cursor, and recorded one cache miss. Each test
    /// overrides only the fields it cares about.
    fn base_input() -> QueryCostPlanInput {
        QueryCostPlanInput {
            query_chars: 12,
            requested_mode: String::from("hybrid"),
            realized_mode: String::from("hybrid"),
            fallback_tier: None,
            fallback_reason: None,
            semantic_refinement: true,
            wildcard_fallback: false,
            limit: 10,
            offset: 0,
            returned_count: 10,
            total_matches: 25,
            max_tokens: None,
            tokens_estimated: Some(400),
            hits_clamped: false,
            timeout_ms: None,
            timed_out: false,
            input_cursor_present: false,
            next_cursor_present: true,
            output_projection: String::from("all"),
            cache_hits: 0,
            cache_misses: 1,
            cache_shortfall: 0,
            aggregation_count: 0,
        }
    }

    /// Returns the plan entry for `wanted`, panicking with `label` if absent.
    fn phase_plan<'a>(
        plan: &'a QueryCostPlan,
        wanted: QueryPhase,
        label: &str,
    ) -> &'a QueryPhasePlan {
        plan.phases
            .iter()
            .find(|entry| entry.phase == wanted)
            .expect(label)
    }

    /// Kind of the reported budget exhaustion, if any.
    fn exhaustion_kind(plan: &QueryCostPlan) -> Option<BudgetExhaustionKind> {
        plan.budget_exhaustion
            .as_ref()
            .map(|exhaustion| exhaustion.kind)
    }

    /// True when `text` contains Rust `Option` debug syntax.
    fn leaks_option_debug(text: &str) -> bool {
        text.contains("Some(") || text.contains("None")
    }

    #[test]
    fn no_limit_token_budget_reports_token_exhaustion() {
        let input = QueryCostPlanInput {
            limit: 0,
            max_tokens: Some(200),
            tokens_estimated: Some(450),
            hits_clamped: true,
            output_projection: "summary".to_string(),
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        assert_eq!(
            exhaustion_kind(&plan),
            Some(BudgetExhaustionKind::TokenBudget)
        );
        let output_mentions_clamp = plan
            .phases
            .iter()
            .any(|entry| entry.phase == QueryPhase::Output && entry.reason.contains("clamped"));
        assert!(output_mentions_clamp);
    }

    #[test]
    fn huge_snippet_projection_keeps_budget_reason_explicit() {
        let input = QueryCostPlanInput {
            max_tokens: Some(100),
            tokens_estimated: Some(2_000),
            hits_clamped: true,
            output_projection: "custom".to_string(),
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        let budget_reason = &plan
            .budget_exhaustion
            .as_ref()
            .expect("budget exhaustion")
            .reason;
        assert!(budget_reason.contains("max_tokens"));
        assert!(
            !leaks_option_debug(budget_reason),
            "robot metadata reason must not leak Rust Option debug syntax: {budget_reason}"
        );
    }

    #[test]
    fn semantic_unavailable_records_planned_but_unrealized_semantic_phase() {
        let input = QueryCostPlanInput {
            realized_mode: "lexical".to_string(),
            semantic_refinement: false,
            fallback_tier: Some("lexical".to_string()),
            fallback_reason: Some("semantic assets unavailable".to_string()),
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        let semantic = phase_plan(&plan, QueryPhase::Semantic, "semantic phase");
        assert!(semantic.planned);
        assert!(!semantic.realized);
        assert!(semantic.reason.contains("fell back to lexical"));
        assert!(semantic.reason.contains("semantic assets unavailable"));
        assert!(
            !leaks_option_debug(&semantic.reason),
            "robot metadata reason must not leak Rust Option debug syntax: {}",
            semantic.reason
        );
    }

    #[test]
    fn cache_hit_and_miss_stats_stay_truthful() {
        let input = QueryCostPlanInput {
            cache_hits: 3,
            cache_misses: 2,
            cache_shortfall: 1,
            ..base_input()
        };
        let cache = build_query_cost_plan(input).cache;

        assert!(cache.eligible);
        assert_eq!(cache.hits, 3);
        assert_eq!(cache.misses, 2);
        assert_eq!(cache.shortfall, 1);
    }

    #[test]
    fn cursor_continuation_preserves_identity_window() {
        let input = QueryCostPlanInput {
            input_cursor_present: true,
            next_cursor_present: true,
            offset: 10,
            limit: 10,
            returned_count: 10,
            total_matches: 31,
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        assert!(plan.result_identity.cursor_continuation);
        assert_eq!(
            plan.result_identity.continuity_key,
            "offset:10:limit:10:returned:10:total:31"
        );
        assert_eq!(
            exhaustion_kind(&plan),
            Some(BudgetExhaustionKind::CursorPage)
        );
    }

    #[test]
    fn empty_offset_page_still_realizes_output_phase() {
        let input = QueryCostPlanInput {
            offset: 100,
            returned_count: 0,
            total_matches: 12,
            next_cursor_present: false,
            ..base_input()
        };
        let plan = build_query_cost_plan(input);

        let output = phase_plan(&plan, QueryPhase::Output, "output phase");
        assert!(output.realized);
        assert_eq!(
            plan.result_identity.continuity_key,
            "offset:100:limit:10:returned:0:total:12"
        );
    }
}