vectorless/retrieval/pilot/
config.rs

1// Copyright (c) 2026 vectorless developers
2// SPDX-License-Identifier: Apache-2.0
3
4//! Configuration types for Pilot.
5//!
6//! This module defines all configuration structures that control
7//! Pilot's behavior, including budget limits, intervention thresholds,
8//! and operation modes.
9
10use serde::{Deserialize, Serialize};
11
/// Main Pilot configuration.
///
/// Groups the operation mode, the resource budget, the intervention
/// thresholds, and the guidance feature flags into a single value that
/// derives `Serialize` and `Deserialize`.
///
/// The `Default` implementation in this file selects
/// `PilotMode::Balanced`, the default budget and intervention settings,
/// enables both guidance flags, and leaves `prompt_template_path` unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PilotConfig {
    /// Operation mode controlling how aggressively Pilot intervenes.
    pub mode: PilotMode,
    /// Token and call budget constraints.
    pub budget: BudgetConfig,
    /// Intervention threshold settings.
    pub intervention: InterventionConfig,
    /// Whether to provide guidance at search start.
    pub guide_at_start: bool,
    /// Whether to provide guidance during backtracking.
    pub guide_at_backtrack: bool,
    /// Optional path to custom prompt templates.
    ///
    /// `None` in the default config and in every preset constructor
    /// defined in this file.
    pub prompt_template_path: Option<String>,
}
31
32impl Default for PilotConfig {
33    fn default() -> Self {
34        Self {
35            mode: PilotMode::Balanced,
36            budget: BudgetConfig::default(),
37            intervention: InterventionConfig::default(),
38            guide_at_start: true,
39            guide_at_backtrack: true,
40            prompt_template_path: None,
41        }
42    }
43}
44
45impl PilotConfig {
46    /// Create a new config with specified mode.
47    pub fn with_mode(mode: PilotMode) -> Self {
48        Self {
49            mode,
50            ..Default::default()
51        }
52    }
53
54    /// Create a high-quality config (more LLM calls).
55    pub fn high_quality() -> Self {
56        Self {
57            mode: PilotMode::Aggressive,
58            budget: BudgetConfig {
59                max_tokens_per_query: 5000,
60                max_tokens_per_call: 1000,
61                max_calls_per_query: 10,
62                max_calls_per_level: 3,
63                hard_limit: false,
64            },
65            intervention: InterventionConfig {
66                fork_threshold: 2,
67                score_gap_threshold: 0.2,
68                low_score_threshold: 0.4,
69                max_interventions_per_level: 3,
70            },
71            guide_at_start: true,
72            guide_at_backtrack: true,
73            prompt_template_path: None,
74        }
75    }
76
77    /// Create a low-cost config (fewer LLM calls).
78    pub fn low_cost() -> Self {
79        Self {
80            mode: PilotMode::Conservative,
81            budget: BudgetConfig {
82                max_tokens_per_query: 500,
83                max_tokens_per_call: 200,
84                max_calls_per_query: 2,
85                max_calls_per_level: 1,
86                hard_limit: true,
87            },
88            intervention: InterventionConfig {
89                fork_threshold: 5,
90                score_gap_threshold: 0.1,
91                low_score_threshold: 0.2,
92                max_interventions_per_level: 1,
93            },
94            guide_at_start: false,
95            guide_at_backtrack: true,
96            prompt_template_path: None,
97        }
98    }
99
100    /// Create a pure algorithm config (no LLM calls).
101    pub fn algorithm_only() -> Self {
102        Self {
103            mode: PilotMode::AlgorithmOnly,
104            ..Default::default()
105        }
106    }
107}
108
/// Pilot operation mode.
///
/// Controls the trade-off between LLM usage and algorithm-only search.
/// `Balanced` is marked `#[default]`, so the derived `Default` yields it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum PilotMode {
    /// Aggressive mode: frequent LLM calls for maximum accuracy.
    Aggressive,
    /// Balanced mode: LLM calls at key decision points (default).
    #[default]
    Balanced,
    /// Conservative mode: minimal LLM calls, rely more on algorithm.
    Conservative,
    /// Pure algorithm mode: no LLM calls at all.
    AlgorithmOnly,
}
124
125impl PilotMode {
126    /// Check if this mode uses LLM at all.
127    pub fn uses_llm(&self) -> bool {
128        !matches!(self, PilotMode::AlgorithmOnly)
129    }
130
131    /// Get the fork threshold multiplier for this mode.
132    pub fn fork_threshold_multiplier(&self) -> f32 {
133        match self {
134            PilotMode::Aggressive => 0.5, // Lower threshold = more interventions
135            PilotMode::Balanced => 1.0,
136            PilotMode::Conservative => 2.0, // Higher threshold = fewer interventions
137            PilotMode::AlgorithmOnly => f32::MAX,
138        }
139    }
140}
141
/// Token and call budget configuration.
///
/// Controls resource consumption during retrieval. Only
/// `max_tokens_per_query` is consulted by the helper methods defined on
/// this type in this file (`is_within_budget`, `remaining_tokens`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BudgetConfig {
    /// Maximum total tokens per query (input + output).
    ///
    /// `is_within_budget` returns `true` only while usage is strictly
    /// below this value.
    pub max_tokens_per_query: usize,
    /// Maximum tokens per single LLM call.
    pub max_tokens_per_call: usize,
    /// Maximum number of LLM calls per query.
    pub max_calls_per_query: usize,
    /// Maximum number of LLM calls per tree level.
    pub max_calls_per_level: usize,
    /// Whether to enforce hard limits (true) or soft limits with warnings (false).
    pub hard_limit: bool,
}
158
159impl Default for BudgetConfig {
160    fn default() -> Self {
161        Self {
162            max_tokens_per_query: 2000,
163            max_tokens_per_call: 500,
164            max_calls_per_query: 5,
165            max_calls_per_level: 2,
166            hard_limit: true,
167        }
168    }
169}
170
171impl BudgetConfig {
172    /// Check if a given token count is within budget.
173    pub fn is_within_budget(&self, used: usize) -> bool {
174        used < self.max_tokens_per_query
175    }
176
177    /// Get remaining tokens given current usage.
178    pub fn remaining_tokens(&self, used: usize) -> usize {
179        self.max_tokens_per_query.saturating_sub(used)
180    }
181}
182
/// Intervention threshold configuration.
///
/// Controls when Pilot decides to intervene in the search process.
/// All comparisons made by the helper methods on this type in this file
/// are strict.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InterventionConfig {
    /// Candidate-count threshold for fork intervention.
    ///
    /// `should_intervene_at_fork` triggers only when the candidate count is
    /// strictly greater than this value, so a count equal to the threshold
    /// does not trigger.
    pub fork_threshold: usize,
    /// Score gap threshold.
    ///
    /// `scores_are_close` reports the scores as close when the difference
    /// between the largest and smallest score is strictly below this value.
    pub score_gap_threshold: f32,
    /// Low score threshold (intervene when best score is below this).
    pub low_score_threshold: f32,
    /// Maximum interventions allowed per tree level.
    pub max_interventions_per_level: usize,
}
197
198impl Default for InterventionConfig {
199    fn default() -> Self {
200        Self {
201            fork_threshold: 3,
202            score_gap_threshold: 0.15,
203            low_score_threshold: 0.3,
204            max_interventions_per_level: 2,
205        }
206    }
207}
208
209impl InterventionConfig {
210    /// Check if the candidate count triggers intervention.
211    pub fn should_intervene_at_fork(&self, candidate_count: usize) -> bool {
212        candidate_count > self.fork_threshold
213    }
214
215    /// Check if scores are too close (algorithm uncertain).
216    pub fn scores_are_close(&self, scores: &[f32]) -> bool {
217        if scores.len() < 2 {
218            return false;
219        }
220        let max_score = scores.iter().cloned().fold(0.0, f32::max);
221        let min_score = scores.iter().cloned().fold(1.0, f32::min);
222        (max_score - min_score) < self.score_gap_threshold
223    }
224
225    /// Check if the best score is too low.
226    pub fn is_low_confidence(&self, best_score: f32) -> bool {
227        best_score < self.low_score_threshold
228    }
229}
230
#[cfg(test)]
mod tests {
    use super::*;

    /// Every mode except `AlgorithmOnly` reports that it uses the LLM.
    #[test]
    fn test_pilot_mode_uses_llm() {
        let llm_modes = [
            PilotMode::Aggressive,
            PilotMode::Balanced,
            PilotMode::Conservative,
        ];
        for mode in llm_modes {
            assert!(mode.uses_llm());
        }

        assert!(!PilotMode::AlgorithmOnly.uses_llm());
    }

    /// The default budget accepts usage below the limit, rejects usage
    /// above it, and reports the remaining tokens.
    #[test]
    fn test_budget_config() {
        let budget = BudgetConfig::default();

        let under_limit = budget.is_within_budget(1000);
        let over_limit = budget.is_within_budget(3000);
        assert!(under_limit);
        assert!(!over_limit);

        let left = budget.remaining_tokens(1500);
        assert_eq!(left, 500);
    }

    /// The default thresholds drive the fork, score-gap, and
    /// low-confidence checks.
    #[test]
    fn test_intervention_config() {
        let thresholds = InterventionConfig::default();

        // Fork threshold
        let few_candidates = 2;
        let many_candidates = 4;
        assert!(!thresholds.should_intervene_at_fork(few_candidates));
        assert!(thresholds.should_intervene_at_fork(many_candidates));

        // Scores close
        let tight_scores = [0.5, 0.55, 0.52];
        let spread_scores = [0.3, 0.8];
        assert!(thresholds.scores_are_close(&tight_scores));
        assert!(!thresholds.scores_are_close(&spread_scores));

        // Low confidence
        assert!(thresholds.is_low_confidence(0.2));
        assert!(!thresholds.is_low_confidence(0.5));
    }

    /// Each preset constructor selects its expected mode.
    #[test]
    fn test_pilot_config_presets() {
        assert_eq!(PilotConfig::high_quality().mode, PilotMode::Aggressive);
        assert_eq!(PilotConfig::low_cost().mode, PilotMode::Conservative);
        assert_eq!(
            PilotConfig::algorithm_only().mode,
            PilotMode::AlgorithmOnly
        );
    }
}
vectorless/retrieval/pilot/config.rs

vectorless/retrieval/pilot/
config.rs