vectorless/config/
validator.rs

// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Configuration validation.
//!
//! This module provides comprehensive validation for configuration values,
//! including range checks, consistency checks, and dependency validation.

9use super::types::{Config, ConfigValidationError, Severity, ValidationError};
10
11/// Configuration validator.
12#[derive(Debug, Default)]
13pub struct ConfigValidator {
14    /// Validation rules to apply.
15    rules: Vec<Box<dyn ValidationRule>>,
16}
17
18impl ConfigValidator {
19    /// Create a new validator with default rules.
20    pub fn new() -> Self {
21        Self {
22            rules: vec![
23                Box::new(RangeValidator),
24                Box::new(ConsistencyValidator),
25                Box::new(DependencyValidator),
26            ],
27        }
28    }
29
30    /// Add a custom validation rule.
31    pub fn with_rule(mut self, rule: Box<dyn ValidationRule>) -> Self {
32        self.rules.push(rule);
33        self
34    }
35
36    /// Validate the configuration.
37    pub fn validate(&self, config: &Config) -> Result<(), ConfigValidationError> {
38        let mut errors = Vec::new();
39
40        for rule in &self.rules {
41            rule.validate(config, &mut errors);
42        }
43
44        // Only fail on errors, not warnings or info
45        let has_errors = errors.iter().any(|e| e.severity == Severity::Error);
46
47        if has_errors {
48            Err(ConfigValidationError { errors })
49        } else {
50            Ok(())
51        }
52    }
53}
54
55/// Trait for validation rules.
56pub trait ValidationRule: std::fmt::Debug + Send + Sync {
57    /// Validate the configuration, appending errors if found.
58    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>);
59}
60
61/// Validates value ranges.
62#[derive(Debug)]
63struct RangeValidator;
64
65impl ValidationRule for RangeValidator {
66    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
67        // Indexer ranges
68        if config.indexer.subsection_threshold == 0 {
69            errors.push(ValidationError::error(
70                "indexer.subsection_threshold",
71                "Subsection threshold must be greater than 0",
72            ));
73        }
74
75        if config.indexer.subsection_threshold > 10000 {
76            errors.push(ValidationError::warning(
77                "indexer.subsection_threshold",
78                "Subsection threshold is very high, may impact performance",
79            ).with_actual(config.indexer.subsection_threshold.to_string()));
80        }
81
82        // Summary ranges
83        if config.summary.max_tokens == 0 {
84            errors.push(ValidationError::error(
85                "summary.max_tokens",
86                "Summary max tokens must be greater than 0",
87            ));
88        }
89
90        if config.summary.temperature < 0.0 || config.summary.temperature > 2.0 {
91            errors.push(ValidationError::warning(
92                "summary.temperature",
93                "Temperature outside typical range [0.0, 2.0]",
94            ).with_actual(config.summary.temperature.to_string()));
95        }
96
97        // Retrieval ranges
98        if config.retrieval.top_k == 0 {
99            errors.push(ValidationError::error(
100                "retrieval.top_k",
101                "Top K must be greater than 0",
102            ));
103        }
104
105        if config.retrieval.search.beam_width == 0 {
106            errors.push(ValidationError::error(
107                "retrieval.search.beam_width",
108                "Beam width must be greater than 0",
109            ));
110        }
111
112        // Content aggregator ranges
113        if config.retrieval.content.token_budget == 0 {
114            errors.push(ValidationError::error(
115                "retrieval.content.token_budget",
116                "Token budget must be greater than 0",
117            ));
118        }
119
120        if config.retrieval.content.min_relevance_score < 0.0
121            || config.retrieval.content.min_relevance_score > 1.0
122        {
123            errors.push(ValidationError::error(
124                "retrieval.content.min_relevance_score",
125                "Min relevance score must be between 0.0 and 1.0",
126            )
127            .with_expected("0.0 - 1.0")
128            .with_actual(config.retrieval.content.min_relevance_score.to_string()));
129        }
130
131        if config.retrieval.content.hierarchical_min_per_level < 0.0
132            || config.retrieval.content.hierarchical_min_per_level > 1.0
133        {
134            errors.push(ValidationError::error(
135                "retrieval.content.hierarchical_min_per_level",
136                "Hierarchical min per level must be between 0.0 and 1.0",
137            ));
138        }
139
140        // Concurrency ranges
141        if config.concurrency.max_concurrent_requests == 0 {
142            errors.push(ValidationError::error(
143                "concurrency.max_concurrent_requests",
144                "Max concurrent requests must be greater than 0",
145            ));
146        }
147
148        if config.concurrency.requests_per_minute == 0 {
149            errors.push(ValidationError::error(
150                "concurrency.requests_per_minute",
151                "Requests per minute must be greater than 0",
152            ));
153        }
154
155        // Fallback ranges
156        if config.fallback.max_retries == 0 {
157            errors.push(ValidationError::warning(
158                "fallback.max_retries",
159                "Max retries is 0, fallback will not retry",
160            ));
161        }
162    }
163}
164
165/// Validates configuration consistency.
166#[derive(Debug)]
167struct ConsistencyValidator;
168
169impl ValidationRule for ConsistencyValidator {
170    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
171        // Check if summary tokens are reasonable
172        if config.summary.max_tokens > config.indexer.max_segment_tokens {
173            errors.push(ValidationError::warning(
174                "summary.max_tokens",
175                "Summary max tokens exceeds max segment tokens",
176            )
177            .with_expected(format!("<= {}", config.indexer.max_segment_tokens))
178            .with_actual(config.summary.max_tokens.to_string()));
179        }
180
181        // Check if content token budget is reasonable
182        if config.retrieval.content.token_budget > 100000 {
183            errors.push(ValidationError::warning(
184                "retrieval.content.token_budget",
185                "Token budget is very high, may cause performance issues",
186            ).with_actual(config.retrieval.content.token_budget.to_string()));
187        }
188
189        // Check if sufficiency thresholds are consistent
190        if config.retrieval.sufficiency.min_tokens > config.retrieval.sufficiency.target_tokens {
191            errors.push(ValidationError::error(
192                "retrieval.sufficiency.min_tokens",
193                "Min tokens cannot exceed target tokens",
194            )
195            .with_expected(format!("<= {}", config.retrieval.sufficiency.target_tokens))
196            .with_actual(config.retrieval.sufficiency.min_tokens.to_string()));
197        }
198
199        if config.retrieval.sufficiency.target_tokens > config.retrieval.sufficiency.max_tokens {
200            errors.push(ValidationError::error(
201                "retrieval.sufficiency.target_tokens",
202                "Target tokens cannot exceed max tokens",
203            )
204            .with_expected(format!("<= {}", config.retrieval.sufficiency.max_tokens))
205            .with_actual(config.retrieval.sufficiency.target_tokens.to_string()));
206        }
207
208        // Check scoring strategy validity
209        let valid_strategies = ["keyword_only", "keyword_bm25", "hybrid"];
210        if !valid_strategies.contains(&config.retrieval.content.scoring_strategy.as_str()) {
211            errors.push(ValidationError::error(
212                "retrieval.content.scoring_strategy",
213                "Invalid scoring strategy",
214            )
215            .with_expected(format!("one of: {:?}", valid_strategies))
216            .with_actual(config.retrieval.content.scoring_strategy.clone()));
217        }
218
219        // Check output format validity
220        let valid_formats = ["markdown", "json", "tree", "flat"];
221        if !valid_formats.contains(&config.retrieval.content.output_format.as_str()) {
222            errors.push(ValidationError::error(
223                "retrieval.content.output_format",
224                "Invalid output format",
225            )
226            .with_expected(format!("one of: {:?}", valid_formats))
227            .with_actual(config.retrieval.content.output_format.clone()));
228        }
229    }
230}
231
232/// Validates configuration dependencies.
233#[derive(Debug)]
234struct DependencyValidator;
235
236impl ValidationRule for DependencyValidator {
237    fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
238        // Check if API key is available when summaries are needed
239        if config.summary.api_key.is_none() {
240            // Check if any feature requires LLM
241            if config.indexer.max_summary_tokens > 0 {
242                errors.push(ValidationError::info(
243                    "summary.api_key",
244                    "No API key configured, summary generation will be disabled",
245                ));
246            }
247        }
248
249        // Check fallback configuration
250        if config.fallback.enabled {
251            if config.fallback.models.is_empty() && config.fallback.endpoints.is_empty() {
252                errors.push(ValidationError::warning(
253                    "fallback.models",
254                    "Fallback enabled but no fallback models or endpoints configured",
255                ));
256            }
257
258            // Check retry behavior consistency
259            if matches!(
260                config.fallback.on_rate_limit,
261                super::types::FallbackBehavior::Fallback
262            ) && config.fallback.models.is_empty()
263            {
264                errors.push(ValidationError::error(
265                    "fallback.models",
266                    "Rate limit behavior is 'fallback' but no fallback models configured",
267                ));
268            }
269        }
270
271        // Check cache configuration
272        if config.retrieval.cache.max_entries == 0 {
273            errors.push(ValidationError::warning(
274                "retrieval.cache.max_entries",
275                "Cache disabled (max_entries = 0), performance may be impacted",
276            ));
277        }
278
279        // Check strategy configuration
280        if config.retrieval.strategy.exploration_weight <= 0.0 {
281            errors.push(ValidationError::error(
282                "retrieval.strategy.exploration_weight",
283                "Exploration weight must be positive",
284            ).with_actual(config.retrieval.strategy.exploration_weight.to_string()));
285        }
286
287        // Check similarity thresholds are ordered correctly
288        if config.retrieval.strategy.low_similarity_threshold
289            >= config.retrieval.strategy.high_similarity_threshold
290        {
291            errors.push(ValidationError::error(
292                "retrieval.strategy.low_similarity_threshold",
293                "Low similarity threshold must be less than high similarity threshold",
294            )
295            .with_expected(format!(
296                "< {}",
297                config.retrieval.strategy.high_similarity_threshold
298            ))
299            .with_actual(config.retrieval.strategy.low_similarity_threshold.to_string()));
300        }
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration must not trigger any `Error`-severity
    /// finding; findings of lower severity are allowed.
    #[test]
    fn test_validator_valid_config() {
        let config = Config::default();
        let validator = ConfigValidator::new();
        // Default config should pass validation (no errors, warnings are ok)
        let result = validator.validate(&config);
        assert!(result.is_ok(), "Default config should pass validation");
    }

    /// Both out-of-range values set here must show up in the error list.
    #[test]
    fn test_validator_catches_range_errors() {
        let mut config = Config::default();
        config.retrieval.content.token_budget = 0;
        config.retrieval.content.min_relevance_score = 1.5;

        let validator = ConfigValidator::new();
        let result = validator.validate(&config);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.errors.iter().any(|e| e.path.contains("token_budget")));
        assert!(err
            .errors
            .iter()
            .any(|e| e.path.contains("min_relevance_score")));
    }

    /// `min_tokens > target_tokens` is an error-level inconsistency.
    #[test]
    fn test_validator_catches_consistency_errors() {
        let mut config = Config::default();
        config.retrieval.sufficiency.min_tokens = 3000;
        config.retrieval.sufficiency.target_tokens = 2000;

        let validator = ConfigValidator::new();
        let result = validator.validate(&config);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.errors.iter().any(|e| e.path.contains("min_tokens")));
    }

    /// Fallback enabled with no models and no endpoints must produce a
    /// non-error finding on `fallback.models`.
    ///
    /// The rule is invoked directly because `ConfigValidator::validate`
    /// discards lower-severity findings when it returns `Ok`, which would
    /// leave nothing to assert on.
    #[test]
    fn test_validator_catches_dependency_warnings() {
        let mut config = Config::default();
        config.fallback.enabled = true;
        config.fallback.models.clear();
        // The warning requires both lists to be empty.
        config.fallback.endpoints.clear();

        let mut findings = Vec::new();
        DependencyValidator.validate(&config, &mut findings);

        assert!(
            findings
                .iter()
                .any(|e| e.path.contains("fallback.models") && e.severity != Severity::Error),
            "expected a non-error finding for fallback.models"
        );
    }
}
vectorless/config/validator.rs

vectorless/config/
validator.rs