1use super::types::{Config, ConfigValidationError, Severity, ValidationError};
10
11#[derive(Debug, Default)]
13pub struct ConfigValidator {
14 rules: Vec<Box<dyn ValidationRule>>,
16}
17
18impl ConfigValidator {
19 pub fn new() -> Self {
21 Self {
22 rules: vec![
23 Box::new(RangeValidator),
24 Box::new(ConsistencyValidator),
25 Box::new(DependencyValidator),
26 ],
27 }
28 }
29
30 pub fn with_rule(mut self, rule: Box<dyn ValidationRule>) -> Self {
32 self.rules.push(rule);
33 self
34 }
35
36 pub fn validate(&self, config: &Config) -> Result<(), ConfigValidationError> {
38 let mut errors = Vec::new();
39
40 for rule in &self.rules {
41 rule.validate(config, &mut errors);
42 }
43
44 let has_errors = errors.iter().any(|e| e.severity == Severity::Error);
46
47 if has_errors {
48 Err(ConfigValidationError { errors })
49 } else {
50 Ok(())
51 }
52 }
53}
54
55pub trait ValidationRule: std::fmt::Debug + Send + Sync {
57 fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>);
59}
60
61#[derive(Debug)]
63struct RangeValidator;
64
65impl ValidationRule for RangeValidator {
66 fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
67 if config.indexer.subsection_threshold == 0 {
69 errors.push(ValidationError::error(
70 "indexer.subsection_threshold",
71 "Subsection threshold must be greater than 0",
72 ));
73 }
74
75 if config.indexer.subsection_threshold > 10000 {
76 errors.push(ValidationError::warning(
77 "indexer.subsection_threshold",
78 "Subsection threshold is very high, may impact performance",
79 ).with_actual(config.indexer.subsection_threshold.to_string()));
80 }
81
82 if config.summary.max_tokens == 0 {
84 errors.push(ValidationError::error(
85 "summary.max_tokens",
86 "Summary max tokens must be greater than 0",
87 ));
88 }
89
90 if config.summary.temperature < 0.0 || config.summary.temperature > 2.0 {
91 errors.push(ValidationError::warning(
92 "summary.temperature",
93 "Temperature outside typical range [0.0, 2.0]",
94 ).with_actual(config.summary.temperature.to_string()));
95 }
96
97 if config.retrieval.top_k == 0 {
99 errors.push(ValidationError::error(
100 "retrieval.top_k",
101 "Top K must be greater than 0",
102 ));
103 }
104
105 if config.retrieval.search.beam_width == 0 {
106 errors.push(ValidationError::error(
107 "retrieval.search.beam_width",
108 "Beam width must be greater than 0",
109 ));
110 }
111
112 if config.retrieval.content.token_budget == 0 {
114 errors.push(ValidationError::error(
115 "retrieval.content.token_budget",
116 "Token budget must be greater than 0",
117 ));
118 }
119
120 if config.retrieval.content.min_relevance_score < 0.0
121 || config.retrieval.content.min_relevance_score > 1.0
122 {
123 errors.push(ValidationError::error(
124 "retrieval.content.min_relevance_score",
125 "Min relevance score must be between 0.0 and 1.0",
126 )
127 .with_expected("0.0 - 1.0")
128 .with_actual(config.retrieval.content.min_relevance_score.to_string()));
129 }
130
131 if config.retrieval.content.hierarchical_min_per_level < 0.0
132 || config.retrieval.content.hierarchical_min_per_level > 1.0
133 {
134 errors.push(ValidationError::error(
135 "retrieval.content.hierarchical_min_per_level",
136 "Hierarchical min per level must be between 0.0 and 1.0",
137 ));
138 }
139
140 if config.concurrency.max_concurrent_requests == 0 {
142 errors.push(ValidationError::error(
143 "concurrency.max_concurrent_requests",
144 "Max concurrent requests must be greater than 0",
145 ));
146 }
147
148 if config.concurrency.requests_per_minute == 0 {
149 errors.push(ValidationError::error(
150 "concurrency.requests_per_minute",
151 "Requests per minute must be greater than 0",
152 ));
153 }
154
155 if config.fallback.max_retries == 0 {
157 errors.push(ValidationError::warning(
158 "fallback.max_retries",
159 "Max retries is 0, fallback will not retry",
160 ));
161 }
162 }
163}
164
165#[derive(Debug)]
167struct ConsistencyValidator;
168
169impl ValidationRule for ConsistencyValidator {
170 fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
171 if config.summary.max_tokens > config.indexer.max_segment_tokens {
173 errors.push(ValidationError::warning(
174 "summary.max_tokens",
175 "Summary max tokens exceeds max segment tokens",
176 )
177 .with_expected(format!("<= {}", config.indexer.max_segment_tokens))
178 .with_actual(config.summary.max_tokens.to_string()));
179 }
180
181 if config.retrieval.content.token_budget > 100000 {
183 errors.push(ValidationError::warning(
184 "retrieval.content.token_budget",
185 "Token budget is very high, may cause performance issues",
186 ).with_actual(config.retrieval.content.token_budget.to_string()));
187 }
188
189 if config.retrieval.sufficiency.min_tokens > config.retrieval.sufficiency.target_tokens {
191 errors.push(ValidationError::error(
192 "retrieval.sufficiency.min_tokens",
193 "Min tokens cannot exceed target tokens",
194 )
195 .with_expected(format!("<= {}", config.retrieval.sufficiency.target_tokens))
196 .with_actual(config.retrieval.sufficiency.min_tokens.to_string()));
197 }
198
199 if config.retrieval.sufficiency.target_tokens > config.retrieval.sufficiency.max_tokens {
200 errors.push(ValidationError::error(
201 "retrieval.sufficiency.target_tokens",
202 "Target tokens cannot exceed max tokens",
203 )
204 .with_expected(format!("<= {}", config.retrieval.sufficiency.max_tokens))
205 .with_actual(config.retrieval.sufficiency.target_tokens.to_string()));
206 }
207
208 let valid_strategies = ["keyword_only", "keyword_bm25", "hybrid"];
210 if !valid_strategies.contains(&config.retrieval.content.scoring_strategy.as_str()) {
211 errors.push(ValidationError::error(
212 "retrieval.content.scoring_strategy",
213 "Invalid scoring strategy",
214 )
215 .with_expected(format!("one of: {:?}", valid_strategies))
216 .with_actual(config.retrieval.content.scoring_strategy.clone()));
217 }
218
219 let valid_formats = ["markdown", "json", "tree", "flat"];
221 if !valid_formats.contains(&config.retrieval.content.output_format.as_str()) {
222 errors.push(ValidationError::error(
223 "retrieval.content.output_format",
224 "Invalid output format",
225 )
226 .with_expected(format!("one of: {:?}", valid_formats))
227 .with_actual(config.retrieval.content.output_format.clone()));
228 }
229 }
230}
231
232#[derive(Debug)]
234struct DependencyValidator;
235
236impl ValidationRule for DependencyValidator {
237 fn validate(&self, config: &Config, errors: &mut Vec<ValidationError>) {
238 if config.summary.api_key.is_none() {
240 if config.indexer.max_summary_tokens > 0 {
242 errors.push(ValidationError::info(
243 "summary.api_key",
244 "No API key configured, summary generation will be disabled",
245 ));
246 }
247 }
248
249 if config.fallback.enabled {
251 if config.fallback.models.is_empty() && config.fallback.endpoints.is_empty() {
252 errors.push(ValidationError::warning(
253 "fallback.models",
254 "Fallback enabled but no fallback models or endpoints configured",
255 ));
256 }
257
258 if matches!(
260 config.fallback.on_rate_limit,
261 super::types::FallbackBehavior::Fallback
262 ) && config.fallback.models.is_empty()
263 {
264 errors.push(ValidationError::error(
265 "fallback.models",
266 "Rate limit behavior is 'fallback' but no fallback models configured",
267 ));
268 }
269 }
270
271 if config.retrieval.cache.max_entries == 0 {
273 errors.push(ValidationError::warning(
274 "retrieval.cache.max_entries",
275 "Cache disabled (max_entries = 0), performance may be impacted",
276 ));
277 }
278
279 if config.retrieval.strategy.exploration_weight <= 0.0 {
281 errors.push(ValidationError::error(
282 "retrieval.strategy.exploration_weight",
283 "Exploration weight must be positive",
284 ).with_actual(config.retrieval.strategy.exploration_weight.to_string()));
285 }
286
287 if config.retrieval.strategy.low_similarity_threshold
289 >= config.retrieval.strategy.high_similarity_threshold
290 {
291 errors.push(ValidationError::error(
292 "retrieval.strategy.low_similarity_threshold",
293 "Low similarity threshold must be less than high similarity threshold",
294 )
295 .with_expected(format!(
296 "< {}",
297 config.retrieval.strategy.high_similarity_threshold
298 ))
299 .with_actual(config.retrieval.strategy.low_similarity_threshold.to_string()));
300 }
301 }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration must not produce any error-severity finding.
    #[test]
    fn test_validator_valid_config() {
        let config = Config::default();
        let validator = ConfigValidator::new();
        let result = validator.validate(&config);
        assert!(result.is_ok(), "Default config should pass validation");
    }

    /// Both out-of-range values set here must be reported, not just the first one.
    #[test]
    fn test_validator_catches_range_errors() {
        let mut config = Config::default();
        config.retrieval.content.token_budget = 0;
        config.retrieval.content.min_relevance_score = 1.5;

        let validator = ConfigValidator::new();
        let result = validator.validate(&config);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.errors.iter().any(|e| e.path.contains("token_budget")));
        assert!(err
            .errors
            .iter()
            .any(|e| e.path.contains("min_relevance_score")));
    }

    /// `min_tokens` above `target_tokens` is an error-severity inconsistency.
    #[test]
    fn test_validator_catches_consistency_errors() {
        let mut config = Config::default();
        config.retrieval.sufficiency.min_tokens = 3000;
        config.retrieval.sufficiency.target_tokens = 2000;

        let validator = ConfigValidator::new();
        let result = validator.validate(&config);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.errors.iter().any(|e| e.path.contains("min_tokens")));
    }

    /// Fallback enabled without any models or endpoints must yield a `fallback.models`
    /// finding.
    #[test]
    fn test_validator_catches_dependency_warnings() {
        let mut config = Config::default();
        config.fallback.enabled = true;
        config.fallback.models.clear();
        config.fallback.endpoints.clear();

        // `ConfigValidator::validate` returns `Ok(())` when only warnings were raised, so
        // asserting on its `Err` branch would pass vacuously. Run the rule directly and
        // inspect what it reported instead.
        let mut findings = Vec::new();
        DependencyValidator.validate(&config, &mut findings);

        assert!(findings.iter().any(|e| e.path.contains("fallback.models")));
    }
}