agentic_config/
validation.rs

1//! Advisory validation for `AgenticConfig`.
2//!
3//! Validation is advisory - it produces warnings but doesn't prevent
4//! the config from being used. This allows tools to work with imperfect
5//! configs while still surfacing potential issues.
6
7use crate::types::AgenticConfig;
8
/// A non-fatal finding about a configuration value.
///
/// Each warning pairs a machine-readable `code` with the config `path` it
/// concerns and a human-readable `message`. Its `Display` form is
/// `[code] path: message`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AdvisoryWarning {
    /// Machine-readable identifier for the kind of warning
    /// (for example `logging.level.invalid`).
    pub code: &'static str,

    /// Explanation intended for a human reader.
    pub message: String,

    /// Config path of the field the warning refers to
    /// (for example `logging.level`).
    pub path: &'static str,
}

impl AdvisoryWarning {
    /// Build a warning from its `code`, config `path`, and `message`.
    ///
    /// `message` accepts anything convertible into a `String`, so string
    /// literals and `format!` results can both be passed directly.
    pub fn new(code: &'static str, path: &'static str, message: impl Into<String>) -> Self {
        let message = message.into();
        Self {
            code,
            message,
            path,
        }
    }
}

impl std::fmt::Display for AdvisoryWarning {
    /// Renders the warning as `[code] path: message`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            code,
            message,
            path,
        } = self;
        write!(f, "[{code}] {path}: {message}")
    }
}
38
39/// Detect deprecated config keys in raw TOML before deserialization.
40///
41/// This inspects the merged TOML Value to detect keys that are no longer
42/// used and emit advisory warnings. The config will still load successfully,
43/// but users will be notified that they should update their configuration.
44pub fn detect_deprecated_keys_toml(v: &toml::Value) -> Vec<AdvisoryWarning> {
45    let mut warnings = Vec::new();
46
47    // Warn if old "thoughts" section exists (removed in this version)
48    if let Some(tbl) = v.as_table() {
49        if tbl.contains_key("thoughts") {
50            warnings.push(AdvisoryWarning::new(
51                "config.deprecated.thoughts",
52                "thoughts",
53                "The 'thoughts' section has been removed. thoughts-core now has its own config.",
54            ));
55        }
56        if tbl.contains_key("models") {
57            warnings.push(AdvisoryWarning::new(
58                "config.deprecated.models",
59                "models",
60                "The 'models' section has been replaced by 'subagents' and 'reasoning'.",
61            ));
62        }
63    }
64
65    warnings
66}
67
68// TODO(2): This list must be kept in sync with AgenticConfig fields in types.rs.
69// Consider generating dynamically via schemars introspection, or adding a compile-time
70// test that extracts field names from AgenticConfig's JsonSchema and verifies they
71// match this list. Currently requires manual updates when adding new config sections.
72// See research/pr127-group7-type-safety-external-type-dependencies.md for analysis.
73
/// Root-level keys that are recognised in a config document.
///
/// `detect_unknown_top_level_keys_toml` emits an advisory warning for any
/// root key that does not appear in this list.
const KNOWN_TOP_LEVEL_KEYS: &[&str] = &[
    "$schema",
    "subagents",
    "reasoning",
    "services",
    "orchestrator",
    "web_retrieval",
    "cli_tools",
    "logging",
];
86
/// Documented GPT-5.2 ceiling for `max_completion_tokens`.
///
/// `validate` warns (but does not clamp) when a GPT-5.2 executor model is
/// configured with a larger value.
const GPT5_2_COMPLETION_TOKENS_DOC_MAX: u32 = 128_000;
88
89/// Detect unknown top-level keys in raw TOML before deserialization.
90///
91/// Unknown keys at the root are ignored by serde, so we emit an advisory warning
92/// to help users catch typos like `[servics]` instead of `[services]`.
93pub fn detect_unknown_top_level_keys_toml(v: &toml::Value) -> Vec<AdvisoryWarning> {
94    let mut warnings = Vec::new();
95    let Some(tbl) = v.as_table() else {
96        return warnings;
97    };
98
99    for key in tbl.keys() {
100        if !KNOWN_TOP_LEVEL_KEYS.contains(&key.as_str()) {
101            warnings.push(AdvisoryWarning::new(
102                "config.unknown_top_level_key",
103                "$",
104                format!("Unknown top-level key '{key}' will be ignored"),
105            ));
106        }
107    }
108
109    warnings
110}
111
112/// Validate a configuration and return advisory warnings.
113///
114/// This does NOT fail on issues - it only collects warnings that
115/// callers can choose to display or log.
116pub fn validate(cfg: &AgenticConfig) -> Vec<AdvisoryWarning> {
117    let mut warnings = vec![];
118
119    // Validate service URLs
120    validate_url(
121        &cfg.services.anthropic.base_url,
122        "services.anthropic.base_url",
123        "services.anthropic.base_url.invalid",
124        &mut warnings,
125    );
126
127    validate_url(
128        &cfg.services.exa.base_url,
129        "services.exa.base_url",
130        "services.exa.base_url.invalid",
131        &mut warnings,
132    );
133
134    // Validate log level
135    let valid_levels = ["trace", "debug", "info", "warn", "error"];
136    if !valid_levels.contains(&cfg.logging.level.to_lowercase().as_str()) {
137        warnings.push(AdvisoryWarning {
138            code: "logging.level.invalid",
139            path: "logging.level",
140            message: format!(
141                "Unknown log level '{}'. Expected one of: {}",
142                cfg.logging.level,
143                valid_levels.join(", ")
144            ),
145        });
146    }
147
148    // Validate subagents model values are not empty
149    if cfg.subagents.locator_model.trim().is_empty() {
150        warnings.push(AdvisoryWarning::new(
151            "subagents.locator_model.empty",
152            "subagents.locator_model",
153            "value is empty",
154        ));
155    }
156    if cfg.subagents.analyzer_model.trim().is_empty() {
157        warnings.push(AdvisoryWarning::new(
158            "subagents.analyzer_model.empty",
159            "subagents.analyzer_model",
160            "value is empty",
161        ));
162    }
163
164    // Validate reasoning model values are not empty
165    if cfg.reasoning.optimizer_model.trim().is_empty() {
166        warnings.push(AdvisoryWarning::new(
167            "reasoning.optimizer_model.empty",
168            "reasoning.optimizer_model",
169            "value is empty",
170        ));
171    }
172    if cfg.reasoning.executor_model.trim().is_empty() {
173        warnings.push(AdvisoryWarning::new(
174            "reasoning.executor_model.empty",
175            "reasoning.executor_model",
176            "value is empty",
177        ));
178    }
179
180    // Validate OpenRouter format for reasoning models (should contain '/')
181    if !cfg.reasoning.optimizer_model.trim().is_empty()
182        && !cfg.reasoning.optimizer_model.contains('/')
183    {
184        warnings.push(AdvisoryWarning::new(
185            "reasoning.optimizer_model.format",
186            "reasoning.optimizer_model",
187            "expected OpenRouter format like `anthropic/claude-sonnet-4.6`",
188        ));
189    }
190
191    if !cfg.reasoning.executor_model.trim().is_empty()
192        && !cfg.reasoning.executor_model.contains('/')
193    {
194        warnings.push(AdvisoryWarning::new(
195            "reasoning.executor_model.format",
196            "reasoning.executor_model",
197            "expected OpenRouter format like `openai/gpt-5.2`",
198        ));
199    } else if !cfg.reasoning.executor_model.trim().is_empty()
200        && !cfg
201            .reasoning
202            .executor_model
203            .to_lowercase()
204            .contains("gpt-5")
205    {
206        warnings.push(AdvisoryWarning::new(
207            "reasoning.executor_model.suspicious",
208            "reasoning.executor_model",
209            "executor_model does not look like a GPT-5 model; reasoning_effort may not work",
210        ));
211    }
212
213    // Validate reasoning_effort enum
214    if let Some(eff) = cfg.reasoning.reasoning_effort.as_deref() {
215        let eff_lc = eff.trim().to_lowercase();
216        if !matches!(eff_lc.as_str(), "low" | "medium" | "high" | "xhigh") {
217            warnings.push(AdvisoryWarning::new(
218                "reasoning.reasoning_effort.invalid",
219                "reasoning.reasoning_effort",
220                "expected one of: low, medium, high, xhigh",
221            ));
222        }
223    }
224
225    if cfg
226        .reasoning
227        .executor_model
228        .to_lowercase()
229        .contains("gpt-5.2")
230        && let Some(n) = cfg.reasoning.max_completion_tokens
231        && n > GPT5_2_COMPLETION_TOKENS_DOC_MAX
232    {
233        warnings.push(AdvisoryWarning::new(
234            "reasoning.max_completion_tokens.exceeds_doc",
235            "reasoning.max_completion_tokens",
236            format!(
237                "max_completion_tokens={n} exceeds documented GPT-5.2 ceiling {GPT5_2_COMPLETION_TOKENS_DOC_MAX}; request may be rejected or truncate unexpectedly (warn-only; not clamped)."
238            ),
239        ));
240    }
241
242    if let Some(n) = cfg.reasoning.max_input_tokens
243        && n > 250_000
244    {
245        warnings.push(AdvisoryWarning::new(
246            "reasoning.max_input_tokens.suspicious",
247            "reasoning.max_input_tokens",
248            format!(
249                "max_input_tokens={n} exceeds the tool's default prompt cap (250000); ensure executor model supports this context size (warn-only)."
250            ),
251        ));
252    }
253
254    // Validate orchestrator.compaction_threshold is in (0,1]
255    if !(0.0..=1.0).contains(&cfg.orchestrator.compaction_threshold) {
256        warnings.push(AdvisoryWarning::new(
257            "orchestrator.compaction_threshold.out_of_range",
258            "orchestrator.compaction_threshold",
259            "expected a value between 0.0 and 1.0",
260        ));
261    }
262
263    // Validate web_retrieval: default_search_results <= max_search_results
264    if cfg.web_retrieval.default_search_results > cfg.web_retrieval.max_search_results {
265        warnings.push(AdvisoryWarning::new(
266            "web_retrieval.default_exceeds_max",
267            "web_retrieval.default_search_results",
268            "default_search_results exceeds max_search_results",
269        ));
270    }
271
272    // Validate web_retrieval.summarizer.model is not empty
273    if cfg.web_retrieval.summarizer.model.trim().is_empty() {
274        warnings.push(AdvisoryWarning::new(
275            "web_retrieval.summarizer.model.empty",
276            "web_retrieval.summarizer.model",
277            "value is empty",
278        ));
279    }
280
281    // Validate cli_tools.max_depth is reasonable
282    if cfg.cli_tools.max_depth == 0 {
283        warnings.push(AdvisoryWarning::new(
284            "cli_tools.max_depth.zero",
285            "cli_tools.max_depth",
286            "max_depth is 0, directory listing may be limited",
287        ));
288    }
289
290    warnings
291}
292
293fn validate_url(
294    url: &str,
295    path: &'static str,
296    code: &'static str,
297    warnings: &mut Vec<AdvisoryWarning>,
298) {
299    if !url.starts_with("http://") && !url.starts_with("https://") {
300        warnings.push(AdvisoryWarning {
301            code,
302            path,
303            message: format!("Expected an http(s) URL, got: '{url}'"),
304        });
305    }
306}
307
#[cfg(test)]
mod tests {
    //! Unit tests for the advisory validation checks and the raw-TOML key
    //! detectors.

    use super::*;

    /// Returns `true` when any warning in `warnings` carries `code`.
    fn has_code(warnings: &[AdvisoryWarning], code: &str) -> bool {
        warnings.iter().any(|w| w.code == code)
    }

    /// Parses a TOML snippet used as a test fixture.
    fn parse_toml(src: &str) -> toml::Value {
        toml::from_str(src).unwrap()
    }

    #[test]
    fn test_default_config_has_no_warnings() {
        let config = AgenticConfig::default();
        let warnings = validate(&config);
        assert!(
            warnings.is_empty(),
            "Default config should have no warnings: {warnings:?}"
        );
    }

    #[test]
    fn test_invalid_anthropic_url_warns() {
        let mut config = AgenticConfig::default();
        config.services.anthropic.base_url = "not-a-url".into();

        let warnings = validate(&config);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].code, "services.anthropic.base_url.invalid");
    }

    #[test]
    fn test_invalid_exa_url_warns() {
        let mut config = AgenticConfig::default();
        config.services.exa.base_url = "ftp://example.com".into();

        let warnings = validate(&config);
        assert!(has_code(&warnings, "services.exa.base_url.invalid"));
    }

    #[test]
    fn test_invalid_log_level_warns() {
        let mut config = AgenticConfig::default();
        config.logging.level = "verbose".into();

        let warnings = validate(&config);
        assert!(has_code(&warnings, "logging.level.invalid"));
    }

    #[test]
    fn test_log_level_is_case_insensitive() {
        let mut config = AgenticConfig::default();
        config.logging.level = "INFO".into();

        let warnings = validate(&config);
        assert!(!has_code(&warnings, "logging.level.invalid"));
    }

    #[test]
    fn test_warning_display() {
        let warning = AdvisoryWarning {
            code: "test.code",
            path: "test.path",
            message: "Test message".into(),
        };
        let display = format!("{warning}");
        assert_eq!(display, "[test.code] test.path: Test message");
    }

    #[test]
    fn test_empty_subagent_model_warns() {
        let mut config = AgenticConfig::default();
        config.subagents.locator_model = String::new();

        let warnings = validate(&config);
        assert!(has_code(&warnings, "subagents.locator_model.empty"));
    }

    #[test]
    fn test_reasoning_optimizer_model_format_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.optimizer_model = "claude-sonnet-4.6".into(); // Missing provider prefix

        let warnings = validate(&config);
        assert!(has_code(&warnings, "reasoning.optimizer_model.format"));
    }

    #[test]
    fn test_reasoning_executor_model_suspicious_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.executor_model = "anthropic/claude-sonnet-4.6".into(); // Not GPT-5

        let warnings = validate(&config);
        assert!(has_code(&warnings, "reasoning.executor_model.suspicious"));
    }

    #[test]
    fn test_reasoning_effort_invalid_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.reasoning_effort = Some("extreme".into()); // Invalid value

        let warnings = validate(&config);
        assert!(has_code(&warnings, "reasoning.reasoning_effort.invalid"));
    }

    #[test]
    fn test_reasoning_effort_valid_no_warning() {
        let mut config = AgenticConfig::default();
        config.reasoning.reasoning_effort = Some("high".into());

        let warnings = validate(&config);
        assert!(!has_code(&warnings, "reasoning.reasoning_effort.invalid"));
    }

    #[test]
    fn test_orchestrator_compaction_threshold_out_of_range() {
        let mut config = AgenticConfig::default();
        config.orchestrator.compaction_threshold = 1.5; // Invalid

        let warnings = validate(&config);
        assert!(has_code(
            &warnings,
            "orchestrator.compaction_threshold.out_of_range"
        ));
    }

    #[test]
    fn test_web_retrieval_default_exceeds_max() {
        let mut config = AgenticConfig::default();
        config.web_retrieval.default_search_results = 100;
        config.web_retrieval.max_search_results = 20;

        let warnings = validate(&config);
        assert!(has_code(&warnings, "web_retrieval.default_exceeds_max"));
    }

    #[test]
    fn test_blank_summarizer_model_warns() {
        let mut config = AgenticConfig::default();
        config.web_retrieval.summarizer.model = "   ".into(); // Whitespace only

        let warnings = validate(&config);
        assert!(has_code(&warnings, "web_retrieval.summarizer.model.empty"));
    }

    #[test]
    fn test_cli_tools_max_depth_zero_warns() {
        let mut config = AgenticConfig::default();
        config.cli_tools.max_depth = 0;

        let warnings = validate(&config);
        assert!(has_code(&warnings, "cli_tools.max_depth.zero"));
    }

    #[test]
    fn test_detect_deprecated_thoughts_toml() {
        let toml_val = parse_toml("\n[thoughts]\nmount_dirs = {}\n");

        let warnings = detect_deprecated_keys_toml(&toml_val);
        assert!(has_code(&warnings, "config.deprecated.thoughts"));
    }

    #[test]
    fn test_detect_deprecated_models_toml() {
        let toml_val = parse_toml("\n[models]\ndefault = 'some-model'\n");

        let warnings = detect_deprecated_keys_toml(&toml_val);
        assert!(has_code(&warnings, "config.deprecated.models"));
        assert!(!has_code(&warnings, "config.deprecated.thoughts"));
    }

    #[test]
    fn test_detect_deprecated_reasoning_token_limit_toml_is_silent() {
        let toml_val = parse_toml("\n[reasoning]\ntoken_limit = 12345\n");

        let warnings = detect_deprecated_keys_toml(&toml_val);
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_detect_unknown_top_level_key_toml() {
        let toml_val = parse_toml("\n[servics]\nfoo = 1\n");

        let warnings = detect_unknown_top_level_keys_toml(&toml_val);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].code, "config.unknown_top_level_key");
        assert_eq!(warnings[0].path, "$");
        assert!(warnings[0].message.contains("servics"));
    }

    #[test]
    fn test_known_top_level_keys_are_not_flagged() {
        let toml_val = parse_toml(
            r#"
"$schema" = "https://example.com/schema.json"

[services]

[logging]
level = "info"
"#,
        );

        let warnings = detect_unknown_top_level_keys_toml(&toml_val);
        assert!(
            warnings.is_empty(),
            "Known keys should not be flagged: {warnings:?}"
        );
    }

    #[test]
    fn test_reasoning_max_completion_tokens_above_doc_max_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.max_completion_tokens = Some(128_001);

        let warnings = validate(&config);
        assert!(has_code(
            &warnings,
            "reasoning.max_completion_tokens.exceeds_doc"
        ));
    }

    #[test]
    fn test_reasoning_max_input_tokens_above_default_cap_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.max_input_tokens = Some(250_001);

        let warnings = validate(&config);
        assert!(has_code(&warnings, "reasoning.max_input_tokens.suspicious"));
    }
}
agentic_config/validation.rs

agentic_config/
validation.rs