Skip to main content

agentic_config/
validation.rs

1//! Advisory validation for `AgenticConfig`.
2//!
3//! Validation is advisory - it produces warnings but doesn't prevent
4//! the config from being used. This allows tools to work with imperfect
5//! configs while still surfacing potential issues.
6
7use crate::types::AgenticConfig;
8use std::collections::BTreeSet;
9
/// A single non-fatal finding produced while inspecting a configuration.
///
/// Formats via `Display` as `[code] path: message`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AdvisoryWarning {
    /// Stable, machine-readable identifier for the kind of warning.
    pub code: &'static str,

    /// Explanation of the problem, intended for people.
    pub message: String,

    /// Location in the config of the field the warning refers to.
    pub path: &'static str,
}

impl AdvisoryWarning {
    /// Build a warning from its code, config path and message.
    ///
    /// `message` may be anything convertible into a `String` (for example a
    /// string literal or the result of `format!`).
    pub fn new(code: &'static str, path: &'static str, message: impl Into<String>) -> Self {
        let message = message.into();
        Self {
            code,
            message,
            path,
        }
    }
}

impl std::fmt::Display for AdvisoryWarning {
    /// Writes the warning as `[code] path: message`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            code,
            message,
            path,
        } = self;
        write!(f, "[{code}] {path}: {message}")
    }
}
39
40/// Detect deprecated config keys in raw TOML before deserialization.
41///
42/// This inspects the merged TOML Value to detect keys that are no longer
43/// used and emit advisory warnings. The config will still load successfully,
44/// but users will be notified that they should update their configuration.
45pub fn detect_deprecated_keys_toml(v: &toml::Value) -> Vec<AdvisoryWarning> {
46    let mut warnings = Vec::new();
47
48    // Warn if old "thoughts" section exists (removed in this version)
49    if let Some(tbl) = v.as_table() {
50        if tbl.contains_key("thoughts") {
51            warnings.push(AdvisoryWarning::new(
52                "config.deprecated.thoughts",
53                "thoughts",
54                "The 'thoughts' section has been removed. thoughts-core now has its own config.",
55            ));
56        }
57        if tbl.contains_key("models") {
58            warnings.push(AdvisoryWarning::new(
59                "config.deprecated.models",
60                "models",
61                "The 'models' section has been replaced by 'subagents' and 'reasoning'.",
62            ));
63        }
64    }
65
66    warnings
67}
68
69// TODO(2): This list must be kept in sync with AgenticConfig fields in types.rs.
70// Consider generating dynamically via schemars introspection, or adding a compile-time
71// test that extracts field names from AgenticConfig's JsonSchema and verifies they
72// match this list. Currently requires manual updates when adding new config sections.
73// See research/pr127-group7-type-safety-external-type-dependencies.md for analysis.
74
/// Known top-level keys for unknown key detection.
///
/// Consulted by `detect_unknown_top_level_keys_toml`: any key at the root of
/// the raw TOML that is not listed here produces a
/// `config.unknown_top_level_key` advisory warning. Matching is exact
/// (case-sensitive string comparison).
const KNOWN_TOP_LEVEL_KEYS: &[&str] = &[
    "$schema",
    "subagents",
    "reasoning",
    "services",
    "orchestrator",
    "web_retrieval",
    "cli_tools",
    "logging",
];
87
/// Documented ceiling for `reasoning.max_completion_tokens` when the executor
/// model name contains `gpt-5.2`.
///
/// `validate` emits a warning (it does not clamp) when the configured value
/// exceeds this number.
const GPT5_2_COMPLETION_TOKENS_DOC_MAX: u32 = 128_000;
89
90/// Detect unknown top-level keys in raw TOML before deserialization.
91///
92/// Unknown keys at the root are ignored by serde, so we emit an advisory warning
93/// to help users catch typos like `[servics]` instead of `[services]`.
94pub fn detect_unknown_top_level_keys_toml(v: &toml::Value) -> Vec<AdvisoryWarning> {
95    let mut warnings = Vec::new();
96    let Some(tbl) = v.as_table() else {
97        return warnings;
98    };
99
100    for key in tbl.keys() {
101        if !KNOWN_TOP_LEVEL_KEYS.contains(&key.as_str()) {
102            warnings.push(AdvisoryWarning::new(
103                "config.unknown_top_level_key",
104                "$",
105                format!("Unknown top-level key '{key}' will be ignored"),
106            ));
107        }
108    }
109
110    warnings
111}
112
113/// Validate a configuration and return advisory warnings.
114///
115/// This does NOT fail on issues - it only collects warnings that
116/// callers can choose to display or log.
117pub fn validate(cfg: &AgenticConfig) -> Vec<AdvisoryWarning> {
118    let mut warnings = vec![];
119
120    // Validate service URLs
121    validate_url(
122        &cfg.services.anthropic.base_url,
123        "services.anthropic.base_url",
124        "services.anthropic.base_url.invalid",
125        &mut warnings,
126    );
127
128    validate_url(
129        &cfg.services.exa.base_url,
130        "services.exa.base_url",
131        "services.exa.base_url.invalid",
132        &mut warnings,
133    );
134
135    // Validate log level
136    let valid_levels = ["trace", "debug", "info", "warn", "error"];
137    if !valid_levels.contains(&cfg.logging.level.to_lowercase().as_str()) {
138        warnings.push(AdvisoryWarning {
139            code: "logging.level.invalid",
140            path: "logging.level",
141            message: format!(
142                "Unknown log level '{}'. Expected one of: {}",
143                cfg.logging.level,
144                valid_levels.join(", ")
145            ),
146        });
147    }
148
149    // Validate subagents model values are not empty
150    if cfg.subagents.locator_model.trim().is_empty() {
151        warnings.push(AdvisoryWarning::new(
152            "subagents.locator_model.empty",
153            "subagents.locator_model",
154            "value is empty",
155        ));
156    }
157    if cfg.subagents.analyzer_model.trim().is_empty() {
158        warnings.push(AdvisoryWarning::new(
159            "subagents.analyzer_model.empty",
160            "subagents.analyzer_model",
161            "value is empty",
162        ));
163    }
164
165    // Validate reasoning model values are not empty
166    if cfg.reasoning.optimizer_model.trim().is_empty() {
167        warnings.push(AdvisoryWarning::new(
168            "reasoning.optimizer_model.empty",
169            "reasoning.optimizer_model",
170            "value is empty",
171        ));
172    }
173    if cfg.reasoning.executor_model.trim().is_empty() {
174        warnings.push(AdvisoryWarning::new(
175            "reasoning.executor_model.empty",
176            "reasoning.executor_model",
177            "value is empty",
178        ));
179    }
180
181    // Validate OpenRouter format for reasoning models (should contain '/')
182    if !cfg.reasoning.optimizer_model.trim().is_empty()
183        && !cfg.reasoning.optimizer_model.contains('/')
184    {
185        warnings.push(AdvisoryWarning::new(
186            "reasoning.optimizer_model.format",
187            "reasoning.optimizer_model",
188            "expected OpenRouter format like `anthropic/claude-sonnet-4.6`",
189        ));
190    }
191
192    if !cfg.reasoning.executor_model.trim().is_empty()
193        && !cfg.reasoning.executor_model.contains('/')
194    {
195        warnings.push(AdvisoryWarning::new(
196            "reasoning.executor_model.format",
197            "reasoning.executor_model",
198            "expected OpenRouter format like `openai/gpt-5.2`",
199        ));
200    } else if !cfg.reasoning.executor_model.trim().is_empty()
201        && !cfg
202            .reasoning
203            .executor_model
204            .to_lowercase()
205            .contains("gpt-5")
206    {
207        warnings.push(AdvisoryWarning::new(
208            "reasoning.executor_model.suspicious",
209            "reasoning.executor_model",
210            "executor_model does not look like a GPT-5 model; reasoning_effort may not work",
211        ));
212    }
213
214    // Validate reasoning_effort enum
215    if let Some(eff) = cfg.reasoning.reasoning_effort.as_deref() {
216        let eff_lc = eff.trim().to_lowercase();
217        if !matches!(eff_lc.as_str(), "low" | "medium" | "high" | "xhigh") {
218            warnings.push(AdvisoryWarning::new(
219                "reasoning.reasoning_effort.invalid",
220                "reasoning.reasoning_effort",
221                "expected one of: low, medium, high, xhigh",
222            ));
223        }
224    }
225
226    if cfg
227        .reasoning
228        .executor_model
229        .to_lowercase()
230        .contains("gpt-5.2")
231        && let Some(n) = cfg.reasoning.max_completion_tokens
232        && n > GPT5_2_COMPLETION_TOKENS_DOC_MAX
233    {
234        warnings.push(AdvisoryWarning::new(
235            "reasoning.max_completion_tokens.exceeds_doc",
236            "reasoning.max_completion_tokens",
237            format!(
238                "max_completion_tokens={n} exceeds documented GPT-5.2 ceiling {GPT5_2_COMPLETION_TOKENS_DOC_MAX}; request may be rejected or truncate unexpectedly (warn-only; not clamped)."
239            ),
240        ));
241    }
242
243    if let Some(n) = cfg.reasoning.max_input_tokens
244        && n > 250_000
245    {
246        warnings.push(AdvisoryWarning::new(
247            "reasoning.max_input_tokens.suspicious",
248            "reasoning.max_input_tokens",
249            format!(
250                "max_input_tokens={n} exceeds the tool's default prompt cap (250000); ensure executor model supports this context size (warn-only)."
251            ),
252        ));
253    }
254
255    // Validate orchestrator.compaction_threshold is in (0,1]
256    if !(0.0..=1.0).contains(&cfg.orchestrator.compaction_threshold) {
257        warnings.push(AdvisoryWarning::new(
258            "orchestrator.compaction_threshold.out_of_range",
259            "orchestrator.compaction_threshold",
260            "expected a value between 0.0 and 1.0",
261        ));
262    }
263
264    validate_command_entries(
265        &cfg.orchestrator.commands.allow,
266        "orchestrator.commands.allow",
267        &mut warnings,
268    );
269    validate_command_entries(
270        &cfg.orchestrator.commands.deny,
271        "orchestrator.commands.deny",
272        &mut warnings,
273    );
274    validate_command_overlap(cfg, &mut warnings);
275
276    // Validate web_retrieval: default_search_results <= max_search_results
277    if cfg.web_retrieval.default_search_results > cfg.web_retrieval.max_search_results {
278        warnings.push(AdvisoryWarning::new(
279            "web_retrieval.default_exceeds_max",
280            "web_retrieval.default_search_results",
281            "default_search_results exceeds max_search_results",
282        ));
283    }
284
285    // Validate web_retrieval.summarizer.model is not empty
286    if cfg.web_retrieval.summarizer.model.trim().is_empty() {
287        warnings.push(AdvisoryWarning::new(
288            "web_retrieval.summarizer.model.empty",
289            "web_retrieval.summarizer.model",
290            "value is empty",
291        ));
292    }
293
294    // Validate cli_tools.max_depth is reasonable
295    if cfg.cli_tools.max_depth == 0 {
296        warnings.push(AdvisoryWarning::new(
297            "cli_tools.max_depth.zero",
298            "cli_tools.max_depth",
299            "max_depth is 0, directory listing may be limited",
300        ));
301    }
302
303    warnings
304}
305
306fn validate_command_entries(
307    entries: &[String],
308    path: &'static str,
309    warnings: &mut Vec<AdvisoryWarning>,
310) {
311    let mut seen = BTreeSet::new();
312    let mut duplicates = BTreeSet::new();
313
314    for entry in entries {
315        let trimmed = entry.trim();
316
317        if trimmed.is_empty() {
318            warnings.push(AdvisoryWarning::new(
319                if path.ends_with("allow") {
320                    "orchestrator.commands.allow.empty_entry"
321                } else {
322                    "orchestrator.commands.deny.empty_entry"
323                },
324                path,
325                format!("entry {entry:?} becomes empty after trimming"),
326            ));
327            continue;
328        }
329
330        if trimmed != entry {
331            warnings.push(AdvisoryWarning::new(
332                if path.ends_with("allow") {
333                    "orchestrator.commands.allow.trimmed_entry"
334                } else {
335                    "orchestrator.commands.deny.trimmed_entry"
336                },
337                path,
338                format!(
339                    "entry {entry:?} has surrounding whitespace; effective value is {trimmed:?}"
340                ),
341            ));
342        }
343
344        if !seen.insert(trimmed.to_string()) {
345            duplicates.insert(trimmed.to_string());
346        }
347    }
348
349    if !duplicates.is_empty() {
350        let duplicates = duplicates.into_iter().collect::<Vec<_>>().join(", ");
351        warnings.push(AdvisoryWarning::new(
352            if path.ends_with("allow") {
353                "orchestrator.commands.allow.duplicate"
354            } else {
355                "orchestrator.commands.deny.duplicate"
356            },
357            path,
358            format!("duplicate command entries after trimming: {duplicates}"),
359        ));
360    }
361}
362
363fn validate_command_overlap(cfg: &AgenticConfig, warnings: &mut Vec<AdvisoryWarning>) {
364    let allow = cfg
365        .orchestrator
366        .commands
367        .allow
368        .iter()
369        .map(|entry| entry.trim())
370        .filter(|entry| !entry.is_empty())
371        .map(str::to_string)
372        .collect::<BTreeSet<_>>();
373    let deny = cfg
374        .orchestrator
375        .commands
376        .deny
377        .iter()
378        .map(|entry| entry.trim())
379        .filter(|entry| !entry.is_empty())
380        .map(str::to_string)
381        .collect::<BTreeSet<_>>();
382
383    let overlap = allow.intersection(&deny).cloned().collect::<Vec<_>>();
384    if overlap.is_empty() {
385        return;
386    }
387
388    warnings.push(AdvisoryWarning::new(
389        "orchestrator.commands.overlap",
390        "orchestrator.commands",
391        format!(
392            "commands appear in both allow and deny: {}. deny wins at runtime",
393            overlap.join(", ")
394        ),
395    ));
396}
397
398fn validate_url(
399    url: &str,
400    path: &'static str,
401    code: &'static str,
402    warnings: &mut Vec<AdvisoryWarning>,
403) {
404    if !url.starts_with("http://") && !url.starts_with("https://") {
405        warnings.push(AdvisoryWarning {
406            code,
407            path,
408            message: format!("Expected an http(s) URL, got: '{url}'"),
409        });
410    }
411}
412
/// Unit tests for the advisory validation helpers.
///
/// Most tests start from `AgenticConfig::default()`, change a single field and
/// assert that the expected warning code is reported.
#[cfg(test)]
mod tests {
    use super::*;

    // --- validate: baseline ---

    #[test]
    fn test_default_config_has_no_warnings() {
        let config = AgenticConfig::default();
        let warnings = validate(&config);
        assert!(
            warnings.is_empty(),
            "Default config should have no warnings: {warnings:?}"
        );
    }

    // --- validate: services / logging ---

    #[test]
    fn test_invalid_anthropic_url_warns() {
        let mut config = AgenticConfig::default();
        config.services.anthropic.base_url = "not-a-url".into();

        let warnings = validate(&config);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].code, "services.anthropic.base_url.invalid");
    }

    #[test]
    fn test_invalid_exa_url_warns() {
        let mut config = AgenticConfig::default();
        config.services.exa.base_url = "ftp://example.com".into();

        let warnings = validate(&config);
        let warning = warnings
            .iter()
            .find(|w| w.code == "services.exa.base_url.invalid")
            .expect("exa url warning expected");

        assert_eq!(warning.path, "services.exa.base_url");
        assert!(warning.message.contains("ftp://example.com"));
    }

    #[test]
    fn test_invalid_log_level_warns() {
        let mut config = AgenticConfig::default();
        config.logging.level = "verbose".into();

        let warnings = validate(&config);
        assert!(warnings.iter().any(|w| w.code == "logging.level.invalid"));
    }

    #[test]
    fn test_log_level_is_case_insensitive() {
        let mut config = AgenticConfig::default();
        config.logging.level = "DEBUG".into();

        let warnings = validate(&config);
        assert!(!warnings.iter().any(|w| w.code == "logging.level.invalid"));
    }

    // --- AdvisoryWarning ---

    #[test]
    fn test_warning_display() {
        let warning = AdvisoryWarning {
            code: "test.code",
            path: "test.path",
            message: "Test message".into(),
        };
        let display = format!("{warning}");
        assert_eq!(display, "[test.code] test.path: Test message");
    }

    // --- validate: subagents / reasoning ---

    #[test]
    fn test_empty_subagent_model_warns() {
        let mut config = AgenticConfig::default();
        config.subagents.locator_model = String::new();

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "subagents.locator_model.empty")
        );
    }

    #[test]
    fn test_reasoning_optimizer_model_format_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.optimizer_model = "claude-sonnet-4.6".into(); // Missing provider prefix

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "reasoning.optimizer_model.format")
        );
    }

    #[test]
    fn test_reasoning_executor_model_suspicious_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.executor_model = "anthropic/claude-sonnet-4.6".into(); // Not GPT-5

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "reasoning.executor_model.suspicious")
        );
    }

    #[test]
    fn test_reasoning_effort_invalid_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.reasoning_effort = Some("extreme".into()); // Invalid value

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "reasoning.reasoning_effort.invalid")
        );
    }

    #[test]
    fn test_reasoning_effort_valid_no_warning() {
        let mut config = AgenticConfig::default();
        config.reasoning.reasoning_effort = Some("high".into());

        let warnings = validate(&config);
        assert!(
            !warnings
                .iter()
                .any(|w| w.code == "reasoning.reasoning_effort.invalid")
        );
    }

    // --- validate: orchestrator ---

    #[test]
    fn test_orchestrator_compaction_threshold_out_of_range() {
        let mut config = AgenticConfig::default();
        config.orchestrator.compaction_threshold = 1.5; // Invalid

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "orchestrator.compaction_threshold.out_of_range")
        );
    }

    #[test]
    fn test_orchestrator_allow_empty_entry_warns() {
        let mut config = AgenticConfig::default();
        config.orchestrator.commands.allow = vec!["   ".into()];

        let warnings = validate(&config);
        let warning = warnings
            .iter()
            .find(|w| w.code == "orchestrator.commands.allow.empty_entry")
            .expect("empty allow warning expected");

        assert_eq!(warning.path, "orchestrator.commands.allow");
        assert!(warning.message.contains("becomes empty after trimming"));
    }

    #[test]
    fn test_orchestrator_deny_trimmed_entry_warns() {
        let mut config = AgenticConfig::default();
        config.orchestrator.commands.deny = vec!["  plan  ".into()];

        let warnings = validate(&config);
        let warning = warnings
            .iter()
            .find(|w| w.code == "orchestrator.commands.deny.trimmed_entry")
            .expect("trimmed deny warning expected");

        assert_eq!(warning.path, "orchestrator.commands.deny");
        assert!(warning.message.contains("effective value is \"plan\""));
    }

    #[test]
    fn test_orchestrator_allow_duplicate_warns() {
        let mut config = AgenticConfig::default();
        config.orchestrator.commands.allow = vec!["plan".into(), " plan ".into()];

        let warnings = validate(&config);
        let warning = warnings
            .iter()
            .find(|w| w.code == "orchestrator.commands.allow.duplicate")
            .expect("duplicate allow warning expected");

        assert_eq!(warning.path, "orchestrator.commands.allow");
        assert!(warning.message.contains("plan"));
    }

    #[test]
    fn test_orchestrator_command_overlap_warns_with_deny_wins_message() {
        let mut config = AgenticConfig::default();
        config.orchestrator.commands.allow = vec!["plan".into()];
        config.orchestrator.commands.deny = vec![" plan ".into()];

        let warnings = validate(&config);
        let warning = warnings
            .iter()
            .find(|w| w.code == "orchestrator.commands.overlap")
            .expect("overlap warning expected");

        assert_eq!(warning.path, "orchestrator.commands");
        assert!(warning.message.contains("plan"));
        assert!(warning.message.contains("deny wins at runtime"));
    }

    // --- validate: web retrieval ---

    #[test]
    fn test_web_retrieval_default_exceeds_max() {
        let mut config = AgenticConfig::default();
        config.web_retrieval.default_search_results = 100;
        config.web_retrieval.max_search_results = 20;

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "web_retrieval.default_exceeds_max")
        );
    }

    // --- raw TOML: deprecated keys ---

    #[test]
    fn test_detect_deprecated_thoughts_toml() {
        let toml_val: toml::Value = toml::from_str(
            r"
[thoughts]
mount_dirs = {}
",
        )
        .unwrap();

        let warnings = detect_deprecated_keys_toml(&toml_val);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "config.deprecated.thoughts")
        );
    }

    #[test]
    fn test_detect_deprecated_models_toml() {
        let toml_val: toml::Value = toml::from_str(
            r"
[models]
fast = 1
",
        )
        .unwrap();

        let warnings = detect_deprecated_keys_toml(&toml_val);
        let warning = warnings
            .iter()
            .find(|w| w.code == "config.deprecated.models")
            .expect("deprecated models warning expected");

        assert_eq!(warning.path, "models");
    }

    #[test]
    fn test_detect_deprecated_reasoning_token_limit_toml_is_silent() {
        let toml_val: toml::Value = toml::from_str(
            r"
[reasoning]
token_limit = 12345
",
        )
        .unwrap();

        let warnings = detect_deprecated_keys_toml(&toml_val);
        assert!(warnings.is_empty());
    }

    // --- raw TOML: unknown top-level keys ---

    #[test]
    fn test_detect_unknown_top_level_key_toml() {
        let toml_val: toml::Value = toml::from_str(
            r"
[servics]
enabled = true
",
        )
        .unwrap();

        let warnings = detect_unknown_top_level_keys_toml(&toml_val);
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].code, "config.unknown_top_level_key");
        assert_eq!(warnings[0].path, "$");
        assert!(warnings[0].message.contains("'servics'"));
    }

    #[test]
    fn test_known_top_level_keys_are_not_reported_as_unknown() {
        let toml_val: toml::Value = toml::from_str(
            r#"
"$schema" = "file://agentic.schema.json"

[subagents]
[reasoning]
[services]
[orchestrator]
[web_retrieval]
[cli_tools]
[logging]
"#,
        )
        .unwrap();

        let warnings = detect_unknown_top_level_keys_toml(&toml_val);
        assert!(
            warnings.is_empty(),
            "known keys should not warn: {warnings:?}"
        );
    }

    #[test]
    fn test_non_table_root_produces_no_toml_warnings() {
        let not_a_table = toml::Value::Integer(1);

        assert!(detect_deprecated_keys_toml(&not_a_table).is_empty());
        assert!(detect_unknown_top_level_keys_toml(&not_a_table).is_empty());
    }

    // --- validate: reasoning token limits ---

    #[test]
    fn test_reasoning_max_completion_tokens_above_doc_max_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.max_completion_tokens = Some(128_001);

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "reasoning.max_completion_tokens.exceeds_doc")
        );
    }

    #[test]
    fn test_reasoning_max_input_tokens_above_default_cap_warns() {
        let mut config = AgenticConfig::default();
        config.reasoning.max_input_tokens = Some(250_001);

        let warnings = validate(&config);
        assert!(
            warnings
                .iter()
                .any(|w| w.code == "reasoning.max_input_tokens.suspicious")
        );
    }
}