Skip to main content

ralph_workflow/config/validation/
mod.rs

//! Configuration validation and error reporting.
//!
//! This module provides validation for configuration files with:
//! - TOML syntax validation
//! - Type checking (expected vs. actual types)
//! - Unknown key detection with typo suggestions (Levenshtein distance)
//! - Multi-file error aggregation
//! - User-friendly error messages
//!
//! ## Architecture
//!
//! The validation process follows these steps:
//! 1. Parse TOML syntax → `ConfigValidationError::TomlSyntax` on failure
//! 2. Detect unknown/deprecated keys → `ConfigValidationError::UnknownKey` errors; deprecated keys become warnings
//! 3. Validate types against schema → `ConfigValidationError::InvalidValue` on mismatch
//! 4. Check `[agent_chain]` / `[agent_chains]` / `[agent_drains]` consistency → `ConfigValidationError::InvalidValue` on conflict
//!
//! ## Modules
//!
//! - `levenshtein`: String distance calculation for typo suggestions
//! - `keys`: Valid configuration key definitions
//! - `key_detection`: TOML structure traversal for unknown key detection
//! - `error_formatting`: User-friendly error message generation
23
24use std::path::{Path, PathBuf};
25use thiserror::Error;
26
27mod error_formatting;
28mod key_detection;
29mod keys;
30mod levenshtein;
31
32// Re-export public API
33pub use levenshtein::suggest_key;
34
35/// Configuration validation error.
36#[derive(Debug, Clone, Error)]
37pub enum ConfigValidationError {
38    #[error("TOML syntax error in {file}: {error}")]
39    TomlSyntax {
40        file: PathBuf,
41        error: toml::de::Error,
42    },
43
44    #[error("Invalid value in {file} at '{key}': {message}")]
45    InvalidValue {
46        file: PathBuf,
47        key: String,
48        message: String,
49    },
50
51    #[error("Unknown key in {file}: '{key}'")]
52    UnknownKey {
53        file: PathBuf,
54        key: String,
55        suggestion: Option<String>,
56    },
57}
58
59/// Result of config validation.
60/// On success: Ok(warnings) where warnings is a Vec<String> of deprecation warnings
61/// On failure: Err(errors) where errors is a Vec<ConfigValidationError>
62pub type ValidationResult = Result<Vec<String>, Vec<ConfigValidationError>>;
63
64/// Validate a config file and collect errors and warnings.
65///
66/// This validates:
67/// - TOML syntax
68/// - Type checking against `UnifiedConfig` schema
69/// - Unknown keys with typo suggestions
70/// - Deprecated keys (returns as warnings, not errors)
71///
72/// Returns Ok((warnings)) on success with optional deprecation warnings,
73/// or Err(errors) on validation failure.
74///
75/// # Errors
76///
77/// Returns error if the operation fails.
78pub fn validate_config_file(
79    path: &Path,
80    content: &str,
81) -> Result<Vec<String>, Vec<ConfigValidationError>> {
82    // Step 1: Validate TOML syntax and parse to generic Value for unknown key detection
83    let parsed_value: toml::Value = toml::from_str(content).map_err(|e| {
84        vec![ConfigValidationError::TomlSyntax {
85            file: path.to_path_buf(),
86            error: e,
87        }]
88    })?;
89
90    // Step 2: Detect unknown and deprecated keys by walking the TOML structure
91    // This is necessary because #[serde(default)] causes serde to silently ignore unknown fields
92    let (unknown_keys, deprecated_keys) =
93        key_detection::detect_unknown_and_deprecated_keys(&parsed_value);
94
95    // Collect unknown keys as errors using iterator
96    let valid_keys = keys::get_valid_config_keys();
97    let unknown_errors: Vec<ConfigValidationError> = unknown_keys
98        .iter()
99        .map(|(key, location)| ConfigValidationError::UnknownKey {
100            file: path.to_path_buf(),
101            key: format!("{location}{key}"),
102            suggestion: levenshtein::suggest_key(key, &valid_keys),
103        })
104        .collect();
105
106    // Collect deprecated keys as warnings using iterator
107    let deprecation_warnings: Vec<String> = deprecated_keys
108        .iter()
109        .map(|(key, location)| {
110            let full_key = format!("{location}{key}");
111            format!(
112                "Deprecated key '{}' in {} - this key is no longer used and can be safely removed",
113                full_key,
114                path.display()
115            )
116        })
117        .collect();
118
119    // Step 3: Validate against UnifiedConfig schema for type checking
120    // Unknown keys won't cause deserialization to fail due to #[serde(default)],
121    // but we've already detected them in Step 2
122    match toml::from_str::<crate::config::unified::UnifiedConfig>(content) {
123        Ok(config) => {
124            // Check for agent_chain vs agent_chains confusion
125            let has_agent_chain = parsed_value.get("agent_chain").is_some();
126            let agent_chain_error = has_agent_chain
127                .then_some(!config.agent_drains.is_empty() && config.agent_chains.is_empty())
128                .and_then(|cond| cond.then_some(ConfigValidationError::InvalidValue {
129                    file: path.to_path_buf(),
130                    key: "agent_chain".to_string(),
131                    message: "found [agent_drains] with singular [agent_chain]; did you mean [agent_chains]? Move retry/backoff settings to [general] (max_retries, retry_delay_ms, backoff_multiplier, max_backoff_ms, max_cycles)".to_string(),
132                }));
133
134            let agent_chain_warning = has_agent_chain
135                .then_some(config.agent_drains.is_empty() || config.agent_chains.is_empty())
136                .and_then(|cond| cond.then_some(format!(
137                    "Deprecated section '[agent_chain]' in {} - Ralph will keep legacy role-keyed behavior by adding the default drain bindings automatically. Migrate agent lists to [agent_chains]/[agent_drains] and move retry/backoff settings to [general]",
138                    path.display()
139                )));
140
141            let has_named_chains = !config.agent_chains.is_empty();
142            let has_named_drains = !config.agent_drains.is_empty();
143            let has_legacy_role_bindings = config
144                .agent_chain
145                .as_ref()
146                .is_some_and(crate::agents::fallback::FallbackConfig::uses_legacy_role_schema);
147            let validate_named_schema_now = (!has_named_chains && !has_named_drains)
148                || (has_named_chains && has_named_drains)
149                || has_legacy_role_bindings;
150
151            let resolve_error: Option<ConfigValidationError> = validate_named_schema_now
152                .then(|| config.resolve_agent_drains_checked())
153                .and_then(|result| result.err())
154                .map(|message| {
155                    let message_string = message.to_string();
156                    let key = if message_string.contains("references unknown chain") {
157                        message_string
158                            .split_whitespace()
159                            .next()
160                            .map_or_else(|| "agent_drains".to_string(), ToString::to_string)
161                    } else if message_string.contains("agent_chain") {
162                        "agent_chain".to_string()
163                    } else {
164                        "agent_drains".to_string()
165                    };
166                    ConfigValidationError::InvalidValue {
167                        file: path.to_path_buf(),
168                        key,
169                        message: message_string,
170                    }
171                });
172
173            // Combine all errors
174            let schema_errors: Vec<ConfigValidationError> = [agent_chain_error, resolve_error]
175                .into_iter()
176                .flatten()
177                .collect();
178
179            // Combine all warnings
180            let schema_warnings: Vec<String> = agent_chain_warning.into_iter().collect();
181
182            let all_errors: Vec<_> = unknown_errors.into_iter().chain(schema_errors).collect();
183
184            let all_warnings: Vec<_> = deprecation_warnings
185                .into_iter()
186                .chain(schema_warnings)
187                .collect();
188
189            if all_errors.is_empty() {
190                Ok(all_warnings)
191            } else {
192                Err(all_errors)
193            }
194        }
195        Err(e) => {
196            // TOML is syntactically valid but doesn't match our schema
197            // This could be a type error or missing required field
198            let error_str = e.to_string();
199
200            // Build schema errors based on error type
201            let schema_error: Option<ConfigValidationError> =
202                if error_str.contains("missing field") || error_str.contains("invalid type") {
203                    Some(ConfigValidationError::InvalidValue {
204                        file: path.to_path_buf(),
205                        key: error_formatting::extract_key_from_toml_error(&error_str),
206                        message: error_formatting::format_invalid_type_message(&error_str),
207                    })
208                } else {
209                    Some(ConfigValidationError::InvalidValue {
210                        file: path.to_path_buf(),
211                        key: "config".to_string(),
212                        message: error_str,
213                    })
214                };
215
216            let all_errors: Vec<_> = unknown_errors.into_iter().chain(schema_error).collect();
217
218            let all_warnings: Vec<_> = deprecation_warnings.into_iter().collect();
219
220            if all_errors.is_empty() {
221                Ok(all_warnings)
222            } else {
223                Err(all_errors)
224            }
225        }
226    }
227}
228
229/// Format validation errors for user display.
230#[must_use]
231pub fn format_validation_errors(errors: &[ConfigValidationError]) -> String {
232    errors
233        .iter()
234        .map(|error| {
235            let error_line = format!("  {error}");
236            if let ConfigValidationError::UnknownKey {
237                suggestion: Some(s),
238                ..
239            } = error
240            {
241                format!("{error_line}\n    Did you mean '{s}'?")
242            } else {
243                error_line
244            }
245        })
246        .collect::<Vec<_>>()
247        .join("\n")
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Validates `content` as if it had been read from `test.toml`.
    fn validate(content: &str) -> Result<Vec<String>, Vec<ConfigValidationError>> {
        validate_config_file(Path::new("test.toml"), content)
    }

    #[test]
    fn test_validate_config_file_valid_toml() {
        let content = r"
[general]
verbosity = 2
developer_iters = 5
max_retries = 4
retry_delay_ms = 1500
";
        assert!(validate(content).is_ok());
    }

    #[test]
    fn test_validate_config_file_warns_for_legacy_agent_chain_with_migration_message() {
        let content = r#"
[agent_chain]
developer = ["codex"]
max_retries = 5
retry_delay_ms = 2000
"#;

        let outcome = validate(content);
        assert!(
            outcome.is_ok(),
            "legacy agent_chain should remain compatible"
        );

        let warnings = outcome.expect("validation should succeed with warnings");
        let has_migration_warning = warnings
            .iter()
            .any(|warning| warning.contains("Deprecated section '[agent_chain]'"));
        assert!(
            has_migration_warning,
            "expected legacy migration warning, got: {warnings:?}"
        );
    }

    #[test]
    fn test_validate_config_file_invalid_toml() {
        // The table header is never closed, so parsing must fail.
        let content = r"
[general
verbosity = 2
";
        let outcome = validate(content);
        assert!(outcome.is_err());

        let Err(errors) = outcome else {
            return;
        };
        assert_eq!(errors.len(), 1);
        let ConfigValidationError::TomlSyntax { file, .. } = &errors[0] else {
            panic!("Expected TomlSyntax error");
        };
        assert_eq!(file, Path::new("test.toml"));
    }

    #[test]
    fn test_format_validation_errors_with_suggestion() {
        let errors = vec![ConfigValidationError::UnknownKey {
            file: PathBuf::from("test.toml"),
            key: "develper_iters".to_string(),
            suggestion: Some("developer_iters".to_string()),
        }];

        let rendered = format_validation_errors(&errors);
        assert!(rendered.contains("develper_iters"));
        assert!(rendered.contains("Did you mean 'developer_iters'?"));
    }

    #[test]
    fn test_format_validation_errors_without_suggestion() {
        let errors = vec![ConfigValidationError::UnknownKey {
            file: PathBuf::from("test.toml"),
            key: "completely_unknown".to_string(),
            suggestion: None,
        }];

        let rendered = format_validation_errors(&errors);
        assert!(rendered.contains("completely_unknown"));
        assert!(!rendered.contains("Did you mean"));
    }

    #[test]
    fn test_format_validation_errors_multiple() {
        // Obtain a genuine parser error by parsing malformed TOML.
        let toml_error = toml::from_str::<toml::Value>("[invalid\nkey = value").unwrap_err();

        let syntax_error = ConfigValidationError::TomlSyntax {
            file: PathBuf::from("global.toml"),
            error: toml_error,
        };
        let unknown_key_error = ConfigValidationError::UnknownKey {
            file: PathBuf::from("local.toml"),
            key: "bad_key".to_string(),
            suggestion: Some("good_key".to_string()),
        };

        let rendered = format_validation_errors(&[syntax_error, unknown_key_error]);
        assert!(rendered.contains("global.toml"));
        assert!(rendered.contains("local.toml"));
        assert!(rendered.contains("Did you mean 'good_key'?"));
    }

    #[test]
    fn test_validate_config_file_unknown_key() {
        let content = r"
[general]
develper_iters = 5
verbosity = 2
";
        // The misspelled key must be reported by the unknown-key detection.
        let outcome = validate(content);
        assert!(outcome.is_err());

        let Err(errors) = outcome else {
            return;
        };
        assert_eq!(errors.len(), 1);
        let ConfigValidationError::UnknownKey {
            key, suggestion, ..
        } = &errors[0]
        else {
            panic!("Expected UnknownKey error");
        };
        assert!(key.contains("develper_iters"));
        assert_eq!(suggestion.as_ref().unwrap(), "developer_iters");
    }

    #[test]
    fn test_validate_config_file_invalid_type() {
        // A string where the schema expects an integer must be rejected
        // during deserialization.
        let content = r#"
[general]
developer_iters = "five"
"#;
        assert!(
            validate(content).is_err(),
            "Should fail with string instead of int"
        );
    }

    #[test]
    fn test_validate_config_file_valid_with_all_sections() {
        let content = r#"
[general]
verbosity = 2
developer_iters = 5
reviewer_reviews = 2

[ccs]
output_flag = "--output=json"

[agents.claude]
cmd = "claude"

[ccs_aliases]
work = "ccs work"
"#;
        assert!(
            validate(content).is_ok(),
            "Valid config with all sections should pass"
        );
    }

    #[test]
    fn test_validate_config_file_empty_file() {
        assert!(validate("").is_ok(), "Empty file should use default values");
    }

    #[test]
    fn test_validate_general_retry_keys() {
        let content = r#"
[general]
developer_iters = 5
max_retries = 5
retry_delay_ms = 2000
backoff_multiplier = 2.5
max_backoff_ms = 120000
max_cycles = 5

[agent_chains]
shared_dev = ["claude", "codex"]
shared_review = ["claude"]

[agent_drains]
planning = "shared_dev"
development = "shared_dev"
analysis = "shared_dev"
review = "shared_review"
fix = "shared_review"
commit = "shared_review"
"#;
        assert!(
            validate(content).is_ok(),
            "general retry/backoff keys should be valid"
        );
    }

    #[test]
    fn test_validate_general_provider_fallback_key() {
        let content = r#"
[general]

[general.provider_fallback]
opencode = ["-m opencode/glm-4.7-free"]
"#;
        assert!(
            validate(content).is_ok(),
            "general.provider_fallback should be valid"
        );
    }

    #[test]
    fn test_validate_agent_chain_with_all_valid_keys() {
        // The legacy [agent_chain] section is still accepted (with a warning).
        let content = r#"
[general]
developer_iters = 5

[agent_chain]
developer = ["claude", "codex"]
reviewer = ["claude"]
commit = ["claude"]
analysis = ["claude"]
max_retries = 5
retry_delay_ms = 2000
backoff_multiplier = 2.5
max_backoff_ms = 120000
max_cycles = 5

[agent_chain.provider_fallback]
opencode = ["-m opencode/glm-4.7-free", "-m opencode/claude-sonnet-4"]
"#;
        assert!(
            validate(content).is_ok(),
            "legacy agent_chain should remain valid"
        );
    }

    #[test]
    fn test_validate_agent_chain_commit_key() {
        // Regression guard: `commit` must be accepted inside [agent_chain].
        let content = r#"
[agent_chain]
developer = ["claude"]
commit = ["claude"]
"#;
        assert!(
            validate(content).is_ok(),
            "commit key should be valid in agent_chain"
        );
    }

    #[test]
    fn test_validate_agent_chain_analysis_key() {
        // Regression guard: `analysis` must be accepted inside [agent_chain].
        let content = r#"
[agent_chain]
developer = ["claude"]
analysis = ["claude"]
"#;
        assert!(
            validate(content).is_ok(),
            "analysis key should be valid in agent_chain"
        );
    }

    #[test]
    fn test_validate_agent_chain_retry_keys() {
        // Regression guard: retry/backoff keys must be accepted inside [agent_chain].
        let content = r#"
[agent_chain]
developer = ["claude"]
max_retries = 3
retry_delay_ms = 5000
backoff_multiplier = 1.5
max_backoff_ms = 30000
max_cycles = 2
"#;
        assert!(
            validate(content).is_ok(),
            "retry/backoff keys should be valid in agent_chain"
        );
    }

    #[test]
    fn test_validate_agent_chain_provider_fallback_key() {
        // Regression guard: the nested provider_fallback table must be
        // accepted inside [agent_chain].
        let content = r#"
[agent_chain]
developer = ["opencode"]

[agent_chain.provider_fallback]
opencode = ["-m opencode/glm-4.7-free", "-m opencode/claude-sonnet-4"]
"#;
        assert!(
            validate(content).is_ok(),
            "provider_fallback nested table should be valid in agent_chain"
        );
    }

    #[test]
    fn test_validate_config_file_deprecated_key_warning() {
        let content = r"
[general]
verbosity = 2
auto_rebase = true
max_recovery_attempts = 3
";
        let outcome = validate(content);
        assert!(outcome.is_ok(), "Deprecated keys should not cause errors");

        let Ok(warnings) = outcome else {
            return;
        };
        assert_eq!(warnings.len(), 2, "Should have 2 deprecation warnings");
        assert!(
            warnings.iter().any(|w| w.contains("auto_rebase")),
            "Should warn about auto_rebase"
        );
        assert!(
            warnings.iter().any(|w| w.contains("max_recovery_attempts")),
            "Should warn about max_recovery_attempts"
        );
    }

    #[test]
    fn test_validate_config_file_no_warnings_without_deprecated() {
        let content = r"
[general]
verbosity = 2
developer_iters = 5
";
        let outcome = validate(content);
        assert!(outcome.is_ok(), "Valid config should pass");

        let Ok(warnings) = outcome else {
            return;
        };
        assert_eq!(warnings.len(), 0, "Should have no warnings");
    }

    #[test]
    fn test_validate_config_file_rejects_unknown_agent_drain_binding_target() {
        let content = r#"
[agent_chains]
shared_dev = ["codex"]

[agent_drains]
planning = "missing_chain"
"#;

        let outcome = validate(content);
        assert!(
            outcome.is_err(),
            "unknown drain binding target should fail validation"
        );

        let errors = outcome.expect_err("validation should fail");
        let reports_bad_binding = errors.iter().any(|error| {
            matches!(
                error,
                ConfigValidationError::InvalidValue { key, message, .. }
                    if key == "agent_drains.planning"
                        && message.contains("missing_chain")
            )
        });
        assert!(
            reports_bad_binding,
            "expected invalid drain binding error, got: {errors:?}"
        );
    }

    #[test]
    fn test_validate_config_file_rejects_mixed_legacy_and_named_chain_schema() {
        let content = r#"
[agent_chain]
developer = ["codex"]

[agent_chains]
shared_dev = ["claude"]
"#;

        let outcome = validate(content);
        assert!(
            outcome.is_err(),
            "mixing legacy and named chain schema should fail validation"
        );

        let errors = outcome.expect_err("validation should fail");
        let reports_mixed_schema = errors.iter().any(|error| {
            matches!(
                error,
                ConfigValidationError::InvalidValue { key, message, .. }
                    if key == "agent_chain"
                        && message.contains("agent_chains")
                        && message.contains("agent_drains")
            )
        });
        assert!(
            reports_mixed_schema,
            "expected mixed schema error, got: {errors:?}"
        );
    }

    #[test]
    fn test_validate_config_file_rejects_incomplete_named_drain_resolution() {
        let content = r#"
[agent_chains]
shared_review = ["claude"]

[agent_drains]
review = "shared_review"
fix = "shared_review"
"#;

        let outcome = validate(content);
        assert!(
            outcome.is_err(),
            "incomplete drain coverage should fail validation"
        );

        let errors = outcome.expect_err("validation should fail");
        let reports_missing_drains = errors.iter().any(|error| {
            matches!(
                error,
                ConfigValidationError::InvalidValue { key, message, .. }
                    if key == "agent_drains"
                        && message.contains("planning")
                        && message.contains("development")
                        && message.contains("analysis")
            )
        });
        assert!(
            reports_missing_drains,
            "expected incomplete drain coverage error, got: {errors:?}"
        );
    }

    #[test]
    fn test_validate_config_file_accepts_commit_and_analysis_derived_from_bound_drains() {
        let content = r#"
[agent_chains]
shared_dev = ["codex"]
shared_review = ["claude"]

[agent_drains]
planning = "shared_dev"
development = "shared_dev"
review = "shared_review"
fix = "shared_review"
"#;

        assert!(
            validate(content).is_ok(),
            "commit and analysis should derive from existing bound drains"
        );
    }
}