rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use lazy_static::lazy_static;
8use log;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fmt;
13use std::fs;
14use std::io;
15use std::path::Path;
16use std::str::FromStr;
17use toml_edit::DocumentMut;
18
// NOTE: this type derives `JsonSchema`; reviewer notes are written as plain `//`
// comments so the `///` text (which may feed schema descriptions) stays untouched.
/// Markdown flavor/dialect enumeration
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments
    // Serialized as "standard"; "none" and the empty string are also accepted on input.
    // This is the `Default` variant.
    #[serde(rename = "standard", alias = "none", alias = "")]
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    // Serialized as "mkdocs".
    #[serde(rename = "mkdocs")]
    MkDocs,
    // Future flavors can be added here when they have actual implementation differences
    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
    // Planned: CommonMark - for strict CommonMark compliance
}
34
35impl fmt::Display for MarkdownFlavor {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            MarkdownFlavor::Standard => write!(f, "standard"),
39            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
40        }
41    }
42}
43
44impl FromStr for MarkdownFlavor {
45    type Err = String;
46
47    fn from_str(s: &str) -> Result<Self, Self::Err> {
48        match s.to_lowercase().as_str() {
49            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
50            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
51            // Accept but warn about unimplemented flavors
52            "gfm" | "github" => {
53                eprintln!("Warning: GFM flavor not yet implemented, using standard");
54                Ok(MarkdownFlavor::Standard)
55            }
56            "commonmark" => {
57                eprintln!("Warning: CommonMark flavor not yet implemented, using standard");
58                Ok(MarkdownFlavor::Standard)
59            }
60            _ => Err(format!("Unknown markdown flavor: {s}")),
61        }
62    }
63}
64
65lazy_static! {
66    // Map common markdownlint config keys to rumdl rule names
67    static ref MARKDOWNLINT_KEY_MAP: HashMap<&'static str, &'static str> = {
68        let mut m = HashMap::new();
69        // Add mappings based on common markdownlint config names
70        // From https://github.com/DavidAnson/markdownlint/blob/main/schema/.markdownlint.jsonc
71        m.insert("ul-style", "md004");
72        m.insert("code-block-style", "md046");
73        m.insert("ul-indent", "md007"); // Example
74        m.insert("line-length", "md013"); // Example of a common one that might be top-level
75        // Add more mappings as needed based on markdownlint schema or observed usage
76        m
77    };
78}
79
/// Normalizes a configuration key (rule name or option name).
///
/// - Rule identifiers — exactly five bytes, `md` in any case followed by three ASCII
///   digits (e.g. `md013`, `Md013`) — are returned in uppercase (`MD013`).
/// - Every other key is converted to lowercase kebab-case: underscores become hyphens
///   and ASCII letters are lowercased.
pub fn normalize_key(key: &str) -> String {
    let raw = key.as_bytes();

    // Short-circuiting on the length keeps the slice indexing below in bounds.
    let is_rule_id = raw.len() == 5
        && raw[..2].eq_ignore_ascii_case(b"md")
        && raw[2..].iter().all(u8::is_ascii_digit);

    if is_rule_id {
        return key.to_ascii_uppercase();
    }

    key.replace('_', "-").to_ascii_lowercase()
}
89
// NOTE: derives `JsonSchema`; reviewer notes use `//` so the `///` text stays untouched.
/// Represents a rule-specific configuration
#[derive(Debug, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    // `flatten` means the option keys sit directly in the rule's table rather than
    // under a nested `values` key. The JSON schema for this field is supplied by
    // `arbitrary_value_schema` (an object accepting any properties).
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
98
99/// Generate a JSON schema for arbitrary configuration values
100fn arbitrary_value_schema(_gen: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
101    use schemars::schema::*;
102    Schema::Object(SchemaObject {
103        instance_type: Some(InstanceType::Object.into()),
104        object: Some(Box::new(ObjectValidation {
105            additional_properties: Some(Box::new(Schema::Bool(true))),
106            ..Default::default()
107        })),
108        ..Default::default()
109    })
110}
111
// NOTE: derives `JsonSchema`; reviewer notes use `//` so the `///` text stays untouched.
/// Represents the complete configuration loaded from rumdl.toml
#[derive(Debug, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    // Falls back to `GlobalConfig::default()` when the `global` table is absent.
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    // Keys are glob patterns (compiled with `globset` in `get_ignored_rules_for_file`).
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    // `flatten` collects every remaining top-level table into this map, keyed by the
    // table name. `get_rule_config_value` looks rules up by their uppercase name.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,
}
140
141impl Config {
142    /// Check if the Markdown flavor is set to MkDocs
143    pub fn is_mkdocs_flavor(&self) -> bool {
144        self.global.flavor == MarkdownFlavor::MkDocs
145    }
146
147    // Future methods for when GFM and CommonMark are implemented:
148    // pub fn is_gfm_flavor(&self) -> bool
149    // pub fn is_commonmark_flavor(&self) -> bool
150
151    /// Get the configured Markdown flavor
152    pub fn markdown_flavor(&self) -> MarkdownFlavor {
153        self.global.flavor
154    }
155
156    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
157    pub fn is_mkdocs_project(&self) -> bool {
158        self.is_mkdocs_flavor()
159    }
160
161    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
162    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
163    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
164        use globset::{Glob, GlobSetBuilder};
165
166        let mut ignored_rules = HashSet::new();
167
168        if self.per_file_ignores.is_empty() {
169            return ignored_rules;
170        }
171
172        // Build a globset for efficient matching
173        let mut builder = GlobSetBuilder::new();
174        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
175
176        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
177            if let Ok(glob) = Glob::new(pattern) {
178                builder.add(glob);
179                pattern_to_rules.push((idx, rules));
180            } else {
181                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
182            }
183        }
184
185        let globset = match builder.build() {
186            Ok(gs) => gs,
187            Err(e) => {
188                log::error!("Failed to build globset for per-file-ignores: {e}");
189                return ignored_rules;
190            }
191        };
192
193        // Match the file path against all patterns
194        for match_idx in globset.matches(file_path) {
195            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
196                for rule in rules.iter() {
197                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
198                    ignored_rules.insert(normalize_key(rule));
199                }
200            }
201        }
202
203        ignored_rules
204    }
205}
206
// NOTE: derives `JsonSchema`; reviewer notes use `//` so the `///` text stays untouched.
// The container-level `#[serde(default)]` fills fields missing from the input from
// `GlobalConfig::default()`; field-level `default = "..."` attributes name the helper
// used for that specific field.
/// Global configuration options
#[derive(Debug, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default)]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    // Defaults to `true` (see `default_respect_gitignore`).
    #[serde(default = "default_respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    // Defaults to 80 (see `default_line_length`).
    #[serde(default = "default_line_length")]
    pub line_length: u64,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Omitted from serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    // Only `standard` and `mkdocs` exist as `MarkdownFlavor` variants at present.
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// Whether to enforce exclude and extend-exclude patterns even for paths that are passed explicitly.
    /// By default (false), rumdl will lint any paths passed in directly, even if they would typically be excluded.
    /// Setting this to true will cause rumdl to respect exclusions unequivocally.
    /// This is useful for pre-commit, which explicitly passes all changed files.
    #[serde(default)]
    pub force_exclude: bool,
}
261
/// Serde default for `GlobalConfig::respect_gitignore`: `.gitignore` files are honored
/// unless the configuration says otherwise.
fn default_respect_gitignore() -> bool {
    const RESPECT_GITIGNORE_BY_DEFAULT: bool = true;
    RESPECT_GITIGNORE_BY_DEFAULT
}
265
/// Serde default for `GlobalConfig::line_length`: 80 columns.
fn default_line_length() -> u64 {
    const DEFAULT_LINE_LENGTH: u64 = 80;
    DEFAULT_LINE_LENGTH
}
269
270// Add the Default impl
271impl Default for GlobalConfig {
272    fn default() -> Self {
273        Self {
274            enable: Vec::new(),
275            disable: Vec::new(),
276            exclude: Vec::new(),
277            include: Vec::new(),
278            respect_gitignore: true,
279            line_length: 80,
280            output_format: None,
281            fixable: Vec::new(),
282            unfixable: Vec::new(),
283            flavor: MarkdownFlavor::default(),
284            force_exclude: false,
285        }
286    }
287}
288
/// File names of markdownlint configuration files, in dotfile and plain form, with
/// JSON, JSONC and YAML extensions.
// NOTE(review): not referenced in the visible part of this file; presumably consulted
// in this order during config discovery — confirm against the loader.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
299
300/// Create a default configuration file at the specified path
301pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
302    // Check if file already exists
303    if Path::new(path).exists() {
304        return Err(ConfigError::FileExists { path: path.to_string() });
305    }
306
307    // Default configuration content
308    let default_config = r#"# rumdl configuration file
309
310# Global configuration options
311[global]
312# List of rules to disable (uncomment and modify as needed)
313# disable = ["MD013", "MD033"]
314
315# List of rules to enable exclusively (if provided, only these rules will run)
316# enable = ["MD001", "MD003", "MD004"]
317
318# List of file/directory patterns to include for linting (if provided, only these will be linted)
319# include = [
320#    "docs/*.md",
321#    "src/**/*.md",
322#    "README.md"
323# ]
324
325# List of file/directory patterns to exclude from linting
326exclude = [
327    # Common directories to exclude
328    ".git",
329    ".github",
330    "node_modules",
331    "vendor",
332    "dist",
333    "build",
334
335    # Specific files or patterns
336    "CHANGELOG.md",
337    "LICENSE.md",
338]
339
340# Respect .gitignore files when scanning directories (default: true)
341respect_gitignore = true
342
343# Markdown flavor/dialect (uncomment to enable)
344# Options: mkdocs, gfm, commonmark
345# flavor = "mkdocs"
346
347# Rule-specific configurations (uncomment and modify as needed)
348
349# [MD003]
350# style = "atx"  # Heading style (atx, atx_closed, setext)
351
352# [MD004]
353# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
354
355# [MD007]
356# indent = 4  # Unordered list indentation
357
358# [MD013]
359# line_length = 100  # Line length
360# code_blocks = false  # Exclude code blocks from line length check
361# tables = false  # Exclude tables from line length check
362# headings = true  # Include headings in line length check
363
364# [MD044]
365# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
366# code_blocks_excluded = true  # Exclude code blocks from proper name check
367"#;
368
369    // Write the default configuration to the file
370    match fs::write(path, default_config) {
371        Ok(_) => Ok(()),
372        Err(err) => Err(ConfigError::IoError {
373            source: err,
374            path: path.to_string(),
375        }),
376    }
377}
378
/// Errors that can occur when loading or creating configuration
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on the configuration file at `path` failed.
    ///
    /// Note: the message always says "read", but `create_default_config` also uses
    /// this variant when writing the file fails.
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON).
    /// The payload is the parser's error message.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists (returned by `create_default_config`,
    /// which refuses to overwrite an existing path)
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
394
395/// Get a rule-specific configuration value
396/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
397/// for better markdownlint compatibility
398pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
399    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
400
401    let rule_config = config.rules.get(&norm_rule_name)?;
402
403    // Try multiple key variants to support both underscore and kebab-case formats
404    let key_variants = [
405        key.to_string(),       // Original key as provided
406        normalize_key(key),    // Normalized key (lowercase, kebab-case)
407        key.replace('-', "_"), // Convert kebab-case to snake_case
408        key.replace('_', "-"), // Convert snake_case to kebab-case
409    ];
410
411    // Try each variant until we find a match
412    for variant in &key_variants {
413        if let Some(value) = rule_config.values.get(variant)
414            && let Ok(result) = T::deserialize(value.clone())
415        {
416            return Some(result);
417        }
418    }
419
420    None
421}
422
/// Returns the default `[tool.rumdl]` section to paste into a `pyproject.toml`.
///
/// The text starts with a blank line, sets a few global options and lists commented-out
/// examples for rule-specific tables.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line_length = 100  # Line length
# code_blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code_blocks_excluded = true  # Exclude code blocks from proper name check
"#;

    String::from(PYPROJECT_TEMPLATE)
}
465
466#[cfg(test)]
467mod tests {
468    use super::*;
469    use std::fs;
470    use tempfile::tempdir;
471
472    #[test]
473    fn test_flavor_loading() {
474        let temp_dir = tempdir().unwrap();
475        let config_path = temp_dir.path().join(".rumdl.toml");
476        let config_content = r#"
477[global]
478flavor = "mkdocs"
479disable = ["MD001"]
480"#;
481        fs::write(&config_path, config_content).unwrap();
482
483        // Load the config
484        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
485        let config: Config = sourced.into();
486
487        // Check that flavor was loaded
488        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
489        assert!(config.is_mkdocs_flavor());
490        assert!(config.is_mkdocs_project()); // Test backwards compatibility
491        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
492    }
493
494    #[test]
495    fn test_pyproject_toml_root_level_config() {
496        let temp_dir = tempdir().unwrap();
497        let config_path = temp_dir.path().join("pyproject.toml");
498
499        // Create a test pyproject.toml with root-level configuration
500        let content = r#"
501[tool.rumdl]
502line-length = 120
503disable = ["MD033"]
504enable = ["MD001", "MD004"]
505include = ["docs/*.md"]
506exclude = ["node_modules"]
507respect-gitignore = true
508        "#;
509
510        fs::write(&config_path, content).unwrap();
511
512        // Load the config with skip_auto_discovery to avoid environment config files
513        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
514        let config: Config = sourced.into(); // Convert to plain config for assertions
515
516        // Check global settings
517        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
518        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
519        // Should now contain only the configured pattern since auto-discovery is disabled
520        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
521        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
522        assert!(config.global.respect_gitignore);
523
524        // Check line-length was correctly added to MD013
525        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
526        assert_eq!(line_length, Some(120));
527    }
528
529    #[test]
530    fn test_pyproject_toml_snake_case_and_kebab_case() {
531        let temp_dir = tempdir().unwrap();
532        let config_path = temp_dir.path().join("pyproject.toml");
533
534        // Test with both kebab-case and snake_case variants
535        let content = r#"
536[tool.rumdl]
537line-length = 150
538respect_gitignore = true
539        "#;
540
541        fs::write(&config_path, content).unwrap();
542
543        // Load the config with skip_auto_discovery to avoid environment config files
544        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
545        let config: Config = sourced.into(); // Convert to plain config for assertions
546
547        // Check settings were correctly loaded
548        assert!(config.global.respect_gitignore);
549        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
550        assert_eq!(line_length, Some(150));
551    }
552
553    #[test]
554    fn test_md013_key_normalization_in_rumdl_toml() {
555        let temp_dir = tempdir().unwrap();
556        let config_path = temp_dir.path().join(".rumdl.toml");
557        let config_content = r#"
558[MD013]
559line_length = 111
560line-length = 222
561"#;
562        fs::write(&config_path, config_content).unwrap();
563        // Load the config with skip_auto_discovery to avoid environment config files
564        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
565        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
566        // Now we should only get the explicitly configured key
567        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
568        assert_eq!(keys, vec!["line-length"]);
569        let val = &rule_cfg.values["line-length"].value;
570        assert_eq!(val.as_integer(), Some(222));
571        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
572        let config: Config = sourced.clone().into();
573        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
574        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
575        assert_eq!(v1, Some(222));
576        assert_eq!(v2, Some(222));
577    }
578
579    #[test]
580    fn test_md013_section_case_insensitivity() {
581        let temp_dir = tempdir().unwrap();
582        let config_path = temp_dir.path().join(".rumdl.toml");
583        let config_content = r#"
584[md013]
585line-length = 101
586
587[Md013]
588line-length = 102
589
590[MD013]
591line-length = 103
592"#;
593        fs::write(&config_path, config_content).unwrap();
594        // Load the config with skip_auto_discovery to avoid environment config files
595        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
596        let config: Config = sourced.clone().into();
597        // Only the last section should win, and be present
598        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
599        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
600        assert_eq!(keys, vec!["line-length"]);
601        let val = &rule_cfg.values["line-length"].value;
602        assert_eq!(val.as_integer(), Some(103));
603        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
604        assert_eq!(v, Some(103));
605    }
606
607    #[test]
608    fn test_md013_key_snake_and_kebab_case() {
609        let temp_dir = tempdir().unwrap();
610        let config_path = temp_dir.path().join(".rumdl.toml");
611        let config_content = r#"
612[MD013]
613line_length = 201
614line-length = 202
615"#;
616        fs::write(&config_path, config_content).unwrap();
617        // Load the config with skip_auto_discovery to avoid environment config files
618        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
619        let config: Config = sourced.clone().into();
620        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
621        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
622        assert_eq!(keys, vec!["line-length"]);
623        let val = &rule_cfg.values["line-length"].value;
624        assert_eq!(val.as_integer(), Some(202));
625        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
626        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
627        assert_eq!(v1, Some(202));
628        assert_eq!(v2, Some(202));
629    }
630
631    #[test]
632    fn test_unknown_rule_section_is_ignored() {
633        let temp_dir = tempdir().unwrap();
634        let config_path = temp_dir.path().join(".rumdl.toml");
635        let config_content = r#"
636[MD999]
637foo = 1
638bar = 2
639[MD013]
640line-length = 303
641"#;
642        fs::write(&config_path, config_content).unwrap();
643        // Load the config with skip_auto_discovery to avoid environment config files
644        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
645        let config: Config = sourced.clone().into();
646        // MD999 should not be present
647        assert!(!sourced.rules.contains_key("MD999"));
648        // MD013 should be present and correct
649        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
650        assert_eq!(v, Some(303));
651    }
652
653    #[test]
654    fn test_invalid_toml_syntax() {
655        let temp_dir = tempdir().unwrap();
656        let config_path = temp_dir.path().join(".rumdl.toml");
657
658        // Invalid TOML with unclosed string
659        let config_content = r#"
660[MD013]
661line-length = "unclosed string
662"#;
663        fs::write(&config_path, config_content).unwrap();
664
665        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
666        assert!(result.is_err());
667        match result.unwrap_err() {
668            ConfigError::ParseError(msg) => {
669                // The actual error message from toml parser might vary
670                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
671            }
672            _ => panic!("Expected ParseError"),
673        }
674    }
675
676    #[test]
677    fn test_wrong_type_for_config_value() {
678        let temp_dir = tempdir().unwrap();
679        let config_path = temp_dir.path().join(".rumdl.toml");
680
681        // line-length should be a number, not a string
682        let config_content = r#"
683[MD013]
684line-length = "not a number"
685"#;
686        fs::write(&config_path, config_content).unwrap();
687
688        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
689        let config: Config = sourced.into();
690
691        // The value should be loaded as a string, not converted
692        let rule_config = config.rules.get("MD013").unwrap();
693        let value = rule_config.values.get("line-length").unwrap();
694        assert!(matches!(value, toml::Value::String(_)));
695    }
696
697    #[test]
698    fn test_empty_config_file() {
699        let temp_dir = tempdir().unwrap();
700        let config_path = temp_dir.path().join(".rumdl.toml");
701
702        // Empty file
703        fs::write(&config_path, "").unwrap();
704
705        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
706        let config: Config = sourced.into();
707
708        // Should have default values
709        assert_eq!(config.global.line_length, 80);
710        assert!(config.global.respect_gitignore);
711        assert!(config.rules.is_empty());
712    }
713
714    #[test]
715    fn test_malformed_pyproject_toml() {
716        let temp_dir = tempdir().unwrap();
717        let config_path = temp_dir.path().join("pyproject.toml");
718
719        // Missing closing bracket
720        let content = r#"
721[tool.rumdl
722line-length = 120
723"#;
724        fs::write(&config_path, content).unwrap();
725
726        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
727        assert!(result.is_err());
728    }
729
730    #[test]
731    fn test_conflicting_config_values() {
732        let temp_dir = tempdir().unwrap();
733        let config_path = temp_dir.path().join(".rumdl.toml");
734
735        // Both enable and disable the same rule - these need to be in a global section
736        let config_content = r#"
737[global]
738enable = ["MD013"]
739disable = ["MD013"]
740"#;
741        fs::write(&config_path, config_content).unwrap();
742
743        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
744        let config: Config = sourced.into();
745
746        // Both should be present - resolution happens at runtime
747        assert!(config.global.enable.contains(&"MD013".to_string()));
748        assert!(config.global.disable.contains(&"MD013".to_string()));
749    }
750
751    #[test]
752    fn test_invalid_rule_names() {
753        let temp_dir = tempdir().unwrap();
754        let config_path = temp_dir.path().join(".rumdl.toml");
755
756        let config_content = r#"
757[global]
758enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
759disable = ["MD-001", "MD_002"]
760"#;
761        fs::write(&config_path, config_content).unwrap();
762
763        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
764        let config: Config = sourced.into();
765
766        // All values should be preserved as-is
767        assert_eq!(config.global.enable.len(), 4);
768        assert_eq!(config.global.disable.len(), 2);
769    }
770
771    #[test]
772    fn test_deeply_nested_config() {
773        let temp_dir = tempdir().unwrap();
774        let config_path = temp_dir.path().join(".rumdl.toml");
775
776        // This should be ignored as we don't support nested tables within rule configs
777        let config_content = r#"
778[MD013]
779line-length = 100
780[MD013.nested]
781value = 42
782"#;
783        fs::write(&config_path, config_content).unwrap();
784
785        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
786        let config: Config = sourced.into();
787
788        let rule_config = config.rules.get("MD013").unwrap();
789        assert_eq!(
790            rule_config.values.get("line-length").unwrap(),
791            &toml::Value::Integer(100)
792        );
793        // Nested table should not be present
794        assert!(!rule_config.values.contains_key("nested"));
795    }
796
797    #[test]
798    fn test_unicode_in_config() {
799        let temp_dir = tempdir().unwrap();
800        let config_path = temp_dir.path().join(".rumdl.toml");
801
802        let config_content = r#"
803[global]
804include = ["文档/*.md", "ドキュメント/*.md"]
805exclude = ["测试/*", "🚀/*"]
806
807[MD013]
808line-length = 80
809message = "行太长了 🚨"
810"#;
811        fs::write(&config_path, config_content).unwrap();
812
813        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
814        let config: Config = sourced.into();
815
816        assert_eq!(config.global.include.len(), 2);
817        assert_eq!(config.global.exclude.len(), 2);
818        assert!(config.global.include[0].contains("文档"));
819        assert!(config.global.exclude[1].contains("🚀"));
820
821        let rule_config = config.rules.get("MD013").unwrap();
822        let message = rule_config.values.get("message").unwrap();
823        if let toml::Value::String(s) = message {
824            assert!(s.contains("行太长了"));
825            assert!(s.contains("🚨"));
826        }
827    }
828
829    #[test]
830    fn test_extremely_long_values() {
831        let temp_dir = tempdir().unwrap();
832        let config_path = temp_dir.path().join(".rumdl.toml");
833
834        let long_string = "a".repeat(10000);
835        let config_content = format!(
836            r#"
837[global]
838exclude = ["{long_string}"]
839
840[MD013]
841line-length = 999999999
842"#
843        );
844
845        fs::write(&config_path, config_content).unwrap();
846
847        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
848        let config: Config = sourced.into();
849
850        assert_eq!(config.global.exclude[0].len(), 10000);
851        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
852        assert_eq!(line_length, Some(999999999));
853    }
854
855    #[test]
856    fn test_config_with_comments() {
857        let temp_dir = tempdir().unwrap();
858        let config_path = temp_dir.path().join(".rumdl.toml");
859
860        let config_content = r#"
861[global]
862# This is a comment
863enable = ["MD001"] # Enable MD001
864# disable = ["MD002"] # This is commented out
865
866[MD013] # Line length rule
867line-length = 100 # Set to 100 characters
868# ignored = true # This setting is commented out
869"#;
870        fs::write(&config_path, config_content).unwrap();
871
872        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
873        let config: Config = sourced.into();
874
875        assert_eq!(config.global.enable, vec!["MD001"]);
876        assert!(config.global.disable.is_empty()); // Commented out
877
878        let rule_config = config.rules.get("MD013").unwrap();
879        assert_eq!(rule_config.values.len(), 1); // Only line-length
880        assert!(!rule_config.values.contains_key("ignored"));
881    }
882
883    #[test]
884    fn test_arrays_in_rule_config() {
885        let temp_dir = tempdir().unwrap();
886        let config_path = temp_dir.path().join(".rumdl.toml");
887
888        let config_content = r#"
889[MD002]
890levels = [1, 2, 3]
891tags = ["important", "critical"]
892mixed = [1, "two", true]
893"#;
894        fs::write(&config_path, config_content).unwrap();
895
896        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
897        let config: Config = sourced.into();
898
899        // Arrays should now be properly parsed
900        let rule_config = config.rules.get("MD002").expect("MD002 config should exist");
901
902        // Check that arrays are present and correctly parsed
903        assert!(rule_config.values.contains_key("levels"));
904        assert!(rule_config.values.contains_key("tags"));
905        assert!(rule_config.values.contains_key("mixed"));
906
907        // Verify array contents
908        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
909            assert_eq!(levels.len(), 3);
910            assert_eq!(levels[0], toml::Value::Integer(1));
911            assert_eq!(levels[1], toml::Value::Integer(2));
912            assert_eq!(levels[2], toml::Value::Integer(3));
913        } else {
914            panic!("levels should be an array");
915        }
916
917        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
918            assert_eq!(tags.len(), 2);
919            assert_eq!(tags[0], toml::Value::String("important".to_string()));
920            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
921        } else {
922            panic!("tags should be an array");
923        }
924
925        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
926            assert_eq!(mixed.len(), 3);
927            assert_eq!(mixed[0], toml::Value::Integer(1));
928            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
929            assert_eq!(mixed[2], toml::Value::Boolean(true));
930        } else {
931            panic!("mixed should be an array");
932        }
933    }
934
935    #[test]
936    fn test_normalize_key_edge_cases() {
937        // Rule names
938        assert_eq!(normalize_key("MD001"), "MD001");
939        assert_eq!(normalize_key("md001"), "MD001");
940        assert_eq!(normalize_key("Md001"), "MD001");
941        assert_eq!(normalize_key("mD001"), "MD001");
942
943        // Non-rule names
944        assert_eq!(normalize_key("line_length"), "line-length");
945        assert_eq!(normalize_key("line-length"), "line-length");
946        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
947        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
948
949        // Edge cases
950        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
951        assert_eq!(normalize_key("MD00"), "md00"); // Too short
952        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
953        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
954        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
955        assert_eq!(normalize_key(""), "");
956        assert_eq!(normalize_key("_"), "-");
957        assert_eq!(normalize_key("___"), "---");
958    }
959
960    #[test]
961    fn test_missing_config_file() {
962        let temp_dir = tempdir().unwrap();
963        let config_path = temp_dir.path().join("nonexistent.toml");
964
965        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
966        assert!(result.is_err());
967        match result.unwrap_err() {
968            ConfigError::IoError { .. } => {}
969            _ => panic!("Expected IoError for missing file"),
970        }
971    }
972
973    #[test]
974    #[cfg(unix)]
975    fn test_permission_denied_config() {
976        use std::os::unix::fs::PermissionsExt;
977
978        let temp_dir = tempdir().unwrap();
979        let config_path = temp_dir.path().join(".rumdl.toml");
980
981        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
982
983        // Remove read permissions
984        let mut perms = fs::metadata(&config_path).unwrap().permissions();
985        perms.set_mode(0o000);
986        fs::set_permissions(&config_path, perms).unwrap();
987
988        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
989
990        // Restore permissions for cleanup
991        let mut perms = fs::metadata(&config_path).unwrap().permissions();
992        perms.set_mode(0o644);
993        fs::set_permissions(&config_path, perms).unwrap();
994
995        assert!(result.is_err());
996        match result.unwrap_err() {
997            ConfigError::IoError { .. } => {}
998            _ => panic!("Expected IoError for permission denied"),
999        }
1000    }
1001
1002    #[test]
1003    fn test_circular_reference_detection() {
1004        // This test is more conceptual since TOML doesn't support circular references
1005        // But we test that deeply nested structures don't cause stack overflow
1006        let temp_dir = tempdir().unwrap();
1007        let config_path = temp_dir.path().join(".rumdl.toml");
1008
1009        let mut config_content = String::from("[MD001]\n");
1010        for i in 0..100 {
1011            config_content.push_str(&format!("key{i} = {i}\n"));
1012        }
1013
1014        fs::write(&config_path, config_content).unwrap();
1015
1016        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1017        let config: Config = sourced.into();
1018
1019        let rule_config = config.rules.get("MD001").unwrap();
1020        assert_eq!(rule_config.values.len(), 100);
1021    }
1022
1023    #[test]
1024    fn test_special_toml_values() {
1025        let temp_dir = tempdir().unwrap();
1026        let config_path = temp_dir.path().join(".rumdl.toml");
1027
1028        let config_content = r#"
1029[MD001]
1030infinity = inf
1031neg_infinity = -inf
1032not_a_number = nan
1033datetime = 1979-05-27T07:32:00Z
1034local_date = 1979-05-27
1035local_time = 07:32:00
1036"#;
1037        fs::write(&config_path, config_content).unwrap();
1038
1039        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1040        let config: Config = sourced.into();
1041
1042        // Some values might not be parsed due to parser limitations
1043        if let Some(rule_config) = config.rules.get("MD001") {
1044            // Check special float values if present
1045            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1046                assert!(f.is_infinite() && f.is_sign_positive());
1047            }
1048            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1049                assert!(f.is_infinite() && f.is_sign_negative());
1050            }
1051            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1052                assert!(f.is_nan());
1053            }
1054
1055            // Check datetime values if present
1056            if let Some(val) = rule_config.values.get("datetime") {
1057                assert!(matches!(val, toml::Value::Datetime(_)));
1058            }
1059            // Note: local_date and local_time might not be parsed by the current implementation
1060        }
1061    }
1062
1063    #[test]
1064    fn test_default_config_passes_validation() {
1065        use crate::rules;
1066
1067        let temp_dir = tempdir().unwrap();
1068        let config_path = temp_dir.path().join(".rumdl.toml");
1069        let config_path_str = config_path.to_str().unwrap();
1070
1071        // Create the default config using the same function that `rumdl init` uses
1072        create_default_config(config_path_str).unwrap();
1073
1074        // Load it back as a SourcedConfig
1075        let sourced =
1076            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1077
1078        // Create the rule registry
1079        let all_rules = rules::all_rules(&Config::default());
1080        let registry = RuleRegistry::from_rules(&all_rules);
1081
1082        // Validate the config
1083        let warnings = validate_config_sourced(&sourced, &registry);
1084
1085        // The default config should have no warnings
1086        if !warnings.is_empty() {
1087            for warning in &warnings {
1088                eprintln!("Config validation warning: {}", warning.message);
1089                if let Some(rule) = &warning.rule {
1090                    eprintln!("  Rule: {rule}");
1091                }
1092                if let Some(key) = &warning.key {
1093                    eprintln!("  Key: {key}");
1094                }
1095            }
1096        }
1097        assert!(
1098            warnings.is_empty(),
1099            "Default config from rumdl init should pass validation without warnings"
1100        );
1101    }
1102
1103    #[test]
1104    fn test_per_file_ignores_config_parsing() {
1105        let temp_dir = tempdir().unwrap();
1106        let config_path = temp_dir.path().join(".rumdl.toml");
1107        let config_content = r#"
1108[per-file-ignores]
1109"README.md" = ["MD033"]
1110"docs/**/*.md" = ["MD013", "MD033"]
1111"test/*.md" = ["MD041"]
1112"#;
1113        fs::write(&config_path, config_content).unwrap();
1114
1115        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1116        let config: Config = sourced.into();
1117
1118        // Verify per-file-ignores was loaded
1119        assert_eq!(config.per_file_ignores.len(), 3);
1120        assert_eq!(
1121            config.per_file_ignores.get("README.md"),
1122            Some(&vec!["MD033".to_string()])
1123        );
1124        assert_eq!(
1125            config.per_file_ignores.get("docs/**/*.md"),
1126            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1127        );
1128        assert_eq!(
1129            config.per_file_ignores.get("test/*.md"),
1130            Some(&vec!["MD041".to_string()])
1131        );
1132    }
1133
1134    #[test]
1135    fn test_per_file_ignores_glob_matching() {
1136        use std::path::PathBuf;
1137
1138        let temp_dir = tempdir().unwrap();
1139        let config_path = temp_dir.path().join(".rumdl.toml");
1140        let config_content = r#"
1141[per-file-ignores]
1142"README.md" = ["MD033"]
1143"docs/**/*.md" = ["MD013"]
1144"**/test_*.md" = ["MD041"]
1145"#;
1146        fs::write(&config_path, config_content).unwrap();
1147
1148        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1149        let config: Config = sourced.into();
1150
1151        // Test exact match
1152        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1153        assert!(ignored.contains("MD033"));
1154        assert_eq!(ignored.len(), 1);
1155
1156        // Test glob pattern matching
1157        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1158        assert!(ignored.contains("MD013"));
1159        assert_eq!(ignored.len(), 1);
1160
1161        // Test recursive glob pattern
1162        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1163        assert!(ignored.contains("MD041"));
1164        assert_eq!(ignored.len(), 1);
1165
1166        // Test non-matching path
1167        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1168        assert!(ignored.is_empty());
1169    }
1170
1171    #[test]
1172    fn test_per_file_ignores_pyproject_toml() {
1173        let temp_dir = tempdir().unwrap();
1174        let config_path = temp_dir.path().join("pyproject.toml");
1175        let config_content = r#"
1176[tool.rumdl]
1177[tool.rumdl.per-file-ignores]
1178"README.md" = ["MD033", "MD013"]
1179"generated/*.md" = ["MD041"]
1180"#;
1181        fs::write(&config_path, config_content).unwrap();
1182
1183        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1184        let config: Config = sourced.into();
1185
1186        // Verify per-file-ignores was loaded from pyproject.toml
1187        assert_eq!(config.per_file_ignores.len(), 2);
1188        assert_eq!(
1189            config.per_file_ignores.get("README.md"),
1190            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1191        );
1192        assert_eq!(
1193            config.per_file_ignores.get("generated/*.md"),
1194            Some(&vec!["MD041".to_string()])
1195        );
1196    }
1197
1198    #[test]
1199    fn test_per_file_ignores_multiple_patterns_match() {
1200        use std::path::PathBuf;
1201
1202        let temp_dir = tempdir().unwrap();
1203        let config_path = temp_dir.path().join(".rumdl.toml");
1204        let config_content = r#"
1205[per-file-ignores]
1206"docs/**/*.md" = ["MD013"]
1207"**/api/*.md" = ["MD033"]
1208"docs/api/overview.md" = ["MD041"]
1209"#;
1210        fs::write(&config_path, config_content).unwrap();
1211
1212        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1213        let config: Config = sourced.into();
1214
1215        // File matches multiple patterns - should get union of all rules
1216        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1217        assert_eq!(ignored.len(), 3);
1218        assert!(ignored.contains("MD013"));
1219        assert!(ignored.contains("MD033"));
1220        assert!(ignored.contains("MD041"));
1221    }
1222
1223    #[test]
1224    fn test_per_file_ignores_rule_name_normalization() {
1225        use std::path::PathBuf;
1226
1227        let temp_dir = tempdir().unwrap();
1228        let config_path = temp_dir.path().join(".rumdl.toml");
1229        let config_content = r#"
1230[per-file-ignores]
1231"README.md" = ["md033", "MD013", "Md041"]
1232"#;
1233        fs::write(&config_path, config_content).unwrap();
1234
1235        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1236        let config: Config = sourced.into();
1237
1238        // All rule names should be normalized to uppercase
1239        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1240        assert_eq!(ignored.len(), 3);
1241        assert!(ignored.contains("MD033"));
1242        assert!(ignored.contains("MD013"));
1243        assert!(ignored.contains("MD041"));
1244    }
1245
1246    #[test]
1247    fn test_per_file_ignores_invalid_glob_pattern() {
1248        use std::path::PathBuf;
1249
1250        let temp_dir = tempdir().unwrap();
1251        let config_path = temp_dir.path().join(".rumdl.toml");
1252        let config_content = r#"
1253[per-file-ignores]
1254"[invalid" = ["MD033"]
1255"valid/*.md" = ["MD013"]
1256"#;
1257        fs::write(&config_path, config_content).unwrap();
1258
1259        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1260        let config: Config = sourced.into();
1261
1262        // Invalid pattern should be skipped, valid pattern should work
1263        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1264        assert!(ignored.contains("MD013"));
1265
1266        // Invalid pattern should not cause issues
1267        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1268        assert!(ignored2.is_empty());
1269    }
1270
1271    #[test]
1272    fn test_per_file_ignores_empty_section() {
1273        use std::path::PathBuf;
1274
1275        let temp_dir = tempdir().unwrap();
1276        let config_path = temp_dir.path().join(".rumdl.toml");
1277        let config_content = r#"
1278[global]
1279disable = ["MD001"]
1280
1281[per-file-ignores]
1282"#;
1283        fs::write(&config_path, config_content).unwrap();
1284
1285        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1286        let config: Config = sourced.into();
1287
1288        // Empty per-file-ignores should work fine
1289        assert_eq!(config.per_file_ignores.len(), 0);
1290        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1291        assert!(ignored.is_empty());
1292    }
1293
1294    #[test]
1295    fn test_per_file_ignores_with_underscores_in_pyproject() {
1296        let temp_dir = tempdir().unwrap();
1297        let config_path = temp_dir.path().join("pyproject.toml");
1298        let config_content = r#"
1299[tool.rumdl]
1300[tool.rumdl.per_file_ignores]
1301"README.md" = ["MD033"]
1302"#;
1303        fs::write(&config_path, config_content).unwrap();
1304
1305        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1306        let config: Config = sourced.into();
1307
1308        // Should support both per-file-ignores and per_file_ignores
1309        assert_eq!(config.per_file_ignores.len(), 1);
1310        assert_eq!(
1311            config.per_file_ignores.get("README.md"),
1312            Some(&vec!["MD033".to_string()])
1313        );
1314    }
1315
1316    #[test]
1317    fn test_generate_json_schema() {
1318        use schemars::schema_for;
1319        use std::env;
1320
1321        let schema = schema_for!(Config);
1322        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1323
1324        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1325        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1326            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1327            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1328            println!("Schema written to: {}", schema_path.display());
1329        }
1330
1331        // Basic validation that schema was generated
1332        assert!(schema_json.contains("\"title\": \"Config\""));
1333        assert!(schema_json.contains("\"global\""));
1334        assert!(schema_json.contains("\"per-file-ignores\""));
1335    }
1336}
1337
/// Identifies where a configuration value came from.
///
/// `SourcedValue::merge_override` ranks these to decide whether an incoming value replaces
/// the current one (lowest to highest: `Default`, `PyprojectToml`, `Markdownlint`,
/// `RumdlToml`, `Cli`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default; the source used by the `Default` impls of the sourced config types.
    Default,
    /// Value was loaded from a rumdl TOML config file
    /// (presumably `.rumdl.toml` / `rumdl.toml` — confirm against the loader).
    RumdlToml,
    /// Value was loaded from `pyproject.toml`
    /// (presumably its `[tool.rumdl]` section — confirm against the loader).
    PyprojectToml,
    /// Value was supplied on the command line (presumably via CLI overrides — confirm at call sites).
    Cli,
    /// Value was loaded from a markdownlint config file (e.g. .markdownlint.json, .markdownlint.yaml)
    Markdownlint,
}
1347
1348#[derive(Debug, Clone)]
1349pub struct ConfigOverride<T> {
1350    pub value: T,
1351    pub source: ConfigSource,
1352    pub file: Option<String>,
1353    pub line: Option<usize>,
1354}
1355
1356#[derive(Debug, Clone)]
1357pub struct SourcedValue<T> {
1358    pub value: T,
1359    pub source: ConfigSource,
1360    pub overrides: Vec<ConfigOverride<T>>,
1361}
1362
1363impl<T: Clone> SourcedValue<T> {
1364    pub fn new(value: T, source: ConfigSource) -> Self {
1365        Self {
1366            value: value.clone(),
1367            source,
1368            overrides: vec![ConfigOverride {
1369                value,
1370                source,
1371                file: None,
1372                line: None,
1373            }],
1374        }
1375    }
1376
1377    /// Merges a new override into this SourcedValue based on source precedence.
1378    /// If the new source has higher or equal precedence, the value and source are updated,
1379    /// and the new override is added to the history.
1380    pub fn merge_override(
1381        &mut self,
1382        new_value: T,
1383        new_source: ConfigSource,
1384        new_file: Option<String>,
1385        new_line: Option<usize>,
1386    ) {
1387        // Helper function to get precedence, defined locally or globally
1388        fn source_precedence(src: ConfigSource) -> u8 {
1389            match src {
1390                ConfigSource::Default => 0,
1391                ConfigSource::PyprojectToml => 1,
1392                ConfigSource::Markdownlint => 2,
1393                ConfigSource::RumdlToml => 3,
1394                ConfigSource::Cli => 4,
1395            }
1396        }
1397
1398        if source_precedence(new_source) >= source_precedence(self.source) {
1399            self.value = new_value.clone();
1400            self.source = new_source;
1401            self.overrides.push(ConfigOverride {
1402                value: new_value,
1403                source: new_source,
1404                file: new_file,
1405                line: new_line,
1406            });
1407        }
1408    }
1409
1410    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1411        // This is essentially merge_override without the precedence check
1412        // We might consolidate these later, but keep separate for now during refactor
1413        self.value = value.clone();
1414        self.source = source;
1415        self.overrides.push(ConfigOverride {
1416            value,
1417            source,
1418            file,
1419            line,
1420        });
1421    }
1422}
1423
1424#[derive(Debug, Clone)]
1425pub struct SourcedGlobalConfig {
1426    pub enable: SourcedValue<Vec<String>>,
1427    pub disable: SourcedValue<Vec<String>>,
1428    pub exclude: SourcedValue<Vec<String>>,
1429    pub include: SourcedValue<Vec<String>>,
1430    pub respect_gitignore: SourcedValue<bool>,
1431    pub line_length: SourcedValue<u64>,
1432    pub output_format: Option<SourcedValue<String>>,
1433    pub fixable: SourcedValue<Vec<String>>,
1434    pub unfixable: SourcedValue<Vec<String>>,
1435    pub flavor: SourcedValue<MarkdownFlavor>,
1436    pub force_exclude: SourcedValue<bool>,
1437}
1438
1439impl Default for SourcedGlobalConfig {
1440    fn default() -> Self {
1441        SourcedGlobalConfig {
1442            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1443            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1444            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1445            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1446            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1447            line_length: SourcedValue::new(80, ConfigSource::Default),
1448            output_format: None,
1449            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1450            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1451            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1452            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1453        }
1454    }
1455}
1456
1457#[derive(Debug, Default, Clone)]
1458pub struct SourcedRuleConfig {
1459    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
1460}
1461
1462/// Represents configuration loaded from a single source file, with provenance.
1463/// Used as an intermediate step before merging into the final SourcedConfig.
1464#[derive(Debug, Clone)]
1465pub struct SourcedConfigFragment {
1466    pub global: SourcedGlobalConfig,
1467    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
1468    pub rules: BTreeMap<String, SourcedRuleConfig>,
1469    // Note: Does not include loaded_files or unknown_keys, as those are tracked globally.
1470}
1471
1472impl Default for SourcedConfigFragment {
1473    fn default() -> Self {
1474        Self {
1475            global: SourcedGlobalConfig::default(),
1476            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1477            rules: BTreeMap::new(),
1478        }
1479    }
1480}
1481
1482#[derive(Debug, Clone)]
1483pub struct SourcedConfig {
1484    pub global: SourcedGlobalConfig,
1485    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
1486    pub rules: BTreeMap<String, SourcedRuleConfig>,
1487    pub loaded_files: Vec<String>,
1488    pub unknown_keys: Vec<(String, String)>, // (section, key)
1489}
1490
1491impl Default for SourcedConfig {
1492    fn default() -> Self {
1493        Self {
1494            global: SourcedGlobalConfig::default(),
1495            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1496            rules: BTreeMap::new(),
1497            loaded_files: Vec::new(),
1498            unknown_keys: Vec::new(),
1499        }
1500    }
1501}
1502
1503impl SourcedConfig {
1504    /// Merges another SourcedConfigFragment into this SourcedConfig.
1505    /// Uses source precedence to determine which values take effect.
1506    fn merge(&mut self, fragment: SourcedConfigFragment) {
1507        // Merge global config
1508        self.global.enable.merge_override(
1509            fragment.global.enable.value,
1510            fragment.global.enable.source,
1511            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1512            fragment.global.enable.overrides.first().and_then(|o| o.line),
1513        );
1514        self.global.disable.merge_override(
1515            fragment.global.disable.value,
1516            fragment.global.disable.source,
1517            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1518            fragment.global.disable.overrides.first().and_then(|o| o.line),
1519        );
1520        self.global.include.merge_override(
1521            fragment.global.include.value,
1522            fragment.global.include.source,
1523            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1524            fragment.global.include.overrides.first().and_then(|o| o.line),
1525        );
1526        self.global.exclude.merge_override(
1527            fragment.global.exclude.value,
1528            fragment.global.exclude.source,
1529            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1530            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1531        );
1532        self.global.respect_gitignore.merge_override(
1533            fragment.global.respect_gitignore.value,
1534            fragment.global.respect_gitignore.source,
1535            fragment
1536                .global
1537                .respect_gitignore
1538                .overrides
1539                .first()
1540                .and_then(|o| o.file.clone()),
1541            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1542        );
1543        self.global.line_length.merge_override(
1544            fragment.global.line_length.value,
1545            fragment.global.line_length.source,
1546            fragment
1547                .global
1548                .line_length
1549                .overrides
1550                .first()
1551                .and_then(|o| o.file.clone()),
1552            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1553        );
1554        self.global.fixable.merge_override(
1555            fragment.global.fixable.value,
1556            fragment.global.fixable.source,
1557            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1558            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1559        );
1560        self.global.unfixable.merge_override(
1561            fragment.global.unfixable.value,
1562            fragment.global.unfixable.source,
1563            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1564            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1565        );
1566
1567        // Merge flavor
1568        self.global.flavor.merge_override(
1569            fragment.global.flavor.value,
1570            fragment.global.flavor.source,
1571            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1572            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1573        );
1574
1575        // Merge force_exclude
1576        self.global.force_exclude.merge_override(
1577            fragment.global.force_exclude.value,
1578            fragment.global.force_exclude.source,
1579            fragment
1580                .global
1581                .force_exclude
1582                .overrides
1583                .first()
1584                .and_then(|o| o.file.clone()),
1585            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1586        );
1587
1588        // Merge output_format if present
1589        if let Some(output_format_fragment) = fragment.global.output_format {
1590            if let Some(ref mut output_format) = self.global.output_format {
1591                output_format.merge_override(
1592                    output_format_fragment.value,
1593                    output_format_fragment.source,
1594                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1595                    output_format_fragment.overrides.first().and_then(|o| o.line),
1596                );
1597            } else {
1598                self.global.output_format = Some(output_format_fragment);
1599            }
1600        }
1601
1602        // Merge per_file_ignores
1603        self.per_file_ignores.merge_override(
1604            fragment.per_file_ignores.value,
1605            fragment.per_file_ignores.source,
1606            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1607            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1608        );
1609
1610        // Merge rule configs
1611        for (rule_name, rule_fragment) in fragment.rules {
1612            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1613            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1614            for (key, sourced_value_fragment) in rule_fragment.values {
1615                let sv_entry = rule_entry
1616                    .values
1617                    .entry(key.clone())
1618                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1619                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1620                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1621                sv_entry.merge_override(
1622                    sourced_value_fragment.value,  // Use the value from the fragment
1623                    sourced_value_fragment.source, // Use the source from the fragment
1624                    file_from_fragment,            // Pass the file path from the fragment override
1625                    line_from_fragment,            // Pass the line number from the fragment override
1626                );
1627            }
1628        }
1629    }
1630
1631    /// Load and merge configurations from files and CLI overrides.
1632    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
1633        Self::load_with_discovery(config_path, cli_overrides, false)
1634    }
1635
1636    /// Discover configuration file by traversing up the directory tree.
1637    /// Returns the first configuration file found.
1638    fn discover_config_upward() -> Option<std::path::PathBuf> {
1639        use std::env;
1640
1641        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1642        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
1643
1644        let start_dir = match env::current_dir() {
1645            Ok(dir) => dir,
1646            Err(e) => {
1647                log::debug!("[rumdl-config] Failed to get current directory: {e}");
1648                return None;
1649            }
1650        };
1651
1652        let mut current_dir = start_dir.clone();
1653        let mut depth = 0;
1654
1655        loop {
1656            if depth >= MAX_DEPTH {
1657                log::debug!("[rumdl-config] Maximum traversal depth reached");
1658                break;
1659            }
1660
1661            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
1662
1663            // Check for config files in order of precedence
1664            for config_name in CONFIG_FILES {
1665                let config_path = current_dir.join(config_name);
1666
1667                if config_path.exists() {
1668                    // For pyproject.toml, verify it contains [tool.rumdl] section
1669                    if *config_name == "pyproject.toml" {
1670                        if let Ok(content) = std::fs::read_to_string(&config_path) {
1671                            if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1672                                log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1673                                return Some(config_path);
1674                            }
1675                            log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
1676                            continue;
1677                        }
1678                    } else {
1679                        log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1680                        return Some(config_path);
1681                    }
1682                }
1683            }
1684
1685            // Check for .git directory (stop boundary)
1686            if current_dir.join(".git").exists() {
1687                log::debug!("[rumdl-config] Stopping at .git directory");
1688                break;
1689            }
1690
1691            // Move to parent directory
1692            match current_dir.parent() {
1693                Some(parent) => {
1694                    current_dir = parent.to_owned();
1695                    depth += 1;
1696                }
1697                None => {
1698                    log::debug!("[rumdl-config] Reached filesystem root");
1699                    break;
1700                }
1701            }
1702        }
1703
1704        None
1705    }
1706
1707    /// Discover user-level configuration file from platform-specific config directory.
1708    /// Returns the first configuration file found in the user config directory.
1709    fn user_configuration_path() -> Option<std::path::PathBuf> {
1710        use etcetera::{BaseStrategy, choose_base_strategy};
1711
1712        match choose_base_strategy() {
1713            Ok(strategy) => {
1714                let config_dir = strategy.config_dir().join("rumdl");
1715
1716                // Check for config files in precedence order (same as project discovery)
1717                const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1718
1719                log::debug!(
1720                    "[rumdl-config] Checking for user configuration in: {}",
1721                    config_dir.display()
1722                );
1723
1724                for filename in USER_CONFIG_FILES {
1725                    let config_path = config_dir.join(filename);
1726
1727                    if config_path.exists() {
1728                        // For pyproject.toml, verify it contains [tool.rumdl] section
1729                        if *filename == "pyproject.toml" {
1730                            if let Ok(content) = std::fs::read_to_string(&config_path) {
1731                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1732                                    log::debug!(
1733                                        "[rumdl-config] Found user configuration at: {}",
1734                                        config_path.display()
1735                                    );
1736                                    return Some(config_path);
1737                                }
1738                                log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
1739                                continue;
1740                            }
1741                        } else {
1742                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1743                            return Some(config_path);
1744                        }
1745                    }
1746                }
1747
1748                log::debug!(
1749                    "[rumdl-config] No user configuration found in: {}",
1750                    config_dir.display()
1751                );
1752                None
1753            }
1754            Err(e) => {
1755                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
1756                None
1757            }
1758        }
1759    }
1760
1761    /// Load and merge configurations from files and CLI overrides.
1762    /// If skip_auto_discovery is true, only explicit config paths are loaded.
1763    pub fn load_with_discovery(
1764        config_path: Option<&str>,
1765        cli_overrides: Option<&SourcedGlobalConfig>,
1766        skip_auto_discovery: bool,
1767    ) -> Result<Self, ConfigError> {
1768        use std::env;
1769        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
1770        if config_path.is_none() {
1771            if skip_auto_discovery {
1772                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
1773            } else {
1774                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
1775            }
1776        } else {
1777            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
1778        }
1779        let mut sourced_config = SourcedConfig::default();
1780
1781        // 1. Load explicit config path if provided
1782        if let Some(path) = config_path {
1783            let path_obj = Path::new(path);
1784            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
1785            log::debug!("[rumdl-config] Trying to load config file: {filename}");
1786            let path_str = path.to_string();
1787
1788            // Known markdownlint config files
1789            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
1790
1791            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
1792                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1793                    source: e,
1794                    path: path_str.clone(),
1795                })?;
1796                if filename == "pyproject.toml" {
1797                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1798                        sourced_config.merge(fragment);
1799                        sourced_config.loaded_files.push(path_str.clone());
1800                    }
1801                } else {
1802                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1803                    sourced_config.merge(fragment);
1804                    sourced_config.loaded_files.push(path_str.clone());
1805                }
1806            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
1807                || path_str.ends_with(".json")
1808                || path_str.ends_with(".jsonc")
1809                || path_str.ends_with(".yaml")
1810                || path_str.ends_with(".yml")
1811            {
1812                // Parse as markdownlint config (JSON/YAML)
1813                let fragment = load_from_markdownlint(&path_str)?;
1814                sourced_config.merge(fragment);
1815                sourced_config.loaded_files.push(path_str.clone());
1816                // markdownlint is fallback only
1817            } else {
1818                // Try TOML only
1819                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1820                    source: e,
1821                    path: path_str.clone(),
1822                })?;
1823                let fragment = parse_rumdl_toml(&content, &path_str)?;
1824                sourced_config.merge(fragment);
1825                sourced_config.loaded_files.push(path_str.clone());
1826            }
1827        }
1828
1829        // Only perform auto-discovery if not skipped AND no explicit config path provided
1830        if !skip_auto_discovery && config_path.is_none() {
1831            // Step 1: Load user configuration first (as a base)
1832            if let Some(user_config_path) = Self::user_configuration_path() {
1833                let path_str = user_config_path.display().to_string();
1834                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1835
1836                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
1837
1838                if filename == "pyproject.toml" {
1839                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1840                        source: e,
1841                        path: path_str.clone(),
1842                    })?;
1843                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1844                        sourced_config.merge(fragment);
1845                        sourced_config.loaded_files.push(path_str);
1846                    }
1847                } else {
1848                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1849                        source: e,
1850                        path: path_str.clone(),
1851                    })?;
1852                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1853                    sourced_config.merge(fragment);
1854                    sourced_config.loaded_files.push(path_str);
1855                }
1856            } else {
1857                log::debug!("[rumdl-config] No user configuration file found");
1858            }
1859
1860            // Step 2: Look for project configuration files (override user config)
1861            if let Some(config_file) = Self::discover_config_upward() {
1862                let path_str = config_file.display().to_string();
1863                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
1864
1865                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
1866
1867                if filename == "pyproject.toml" {
1868                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1869                        source: e,
1870                        path: path_str.clone(),
1871                    })?;
1872                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1873                        sourced_config.merge(fragment);
1874                        sourced_config.loaded_files.push(path_str);
1875                    }
1876                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
1877                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1878                        source: e,
1879                        path: path_str.clone(),
1880                    })?;
1881                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1882                    sourced_config.merge(fragment);
1883                    sourced_config.loaded_files.push(path_str);
1884                }
1885            } else {
1886                log::debug!("[rumdl-config] No configuration file found via upward traversal");
1887
1888                // Step 3: If no project config found, fallback to markdownlint config in current directory
1889                let mut found_markdownlint = false;
1890                for filename in MARKDOWNLINT_CONFIG_FILES {
1891                    if std::path::Path::new(filename).exists() {
1892                        match load_from_markdownlint(filename) {
1893                            Ok(fragment) => {
1894                                sourced_config.merge(fragment);
1895                                sourced_config.loaded_files.push(filename.to_string());
1896                                found_markdownlint = true;
1897                                break; // Load only the first one found
1898                            }
1899                            Err(_e) => {
1900                                // Log error but continue (it's just a fallback)
1901                            }
1902                        }
1903                    }
1904                }
1905
1906                if !found_markdownlint {
1907                    log::debug!("[rumdl-config] No markdownlint configuration file found");
1908                }
1909            }
1910        }
1911
1912        // 5. Apply CLI overrides (highest precedence)
1913        if let Some(cli) = cli_overrides {
1914            sourced_config
1915                .global
1916                .enable
1917                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
1918            sourced_config
1919                .global
1920                .disable
1921                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
1922            sourced_config
1923                .global
1924                .exclude
1925                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
1926            sourced_config
1927                .global
1928                .include
1929                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
1930            sourced_config.global.respect_gitignore.merge_override(
1931                cli.respect_gitignore.value,
1932                ConfigSource::Cli,
1933                None,
1934                None,
1935            );
1936            sourced_config
1937                .global
1938                .fixable
1939                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
1940            sourced_config
1941                .global
1942                .unfixable
1943                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
1944            // No rule-specific CLI overrides implemented yet
1945        }
1946
1947        // TODO: Handle unknown keys collected during parsing/merging
1948
1949        Ok(sourced_config)
1950    }
1951}
1952
1953impl From<SourcedConfig> for Config {
1954    fn from(sourced: SourcedConfig) -> Self {
1955        let mut rules = BTreeMap::new();
1956        for (rule_name, sourced_rule_cfg) in sourced.rules {
1957            // Normalize rule name to uppercase for case-insensitive lookup
1958            let normalized_rule_name = rule_name.to_ascii_uppercase();
1959            let mut values = BTreeMap::new();
1960            for (key, sourced_val) in sourced_rule_cfg.values {
1961                values.insert(key, sourced_val.value);
1962            }
1963            rules.insert(normalized_rule_name, RuleConfig { values });
1964        }
1965        let global = GlobalConfig {
1966            enable: sourced.global.enable.value,
1967            disable: sourced.global.disable.value,
1968            exclude: sourced.global.exclude.value,
1969            include: sourced.global.include.value,
1970            respect_gitignore: sourced.global.respect_gitignore.value,
1971            line_length: sourced.global.line_length.value,
1972            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
1973            fixable: sourced.global.fixable.value,
1974            unfixable: sourced.global.unfixable.value,
1975            flavor: sourced.global.flavor.value,
1976            force_exclude: sourced.global.force_exclude.value,
1977        };
1978        Config {
1979            global,
1980            per_file_ignores: sourced.per_file_ignores.value,
1981            rules,
1982        }
1983    }
1984}
1985
1986/// Registry of all known rules and their config schemas
1987pub struct RuleRegistry {
1988    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types
1989    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
1990    /// Map of rule name to config key aliases
1991    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
1992}
1993
1994impl RuleRegistry {
1995    /// Build a registry from a list of rules
1996    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
1997        let mut rule_schemas = std::collections::BTreeMap::new();
1998        let mut rule_aliases = std::collections::BTreeMap::new();
1999
2000        for rule in rules {
2001            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2002                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2003                rule_schemas.insert(norm_name.clone(), table);
2004                norm_name
2005            } else {
2006                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2007                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2008                norm_name
2009            };
2010
2011            // Store aliases if the rule provides them
2012            if let Some(aliases) = rule.config_aliases() {
2013                rule_aliases.insert(norm_name, aliases);
2014            }
2015        }
2016
2017        RuleRegistry {
2018            rule_schemas,
2019            rule_aliases,
2020        }
2021    }
2022
2023    /// Get all known rule names
2024    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2025        self.rule_schemas.keys().cloned().collect()
2026    }
2027
2028    /// Get the valid configuration keys for a rule, including both original and normalized variants
2029    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2030        self.rule_schemas.get(rule).map(|schema| {
2031            let mut all_keys = std::collections::BTreeSet::new();
2032
2033            // Add original keys from schema
2034            for key in schema.keys() {
2035                all_keys.insert(key.clone());
2036            }
2037
2038            // Add normalized variants for markdownlint compatibility
2039            for key in schema.keys() {
2040                // Add kebab-case variant
2041                all_keys.insert(key.replace('_', "-"));
2042                // Add snake_case variant
2043                all_keys.insert(key.replace('-', "_"));
2044                // Add normalized variant
2045                all_keys.insert(normalize_key(key));
2046            }
2047
2048            // Add any aliases defined by the rule
2049            if let Some(aliases) = self.rule_aliases.get(rule) {
2050                for alias_key in aliases.keys() {
2051                    all_keys.insert(alias_key.clone());
2052                    // Also add normalized variants of the alias
2053                    all_keys.insert(alias_key.replace('_', "-"));
2054                    all_keys.insert(alias_key.replace('-', "_"));
2055                    all_keys.insert(normalize_key(alias_key));
2056                }
2057            }
2058
2059            all_keys
2060        })
2061    }
2062
2063    /// Get the expected value type for a rule's configuration key, trying variants
2064    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2065        if let Some(schema) = self.rule_schemas.get(rule) {
2066            // Check if this key is an alias
2067            if let Some(aliases) = self.rule_aliases.get(rule)
2068                && let Some(canonical_key) = aliases.get(key)
2069            {
2070                // Use the canonical key for schema lookup
2071                if let Some(value) = schema.get(canonical_key) {
2072                    return Some(value);
2073                }
2074            }
2075
2076            // Try the original key
2077            if let Some(value) = schema.get(key) {
2078                return Some(value);
2079            }
2080
2081            // Try key variants
2082            let key_variants = [
2083                key.replace('-', "_"), // Convert kebab-case to snake_case
2084                key.replace('_', "-"), // Convert snake_case to kebab-case
2085                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2086            ];
2087
2088            for variant in &key_variants {
2089                if let Some(value) = schema.get(variant) {
2090                    return Some(value);
2091                }
2092            }
2093        }
2094        None
2095    }
2096}
2097
/// Represents a config validation warning or error
///
/// Produced by `validate_config_sourced` for unknown rules, unknown rule
/// options, option type mismatches and unknown global options.
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem.
    pub message: String,
    /// Name of the rule the warning refers to; `None` for global options.
    pub rule: Option<String>,
    /// Name of the offending option; `None` when the rule itself is unknown.
    pub key: Option<String>,
}
2105
2106/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking
2107pub fn validate_config_sourced(sourced: &SourcedConfig, registry: &RuleRegistry) -> Vec<ConfigValidationWarning> {
2108    let mut warnings = Vec::new();
2109    let known_rules = registry.rule_names();
2110    // 1. Unknown rules
2111    for rule in sourced.rules.keys() {
2112        if !known_rules.contains(rule) {
2113            warnings.push(ConfigValidationWarning {
2114                message: format!("Unknown rule in config: {rule}"),
2115                rule: Some(rule.clone()),
2116                key: None,
2117            });
2118        }
2119    }
2120    // 2. Unknown options and type mismatches
2121    for (rule, rule_cfg) in &sourced.rules {
2122        if let Some(valid_keys) = registry.config_keys_for(rule) {
2123            for key in rule_cfg.values.keys() {
2124                if !valid_keys.contains(key) {
2125                    warnings.push(ConfigValidationWarning {
2126                        message: format!("Unknown option for rule {rule}: {key}"),
2127                        rule: Some(rule.clone()),
2128                        key: Some(key.clone()),
2129                    });
2130                } else {
2131                    // Type check: compare type of value to type of default
2132                    if let Some(expected) = registry.expected_value_for(rule, key) {
2133                        let actual = &rule_cfg.values[key].value;
2134                        if !toml_value_type_matches(expected, actual) {
2135                            warnings.push(ConfigValidationWarning {
2136                                message: format!(
2137                                    "Type mismatch for {}.{}: expected {}, got {}",
2138                                    rule,
2139                                    key,
2140                                    toml_type_name(expected),
2141                                    toml_type_name(actual)
2142                                ),
2143                                rule: Some(rule.clone()),
2144                                key: Some(key.clone()),
2145                            });
2146                        }
2147                    }
2148                }
2149            }
2150        }
2151    }
2152    // 3. Unknown global options (from unknown_keys)
2153    for (section, key) in &sourced.unknown_keys {
2154        if section.contains("[global]") {
2155            warnings.push(ConfigValidationWarning {
2156                message: format!("Unknown global option: {key}"),
2157                rule: None,
2158                key: Some(key.clone()),
2159            });
2160        }
2161    }
2162    warnings
2163}
2164
2165fn toml_type_name(val: &toml::Value) -> &'static str {
2166    match val {
2167        toml::Value::String(_) => "string",
2168        toml::Value::Integer(_) => "integer",
2169        toml::Value::Float(_) => "float",
2170        toml::Value::Boolean(_) => "boolean",
2171        toml::Value::Array(_) => "array",
2172        toml::Value::Table(_) => "table",
2173        toml::Value::Datetime(_) => "datetime",
2174    }
2175}
2176
2177fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2178    use toml::Value::*;
2179    match (expected, actual) {
2180        (String(_), String(_)) => true,
2181        (Integer(_), Integer(_)) => true,
2182        (Float(_), Float(_)) => true,
2183        (Boolean(_), Boolean(_)) => true,
2184        (Array(_), Array(_)) => true,
2185        (Table(_), Table(_)) => true,
2186        (Datetime(_), Datetime(_)) => true,
2187        // Allow integer for float
2188        (Float(_), Integer(_)) => true,
2189        _ => false,
2190    }
2191}
2192
2193/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2194fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2195    let doc: toml::Value =
2196        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2197    let mut fragment = SourcedConfigFragment::default();
2198    let source = ConfigSource::PyprojectToml;
2199    let file = Some(path.to_string());
2200
2201    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2202    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2203        && let Some(rumdl_table) = rumdl_config.as_table()
2204    {
2205        // Helper function to extract global config from a table
2206        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2207            // Extract global options from the given table
2208            if let Some(enable) = table.get("enable")
2209                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2210            {
2211                // Normalize rule names in the list
2212                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2213                fragment
2214                    .global
2215                    .enable
2216                    .push_override(normalized_values, source, file.clone(), None);
2217            }
2218
2219            if let Some(disable) = table.get("disable")
2220                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2221            {
2222                // Re-enable normalization
2223                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2224                fragment
2225                    .global
2226                    .disable
2227                    .push_override(normalized_values, source, file.clone(), None);
2228            }
2229
2230            if let Some(include) = table.get("include")
2231                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2232            {
2233                fragment
2234                    .global
2235                    .include
2236                    .push_override(values, source, file.clone(), None);
2237            }
2238
2239            if let Some(exclude) = table.get("exclude")
2240                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2241            {
2242                fragment
2243                    .global
2244                    .exclude
2245                    .push_override(values, source, file.clone(), None);
2246            }
2247
2248            if let Some(respect_gitignore) = table
2249                .get("respect-gitignore")
2250                .or_else(|| table.get("respect_gitignore"))
2251                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2252            {
2253                fragment
2254                    .global
2255                    .respect_gitignore
2256                    .push_override(value, source, file.clone(), None);
2257            }
2258
2259            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2260                && let Ok(value) = bool::deserialize(force_exclude.clone())
2261            {
2262                fragment
2263                    .global
2264                    .force_exclude
2265                    .push_override(value, source, file.clone(), None);
2266            }
2267
2268            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2269                && let Ok(value) = String::deserialize(output_format.clone())
2270            {
2271                if fragment.global.output_format.is_none() {
2272                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2273                } else {
2274                    fragment
2275                        .global
2276                        .output_format
2277                        .as_mut()
2278                        .unwrap()
2279                        .push_override(value, source, file.clone(), None);
2280                }
2281            }
2282
2283            if let Some(fixable) = table.get("fixable")
2284                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2285            {
2286                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2287                fragment
2288                    .global
2289                    .fixable
2290                    .push_override(normalized_values, source, file.clone(), None);
2291            }
2292
2293            if let Some(unfixable) = table.get("unfixable")
2294                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2295            {
2296                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2297                fragment
2298                    .global
2299                    .unfixable
2300                    .push_override(normalized_values, source, file.clone(), None);
2301            }
2302
2303            if let Some(flavor) = table.get("flavor")
2304                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2305            {
2306                fragment.global.flavor.push_override(value, source, file.clone(), None);
2307            }
2308
2309            // Handle line-length special case - this should set the global line_length
2310            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
2311                && let Ok(value) = u64::deserialize(line_length.clone())
2312            {
2313                fragment
2314                    .global
2315                    .line_length
2316                    .push_override(value, source, file.clone(), None);
2317
2318                // Also add to MD013 rule config for backward compatibility
2319                let norm_md013_key = normalize_key("MD013");
2320                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
2321                let norm_line_length_key = normalize_key("line-length");
2322                let sv = rule_entry
2323                    .values
2324                    .entry(norm_line_length_key)
2325                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
2326                sv.push_override(line_length.clone(), source, file.clone(), None);
2327            }
2328        };
2329
2330        // First, check for [tool.rumdl.global] section
2331        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
2332            extract_global_config(&mut fragment, global_table);
2333        }
2334
2335        // Also extract global options from [tool.rumdl] directly (for flat structure)
2336        extract_global_config(&mut fragment, rumdl_table);
2337
2338        // --- Extract per-file-ignores configurations ---
2339        // Check both hyphenated and underscored versions for compatibility
2340        let per_file_ignores_key = rumdl_table
2341            .get("per-file-ignores")
2342            .or_else(|| rumdl_table.get("per_file_ignores"));
2343
2344        if let Some(per_file_ignores_value) = per_file_ignores_key
2345            && let Some(per_file_table) = per_file_ignores_value.as_table()
2346        {
2347            let mut per_file_map = HashMap::new();
2348            for (pattern, rules_value) in per_file_table {
2349                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
2350                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
2351                    per_file_map.insert(pattern.clone(), normalized_rules);
2352                } else {
2353                    log::warn!(
2354                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
2355                    );
2356                }
2357            }
2358            fragment
2359                .per_file_ignores
2360                .push_override(per_file_map, source, file.clone(), None);
2361        }
2362
2363        // --- Extract rule-specific configurations ---
2364        for (key, value) in rumdl_table {
2365            let norm_rule_key = normalize_key(key);
2366
2367            // Skip keys already handled as global or special cases
2368            if [
2369                "enable",
2370                "disable",
2371                "include",
2372                "exclude",
2373                "respect_gitignore",
2374                "respect-gitignore", // Added kebab-case here too
2375                "force_exclude",
2376                "force-exclude",
2377                "line_length",
2378                "line-length",
2379                "output_format",
2380                "output-format",
2381                "fixable",
2382                "unfixable",
2383                "per-file-ignores",
2384                "per_file_ignores",
2385                "global",
2386            ]
2387            .contains(&norm_rule_key.as_str())
2388            {
2389                continue;
2390            }
2391
2392            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
2393            // AND if the value is actually a TOML table before processing as rule config.
2394            // This prevents misinterpreting other top-level keys under [tool.rumdl]
2395            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
2396            if norm_rule_key_upper.len() == 5
2397                && norm_rule_key_upper.starts_with("MD")
2398                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
2399                && value.is_table()
2400            {
2401                if let Some(rule_config_table) = value.as_table() {
2402                    // Get the entry for this rule (e.g., "md013")
2403                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
2404                    for (rk, rv) in rule_config_table {
2405                        let norm_rk = normalize_key(rk); // Normalize the config key itself
2406
2407                        let toml_val = rv.clone();
2408
2409                        let sv = rule_entry
2410                            .values
2411                            .entry(norm_rk.clone())
2412                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2413                        sv.push_override(toml_val, source, file.clone(), None);
2414                    }
2415                }
2416            } else {
2417                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
2418                // TODO: Track unknown keys/sections if necessary for validation later.
2419                // eprintln!("[DEBUG parse_pyproject] Skipping key '{}' as it's not a recognized rule table.", key);
2420            }
2421        }
2422    }
2423
2424    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
2425    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
2426        for (key, value) in tool_table.iter() {
2427            if let Some(rule_name) = key.strip_prefix("rumdl.") {
2428                let norm_rule_name = normalize_key(rule_name);
2429                if norm_rule_name.len() == 5
2430                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2431                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2432                    && let Some(rule_table) = value.as_table()
2433                {
2434                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2435                    for (rk, rv) in rule_table {
2436                        let norm_rk = normalize_key(rk);
2437                        let toml_val = rv.clone();
2438                        let sv = rule_entry
2439                            .values
2440                            .entry(norm_rk.clone())
2441                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2442                        sv.push_override(toml_val, source, file.clone(), None);
2443                    }
2444                }
2445            }
2446        }
2447    }
2448
2449    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
2450    if let Some(doc_table) = doc.as_table() {
2451        for (key, value) in doc_table.iter() {
2452            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
2453                let norm_rule_name = normalize_key(rule_name);
2454                if norm_rule_name.len() == 5
2455                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2456                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2457                    && let Some(rule_table) = value.as_table()
2458                {
2459                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2460                    for (rk, rv) in rule_table {
2461                        let norm_rk = normalize_key(rk);
2462                        let toml_val = rv.clone();
2463                        let sv = rule_entry
2464                            .values
2465                            .entry(norm_rk.clone())
2466                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2467                        sv.push_override(toml_val, source, file.clone(), None);
2468                    }
2469                }
2470            }
2471        }
2472    }
2473
2474    // Only return Some(fragment) if any config was found
2475    let has_any = !fragment.global.enable.value.is_empty()
2476        || !fragment.global.disable.value.is_empty()
2477        || !fragment.global.include.value.is_empty()
2478        || !fragment.global.exclude.value.is_empty()
2479        || !fragment.global.fixable.value.is_empty()
2480        || !fragment.global.unfixable.value.is_empty()
2481        || fragment.global.output_format.is_some()
2482        || !fragment.per_file_ignores.value.is_empty()
2483        || !fragment.rules.is_empty();
2484    if has_any { Ok(Some(fragment)) } else { Ok(None) }
2485}
2486
2487/// Parses rumdl.toml / .rumdl.toml content.
2488fn parse_rumdl_toml(content: &str, path: &str) -> Result<SourcedConfigFragment, ConfigError> {
2489    let doc = content
2490        .parse::<DocumentMut>()
2491        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2492    let mut fragment = SourcedConfigFragment::default();
2493    let source = ConfigSource::RumdlToml;
2494    let file = Some(path.to_string());
2495
2496    // Define known rules before the loop
2497    let all_rules = rules::all_rules(&Config::default());
2498    let registry = RuleRegistry::from_rules(&all_rules);
2499    let known_rule_names: BTreeSet<String> = registry
2500        .rule_names()
2501        .into_iter()
2502        .map(|s| s.to_ascii_uppercase())
2503        .collect();
2504
2505    // Handle [global] section
2506    if let Some(global_item) = doc.get("global")
2507        && let Some(global_table) = global_item.as_table()
2508    {
2509        for (key, value_item) in global_table.iter() {
2510            let norm_key = normalize_key(key);
2511            match norm_key.as_str() {
2512                "enable" | "disable" | "include" | "exclude" => {
2513                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2514                        // Corrected: Iterate directly over the Formatted<Array>
2515                        let values: Vec<String> = formatted_array
2516                                .iter()
2517                                .filter_map(|item| item.as_str()) // Extract strings
2518                                .map(|s| s.to_string())
2519                                .collect();
2520
2521                        // Normalize rule names for enable/disable
2522                        let final_values = if norm_key == "enable" || norm_key == "disable" {
2523                            // Corrected: Pass &str to normalize_key
2524                            values.into_iter().map(|s| normalize_key(&s)).collect()
2525                        } else {
2526                            values
2527                        };
2528
2529                        match norm_key.as_str() {
2530                            "enable" => fragment
2531                                .global
2532                                .enable
2533                                .push_override(final_values, source, file.clone(), None),
2534                            "disable" => {
2535                                fragment
2536                                    .global
2537                                    .disable
2538                                    .push_override(final_values, source, file.clone(), None)
2539                            }
2540                            "include" => {
2541                                fragment
2542                                    .global
2543                                    .include
2544                                    .push_override(final_values, source, file.clone(), None)
2545                            }
2546                            "exclude" => {
2547                                fragment
2548                                    .global
2549                                    .exclude
2550                                    .push_override(final_values, source, file.clone(), None)
2551                            }
2552                            _ => unreachable!(), // Should not happen due to outer match
2553                        }
2554                    } else {
2555                        log::warn!(
2556                            "[WARN] Expected array for global key '{}' in {}, found {}",
2557                            key,
2558                            path,
2559                            value_item.type_name()
2560                        );
2561                    }
2562                }
2563                "respect_gitignore" | "respect-gitignore" => {
2564                    // Handle both cases
2565                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2566                        let val = *formatted_bool.value();
2567                        fragment
2568                            .global
2569                            .respect_gitignore
2570                            .push_override(val, source, file.clone(), None);
2571                    } else {
2572                        log::warn!(
2573                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2574                            key,
2575                            path,
2576                            value_item.type_name()
2577                        );
2578                    }
2579                }
2580                "force_exclude" | "force-exclude" => {
2581                    // Handle both cases
2582                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2583                        let val = *formatted_bool.value();
2584                        fragment
2585                            .global
2586                            .force_exclude
2587                            .push_override(val, source, file.clone(), None);
2588                    } else {
2589                        log::warn!(
2590                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2591                            key,
2592                            path,
2593                            value_item.type_name()
2594                        );
2595                    }
2596                }
2597                "line_length" | "line-length" => {
2598                    // Handle both cases
2599                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
2600                        let val = *formatted_int.value() as u64;
2601                        fragment
2602                            .global
2603                            .line_length
2604                            .push_override(val, source, file.clone(), None);
2605                    } else {
2606                        log::warn!(
2607                            "[WARN] Expected integer for global key '{}' in {}, found {}",
2608                            key,
2609                            path,
2610                            value_item.type_name()
2611                        );
2612                    }
2613                }
2614                "output_format" | "output-format" => {
2615                    // Handle both cases
2616                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2617                        let val = formatted_string.value().clone();
2618                        if fragment.global.output_format.is_none() {
2619                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
2620                        } else {
2621                            fragment.global.output_format.as_mut().unwrap().push_override(
2622                                val,
2623                                source,
2624                                file.clone(),
2625                                None,
2626                            );
2627                        }
2628                    } else {
2629                        log::warn!(
2630                            "[WARN] Expected string for global key '{}' in {}, found {}",
2631                            key,
2632                            path,
2633                            value_item.type_name()
2634                        );
2635                    }
2636                }
2637                "fixable" => {
2638                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2639                        let values: Vec<String> = formatted_array
2640                            .iter()
2641                            .filter_map(|item| item.as_str())
2642                            .map(normalize_key)
2643                            .collect();
2644                        fragment
2645                            .global
2646                            .fixable
2647                            .push_override(values, source, file.clone(), None);
2648                    } else {
2649                        log::warn!(
2650                            "[WARN] Expected array for global key '{}' in {}, found {}",
2651                            key,
2652                            path,
2653                            value_item.type_name()
2654                        );
2655                    }
2656                }
2657                "unfixable" => {
2658                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2659                        let values: Vec<String> = formatted_array
2660                            .iter()
2661                            .filter_map(|item| item.as_str())
2662                            .map(normalize_key)
2663                            .collect();
2664                        fragment
2665                            .global
2666                            .unfixable
2667                            .push_override(values, source, file.clone(), None);
2668                    } else {
2669                        log::warn!(
2670                            "[WARN] Expected array for global key '{}' in {}, found {}",
2671                            key,
2672                            path,
2673                            value_item.type_name()
2674                        );
2675                    }
2676                }
2677                "flavor" => {
2678                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2679                        let val = formatted_string.value();
2680                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
2681                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
2682                        } else {
2683                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
2684                        }
2685                    } else {
2686                        log::warn!(
2687                            "[WARN] Expected string for global key '{}' in {}, found {}",
2688                            key,
2689                            path,
2690                            value_item.type_name()
2691                        );
2692                    }
2693                }
2694                _ => {
2695                    // Add to unknown_keys for potential validation later
2696                    // fragment.unknown_keys.push(("[global]".to_string(), key.to_string()));
2697                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
2698                }
2699            }
2700        }
2701    }
2702
2703    // Handle [per-file-ignores] section
2704    if let Some(per_file_item) = doc.get("per-file-ignores")
2705        && let Some(per_file_table) = per_file_item.as_table()
2706    {
2707        let mut per_file_map = HashMap::new();
2708        for (pattern, value_item) in per_file_table.iter() {
2709            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2710                let rules: Vec<String> = formatted_array
2711                    .iter()
2712                    .filter_map(|item| item.as_str())
2713                    .map(normalize_key)
2714                    .collect();
2715                per_file_map.insert(pattern.to_string(), rules);
2716            } else {
2717                let type_name = value_item.type_name();
2718                log::warn!(
2719                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
2720                );
2721            }
2722        }
2723        fragment
2724            .per_file_ignores
2725            .push_override(per_file_map, source, file.clone(), None);
2726    }
2727
2728    // Rule-specific: all other top-level tables
2729    for (key, item) in doc.iter() {
2730        let norm_rule_name = key.to_ascii_uppercase();
2731        if !known_rule_names.contains(&norm_rule_name) {
2732            continue;
2733        }
2734        if let Some(tbl) = item.as_table() {
2735            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
2736            for (rk, rv_item) in tbl.iter() {
2737                let norm_rk = normalize_key(rk);
2738                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
2739                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
2740                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
2741                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
2742                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
2743                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
2744                    Some(toml_edit::Value::Array(formatted_array)) => {
2745                        // Convert toml_edit Array to toml::Value::Array
2746                        let mut values = Vec::new();
2747                        for item in formatted_array.iter() {
2748                            match item {
2749                                toml_edit::Value::String(formatted) => {
2750                                    values.push(toml::Value::String(formatted.value().clone()))
2751                                }
2752                                toml_edit::Value::Integer(formatted) => {
2753                                    values.push(toml::Value::Integer(*formatted.value()))
2754                                }
2755                                toml_edit::Value::Float(formatted) => {
2756                                    values.push(toml::Value::Float(*formatted.value()))
2757                                }
2758                                toml_edit::Value::Boolean(formatted) => {
2759                                    values.push(toml::Value::Boolean(*formatted.value()))
2760                                }
2761                                toml_edit::Value::Datetime(formatted) => {
2762                                    values.push(toml::Value::Datetime(*formatted.value()))
2763                                }
2764                                _ => {
2765                                    log::warn!(
2766                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
2767                                    );
2768                                }
2769                            }
2770                        }
2771                        Some(toml::Value::Array(values))
2772                    }
2773                    Some(toml_edit::Value::InlineTable(_)) => {
2774                        log::warn!(
2775                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
2776                        );
2777                        None
2778                    }
2779                    None => {
2780                        log::warn!(
2781                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
2782                        );
2783                        None
2784                    }
2785                };
2786                if let Some(toml_val) = maybe_toml_val {
2787                    let sv = rule_entry
2788                        .values
2789                        .entry(norm_rk.clone())
2790                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2791                    sv.push_override(toml_val, source, file.clone(), None);
2792                }
2793            }
2794        } else if item.is_value() {
2795            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
2796        }
2797    }
2798
2799    Ok(fragment)
2800}
2801
2802/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
2803fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
2804    // Use the unified loader from markdownlint_config.rs
2805    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
2806        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
2807    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
2808}