rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use lazy_static::lazy_static;
8use log;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fmt;
13use std::fs;
14use std::io;
15use std::path::Path;
16use std::str::FromStr;
17use toml_edit::DocumentMut;
18
19/// Markdown flavor/dialect enumeration
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
21#[serde(rename_all = "lowercase")]
22pub enum MarkdownFlavor {
23    /// Standard Markdown without flavor-specific adjustments
24    #[serde(rename = "standard", alias = "none", alias = "")]
25    #[default]
26    Standard,
27    /// MkDocs flavor with auto-reference support
28    #[serde(rename = "mkdocs")]
29    MkDocs,
30    // Future flavors can be added here when they have actual implementation differences
31    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
32    // Planned: CommonMark - for strict CommonMark compliance
33}
34
35impl fmt::Display for MarkdownFlavor {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            MarkdownFlavor::Standard => write!(f, "standard"),
39            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
40        }
41    }
42}
43
44impl FromStr for MarkdownFlavor {
45    type Err = String;
46
47    fn from_str(s: &str) -> Result<Self, Self::Err> {
48        match s.to_lowercase().as_str() {
49            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
50            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
51            // Accept but warn about unimplemented flavors
52            "gfm" | "github" => {
53                eprintln!("Warning: GFM flavor not yet implemented, using standard");
54                Ok(MarkdownFlavor::Standard)
55            }
56            "commonmark" => {
57                eprintln!("Warning: CommonMark flavor not yet implemented, using standard");
58                Ok(MarkdownFlavor::Standard)
59            }
60            _ => Err(format!("Unknown markdown flavor: {s}")),
61        }
62    }
63}
64
65lazy_static! {
66    // Map common markdownlint config keys to rumdl rule names
67    static ref MARKDOWNLINT_KEY_MAP: HashMap<&'static str, &'static str> = {
68        let mut m = HashMap::new();
69        // Add mappings based on common markdownlint config names
70        // From https://github.com/DavidAnson/markdownlint/blob/main/schema/.markdownlint.jsonc
71        m.insert("ul-style", "md004");
72        m.insert("code-block-style", "md046");
73        m.insert("ul-indent", "md007"); // Example
74        m.insert("line-length", "md013"); // Example of a common one that might be top-level
75        // Add more mappings as needed based on markdownlint schema or observed usage
76        m
77    };
78}
79
/// Normalizes a configuration key (rule name or option name).
///
/// * A key shaped like a rule ID — exactly five bytes, `md` in any case
///   followed by three ASCII digits (e.g. `md013`) — is returned in ASCII
///   uppercase (`MD013`).
/// * Any other key is converted to lowercase kebab-case: underscores become
///   hyphens and ASCII letters are lowercased (`Line_Length` -> `line-length`).
pub fn normalize_key(key: &str) -> String {
    let bytes = key.as_bytes();
    let looks_like_rule_id = bytes.len() == 5
        && bytes[..2].eq_ignore_ascii_case(b"md")
        && bytes[2..].iter().all(u8::is_ascii_digit);

    if looks_like_rule_id {
        return key.to_ascii_uppercase();
    }
    key.replace('_', "-").to_ascii_lowercase()
}
89
90/// Represents a rule-specific configuration
91#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
92pub struct RuleConfig {
93    /// Configuration values for the rule
94    #[serde(flatten)]
95    #[schemars(schema_with = "arbitrary_value_schema")]
96    pub values: BTreeMap<String, toml::Value>,
97}
98
99/// Generate a JSON schema for arbitrary configuration values
100fn arbitrary_value_schema(_gen: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
101    use schemars::schema::*;
102    Schema::Object(SchemaObject {
103        instance_type: Some(InstanceType::Object.into()),
104        object: Some(Box::new(ObjectValidation {
105            additional_properties: Some(Box::new(Schema::Bool(true))),
106            ..Default::default()
107        })),
108        ..Default::default()
109    })
110}
111
112/// Represents the complete configuration loaded from rumdl.toml
113#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
114#[schemars(
115    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
116)]
117pub struct Config {
118    /// Global configuration options
119    #[serde(default)]
120    pub global: GlobalConfig,
121
122    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
123    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
124    #[serde(default, rename = "per-file-ignores")]
125    pub per_file_ignores: HashMap<String, Vec<String>>,
126
127    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
128    /// Each rule section can contain options specific to that rule.
129    ///
130    /// Common examples:
131    /// - MD013: line_length, code_blocks, tables, headings
132    /// - MD007: indent
133    /// - MD003: style ("atx", "atx_closed", "setext")
134    /// - MD044: names (array of proper names to check)
135    ///
136    /// See https://github.com/rvben/rumdl for full rule documentation.
137    #[serde(flatten)]
138    pub rules: BTreeMap<String, RuleConfig>,
139}
140
141impl Config {
142    /// Check if the Markdown flavor is set to MkDocs
143    pub fn is_mkdocs_flavor(&self) -> bool {
144        self.global.flavor == MarkdownFlavor::MkDocs
145    }
146
147    // Future methods for when GFM and CommonMark are implemented:
148    // pub fn is_gfm_flavor(&self) -> bool
149    // pub fn is_commonmark_flavor(&self) -> bool
150
151    /// Get the configured Markdown flavor
152    pub fn markdown_flavor(&self) -> MarkdownFlavor {
153        self.global.flavor
154    }
155
156    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
157    pub fn is_mkdocs_project(&self) -> bool {
158        self.is_mkdocs_flavor()
159    }
160
161    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
162    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
163    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
164        use globset::{Glob, GlobSetBuilder};
165
166        let mut ignored_rules = HashSet::new();
167
168        if self.per_file_ignores.is_empty() {
169            return ignored_rules;
170        }
171
172        // Build a globset for efficient matching
173        let mut builder = GlobSetBuilder::new();
174        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
175
176        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
177            if let Ok(glob) = Glob::new(pattern) {
178                builder.add(glob);
179                pattern_to_rules.push((idx, rules));
180            } else {
181                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
182            }
183        }
184
185        let globset = match builder.build() {
186            Ok(gs) => gs,
187            Err(e) => {
188                log::error!("Failed to build globset for per-file-ignores: {e}");
189                return ignored_rules;
190            }
191        };
192
193        // Match the file path against all patterns
194        for match_idx in globset.matches(file_path) {
195            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
196                for rule in rules.iter() {
197                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
198                    ignored_rules.insert(normalize_key(rule));
199                }
200            }
201        }
202
203        ignored_rules
204    }
205}
206
207/// Global configuration options
208#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
209#[serde(default)]
210pub struct GlobalConfig {
211    /// Enabled rules
212    #[serde(default)]
213    pub enable: Vec<String>,
214
215    /// Disabled rules
216    #[serde(default)]
217    pub disable: Vec<String>,
218
219    /// Files to exclude
220    #[serde(default)]
221    pub exclude: Vec<String>,
222
223    /// Files to include
224    #[serde(default)]
225    pub include: Vec<String>,
226
227    /// Respect .gitignore files when scanning directories
228    #[serde(default = "default_respect_gitignore")]
229    pub respect_gitignore: bool,
230
231    /// Global line length setting (used by MD013 and other rules if not overridden)
232    #[serde(default = "default_line_length")]
233    pub line_length: u64,
234
235    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
236    #[serde(skip_serializing_if = "Option::is_none")]
237    pub output_format: Option<String>,
238
239    /// Rules that are allowed to be fixed when --fix is used
240    /// If specified, only these rules will be fixed
241    #[serde(default)]
242    pub fixable: Vec<String>,
243
244    /// Rules that should never be fixed, even when --fix is used
245    /// Takes precedence over fixable
246    #[serde(default)]
247    pub unfixable: Vec<String>,
248
249    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
250    /// When set, adjusts parsing and validation rules for that specific Markdown variant
251    #[serde(default)]
252    pub flavor: MarkdownFlavor,
253
254    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
255    /// This option is deprecated as of v0.0.156 and has no effect.
256    /// Exclude patterns are now always respected, even for explicitly provided files.
257    /// This prevents duplication between rumdl config and tool configs like pre-commit.
258    #[serde(default)]
259    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
260    pub force_exclude: bool,
261}
262
/// Serde default for the `respect_gitignore` option: `.gitignore` files are
/// respected unless the user says otherwise.
fn default_respect_gitignore() -> bool {
    const RESPECT_GITIGNORE: bool = true;
    RESPECT_GITIGNORE
}
266
/// Serde default for the global `line_length` option.
fn default_line_length() -> u64 {
    const LINE_LENGTH: u64 = 80;
    LINE_LENGTH
}
270
271// Add the Default impl
272impl Default for GlobalConfig {
273    #[allow(deprecated)]
274    fn default() -> Self {
275        Self {
276            enable: Vec::new(),
277            disable: Vec::new(),
278            exclude: Vec::new(),
279            include: Vec::new(),
280            respect_gitignore: true,
281            line_length: 80,
282            output_format: None,
283            fixable: Vec::new(),
284            unfixable: Vec::new(),
285            flavor: MarkdownFlavor::default(),
286            force_exclude: false,
287        }
288    }
289}
290
/// File names recognized as markdownlint configuration files: the dot-prefixed
/// variants first, then the plain ones, each in JSON, JSONC and YAML form.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    // Hidden (dot-prefixed) variants
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    // Plain variants
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
301
302/// Create a default configuration file at the specified path
303pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
304    // Check if file already exists
305    if Path::new(path).exists() {
306        return Err(ConfigError::FileExists { path: path.to_string() });
307    }
308
309    // Default configuration content
310    let default_config = r#"# rumdl configuration file
311
312# Global configuration options
313[global]
314# List of rules to disable (uncomment and modify as needed)
315# disable = ["MD013", "MD033"]
316
317# List of rules to enable exclusively (if provided, only these rules will run)
318# enable = ["MD001", "MD003", "MD004"]
319
320# List of file/directory patterns to include for linting (if provided, only these will be linted)
321# include = [
322#    "docs/*.md",
323#    "src/**/*.md",
324#    "README.md"
325# ]
326
327# List of file/directory patterns to exclude from linting
328exclude = [
329    # Common directories to exclude
330    ".git",
331    ".github",
332    "node_modules",
333    "vendor",
334    "dist",
335    "build",
336
337    # Specific files or patterns
338    "CHANGELOG.md",
339    "LICENSE.md",
340]
341
342# Respect .gitignore files when scanning directories (default: true)
343respect-gitignore = true
344
345# Markdown flavor/dialect (uncomment to enable)
346# Options: mkdocs, gfm, commonmark
347# flavor = "mkdocs"
348
349# Rule-specific configurations (uncomment and modify as needed)
350
351# [MD003]
352# style = "atx"  # Heading style (atx, atx_closed, setext)
353
354# [MD004]
355# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
356
357# [MD007]
358# indent = 4  # Unordered list indentation
359
360# [MD013]
361# line-length = 100  # Line length
362# code-blocks = false  # Exclude code blocks from line length check
363# tables = false  # Exclude tables from line length check
364# headings = true  # Include headings in line length check
365
366# [MD044]
367# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
368# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
369"#;
370
371    // Write the default configuration to the file
372    match fs::write(path, default_config) {
373        Ok(_) => Ok(()),
374        Err(err) => Err(ConfigError::IoError {
375            source: err,
376            path: path.to_string(),
377        }),
378    }
379}
380
381/// Errors that can occur when loading configuration
382#[derive(Debug, thiserror::Error)]
383pub enum ConfigError {
384    /// Failed to read the configuration file
385    #[error("Failed to read config file at {path}: {source}")]
386    IoError { source: io::Error, path: String },
387
388    /// Failed to parse the configuration content (TOML or JSON)
389    #[error("Failed to parse config: {0}")]
390    ParseError(String),
391
392    /// Configuration file already exists
393    #[error("Configuration file already exists at {path}")]
394    FileExists { path: String },
395}
396
397/// Get a rule-specific configuration value
398/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
399/// for better markdownlint compatibility
400pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
401    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
402
403    let rule_config = config.rules.get(&norm_rule_name)?;
404
405    // Try multiple key variants to support both underscore and kebab-case formats
406    let key_variants = [
407        key.to_string(),       // Original key as provided
408        normalize_key(key),    // Normalized key (lowercase, kebab-case)
409        key.replace('-', "_"), // Convert kebab-case to snake_case
410        key.replace('_', "-"), // Convert snake_case to kebab-case
411    ];
412
413    // Try each variant until we find a match
414    for variant in &key_variants {
415        if let Some(value) = rule_config.values.get(variant)
416            && let Ok(result) = T::deserialize(value.clone())
417        {
418            return Some(result);
419        }
420    }
421
422    None
423}
424
/// Returns the default `[tool.rumdl]` section, as TOML text, for inclusion in
/// a `pyproject.toml` file.
///
/// The text starts with a blank line, sets a few global options, and contains
/// commented-out examples of rule-specific tables.
pub fn generate_pyproject_config() -> String {
    String::from(
        r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#,
    )
}
467
468#[cfg(test)]
469mod tests {
470    use super::*;
471    use std::fs;
472    use tempfile::tempdir;
473
474    #[test]
475    fn test_flavor_loading() {
476        let temp_dir = tempdir().unwrap();
477        let config_path = temp_dir.path().join(".rumdl.toml");
478        let config_content = r#"
479[global]
480flavor = "mkdocs"
481disable = ["MD001"]
482"#;
483        fs::write(&config_path, config_content).unwrap();
484
485        // Load the config
486        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
487        let config: Config = sourced.into();
488
489        // Check that flavor was loaded
490        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
491        assert!(config.is_mkdocs_flavor());
492        assert!(config.is_mkdocs_project()); // Test backwards compatibility
493        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
494    }
495
496    #[test]
497    fn test_pyproject_toml_root_level_config() {
498        let temp_dir = tempdir().unwrap();
499        let config_path = temp_dir.path().join("pyproject.toml");
500
501        // Create a test pyproject.toml with root-level configuration
502        let content = r#"
503[tool.rumdl]
504line-length = 120
505disable = ["MD033"]
506enable = ["MD001", "MD004"]
507include = ["docs/*.md"]
508exclude = ["node_modules"]
509respect-gitignore = true
510        "#;
511
512        fs::write(&config_path, content).unwrap();
513
514        // Load the config with skip_auto_discovery to avoid environment config files
515        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
516        let config: Config = sourced.into(); // Convert to plain config for assertions
517
518        // Check global settings
519        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
520        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
521        // Should now contain only the configured pattern since auto-discovery is disabled
522        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
523        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
524        assert!(config.global.respect_gitignore);
525
526        // Check line-length was correctly added to MD013
527        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
528        assert_eq!(line_length, Some(120));
529    }
530
531    #[test]
532    fn test_pyproject_toml_snake_case_and_kebab_case() {
533        let temp_dir = tempdir().unwrap();
534        let config_path = temp_dir.path().join("pyproject.toml");
535
536        // Test with both kebab-case and snake_case variants
537        let content = r#"
538[tool.rumdl]
539line-length = 150
540respect_gitignore = true
541        "#;
542
543        fs::write(&config_path, content).unwrap();
544
545        // Load the config with skip_auto_discovery to avoid environment config files
546        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
547        let config: Config = sourced.into(); // Convert to plain config for assertions
548
549        // Check settings were correctly loaded
550        assert!(config.global.respect_gitignore);
551        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
552        assert_eq!(line_length, Some(150));
553    }
554
555    #[test]
556    fn test_md013_key_normalization_in_rumdl_toml() {
557        let temp_dir = tempdir().unwrap();
558        let config_path = temp_dir.path().join(".rumdl.toml");
559        let config_content = r#"
560[MD013]
561line_length = 111
562line-length = 222
563"#;
564        fs::write(&config_path, config_content).unwrap();
565        // Load the config with skip_auto_discovery to avoid environment config files
566        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
567        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
568        // Now we should only get the explicitly configured key
569        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
570        assert_eq!(keys, vec!["line-length"]);
571        let val = &rule_cfg.values["line-length"].value;
572        assert_eq!(val.as_integer(), Some(222));
573        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
574        let config: Config = sourced.clone().into();
575        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
576        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
577        assert_eq!(v1, Some(222));
578        assert_eq!(v2, Some(222));
579    }
580
581    #[test]
582    fn test_md013_section_case_insensitivity() {
583        let temp_dir = tempdir().unwrap();
584        let config_path = temp_dir.path().join(".rumdl.toml");
585        let config_content = r#"
586[md013]
587line-length = 101
588
589[Md013]
590line-length = 102
591
592[MD013]
593line-length = 103
594"#;
595        fs::write(&config_path, config_content).unwrap();
596        // Load the config with skip_auto_discovery to avoid environment config files
597        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
598        let config: Config = sourced.clone().into();
599        // Only the last section should win, and be present
600        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
601        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
602        assert_eq!(keys, vec!["line-length"]);
603        let val = &rule_cfg.values["line-length"].value;
604        assert_eq!(val.as_integer(), Some(103));
605        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
606        assert_eq!(v, Some(103));
607    }
608
609    #[test]
610    fn test_md013_key_snake_and_kebab_case() {
611        let temp_dir = tempdir().unwrap();
612        let config_path = temp_dir.path().join(".rumdl.toml");
613        let config_content = r#"
614[MD013]
615line_length = 201
616line-length = 202
617"#;
618        fs::write(&config_path, config_content).unwrap();
619        // Load the config with skip_auto_discovery to avoid environment config files
620        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
621        let config: Config = sourced.clone().into();
622        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
623        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
624        assert_eq!(keys, vec!["line-length"]);
625        let val = &rule_cfg.values["line-length"].value;
626        assert_eq!(val.as_integer(), Some(202));
627        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
628        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
629        assert_eq!(v1, Some(202));
630        assert_eq!(v2, Some(202));
631    }
632
633    #[test]
634    fn test_unknown_rule_section_is_ignored() {
635        let temp_dir = tempdir().unwrap();
636        let config_path = temp_dir.path().join(".rumdl.toml");
637        let config_content = r#"
638[MD999]
639foo = 1
640bar = 2
641[MD013]
642line-length = 303
643"#;
644        fs::write(&config_path, config_content).unwrap();
645        // Load the config with skip_auto_discovery to avoid environment config files
646        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
647        let config: Config = sourced.clone().into();
648        // MD999 should not be present
649        assert!(!sourced.rules.contains_key("MD999"));
650        // MD013 should be present and correct
651        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
652        assert_eq!(v, Some(303));
653    }
654
655    #[test]
656    fn test_invalid_toml_syntax() {
657        let temp_dir = tempdir().unwrap();
658        let config_path = temp_dir.path().join(".rumdl.toml");
659
660        // Invalid TOML with unclosed string
661        let config_content = r#"
662[MD013]
663line-length = "unclosed string
664"#;
665        fs::write(&config_path, config_content).unwrap();
666
667        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
668        assert!(result.is_err());
669        match result.unwrap_err() {
670            ConfigError::ParseError(msg) => {
671                // The actual error message from toml parser might vary
672                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
673            }
674            _ => panic!("Expected ParseError"),
675        }
676    }
677
678    #[test]
679    fn test_wrong_type_for_config_value() {
680        let temp_dir = tempdir().unwrap();
681        let config_path = temp_dir.path().join(".rumdl.toml");
682
683        // line-length should be a number, not a string
684        let config_content = r#"
685[MD013]
686line-length = "not a number"
687"#;
688        fs::write(&config_path, config_content).unwrap();
689
690        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
691        let config: Config = sourced.into();
692
693        // The value should be loaded as a string, not converted
694        let rule_config = config.rules.get("MD013").unwrap();
695        let value = rule_config.values.get("line-length").unwrap();
696        assert!(matches!(value, toml::Value::String(_)));
697    }
698
699    #[test]
700    fn test_empty_config_file() {
701        let temp_dir = tempdir().unwrap();
702        let config_path = temp_dir.path().join(".rumdl.toml");
703
704        // Empty file
705        fs::write(&config_path, "").unwrap();
706
707        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
708        let config: Config = sourced.into();
709
710        // Should have default values
711        assert_eq!(config.global.line_length, 80);
712        assert!(config.global.respect_gitignore);
713        assert!(config.rules.is_empty());
714    }
715
716    #[test]
717    fn test_malformed_pyproject_toml() {
718        let temp_dir = tempdir().unwrap();
719        let config_path = temp_dir.path().join("pyproject.toml");
720
721        // Missing closing bracket
722        let content = r#"
723[tool.rumdl
724line-length = 120
725"#;
726        fs::write(&config_path, content).unwrap();
727
728        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
729        assert!(result.is_err());
730    }
731
732    #[test]
733    fn test_conflicting_config_values() {
734        let temp_dir = tempdir().unwrap();
735        let config_path = temp_dir.path().join(".rumdl.toml");
736
737        // Both enable and disable the same rule - these need to be in a global section
738        let config_content = r#"
739[global]
740enable = ["MD013"]
741disable = ["MD013"]
742"#;
743        fs::write(&config_path, config_content).unwrap();
744
745        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
746        let config: Config = sourced.into();
747
748        // Both should be present - resolution happens at runtime
749        assert!(config.global.enable.contains(&"MD013".to_string()));
750        assert!(config.global.disable.contains(&"MD013".to_string()));
751    }
752
753    #[test]
754    fn test_invalid_rule_names() {
755        let temp_dir = tempdir().unwrap();
756        let config_path = temp_dir.path().join(".rumdl.toml");
757
758        let config_content = r#"
759[global]
760enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
761disable = ["MD-001", "MD_002"]
762"#;
763        fs::write(&config_path, config_content).unwrap();
764
765        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
766        let config: Config = sourced.into();
767
768        // All values should be preserved as-is
769        assert_eq!(config.global.enable.len(), 4);
770        assert_eq!(config.global.disable.len(), 2);
771    }
772
773    #[test]
774    fn test_deeply_nested_config() {
775        let temp_dir = tempdir().unwrap();
776        let config_path = temp_dir.path().join(".rumdl.toml");
777
778        // This should be ignored as we don't support nested tables within rule configs
779        let config_content = r#"
780[MD013]
781line-length = 100
782[MD013.nested]
783value = 42
784"#;
785        fs::write(&config_path, config_content).unwrap();
786
787        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
788        let config: Config = sourced.into();
789
790        let rule_config = config.rules.get("MD013").unwrap();
791        assert_eq!(
792            rule_config.values.get("line-length").unwrap(),
793            &toml::Value::Integer(100)
794        );
795        // Nested table should not be present
796        assert!(!rule_config.values.contains_key("nested"));
797    }
798
799    #[test]
800    fn test_unicode_in_config() {
801        let temp_dir = tempdir().unwrap();
802        let config_path = temp_dir.path().join(".rumdl.toml");
803
804        let config_content = r#"
805[global]
806include = ["文档/*.md", "ドキュメント/*.md"]
807exclude = ["测试/*", "🚀/*"]
808
809[MD013]
810line-length = 80
811message = "行太长了 🚨"
812"#;
813        fs::write(&config_path, config_content).unwrap();
814
815        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
816        let config: Config = sourced.into();
817
818        assert_eq!(config.global.include.len(), 2);
819        assert_eq!(config.global.exclude.len(), 2);
820        assert!(config.global.include[0].contains("文档"));
821        assert!(config.global.exclude[1].contains("🚀"));
822
823        let rule_config = config.rules.get("MD013").unwrap();
824        let message = rule_config.values.get("message").unwrap();
825        if let toml::Value::String(s) = message {
826            assert!(s.contains("行太长了"));
827            assert!(s.contains("🚨"));
828        }
829    }
830
831    #[test]
832    fn test_extremely_long_values() {
833        let temp_dir = tempdir().unwrap();
834        let config_path = temp_dir.path().join(".rumdl.toml");
835
836        let long_string = "a".repeat(10000);
837        let config_content = format!(
838            r#"
839[global]
840exclude = ["{long_string}"]
841
842[MD013]
843line-length = 999999999
844"#
845        );
846
847        fs::write(&config_path, config_content).unwrap();
848
849        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
850        let config: Config = sourced.into();
851
852        assert_eq!(config.global.exclude[0].len(), 10000);
853        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
854        assert_eq!(line_length, Some(999999999));
855    }
856
857    #[test]
858    fn test_config_with_comments() {
859        let temp_dir = tempdir().unwrap();
860        let config_path = temp_dir.path().join(".rumdl.toml");
861
862        let config_content = r#"
863[global]
864# This is a comment
865enable = ["MD001"] # Enable MD001
866# disable = ["MD002"] # This is commented out
867
868[MD013] # Line length rule
869line-length = 100 # Set to 100 characters
870# ignored = true # This setting is commented out
871"#;
872        fs::write(&config_path, config_content).unwrap();
873
874        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
875        let config: Config = sourced.into();
876
877        assert_eq!(config.global.enable, vec!["MD001"]);
878        assert!(config.global.disable.is_empty()); // Commented out
879
880        let rule_config = config.rules.get("MD013").unwrap();
881        assert_eq!(rule_config.values.len(), 1); // Only line-length
882        assert!(!rule_config.values.contains_key("ignored"));
883    }
884
885    #[test]
886    fn test_arrays_in_rule_config() {
887        let temp_dir = tempdir().unwrap();
888        let config_path = temp_dir.path().join(".rumdl.toml");
889
890        let config_content = r#"
891[MD002]
892levels = [1, 2, 3]
893tags = ["important", "critical"]
894mixed = [1, "two", true]
895"#;
896        fs::write(&config_path, config_content).unwrap();
897
898        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
899        let config: Config = sourced.into();
900
901        // Arrays should now be properly parsed
902        let rule_config = config.rules.get("MD002").expect("MD002 config should exist");
903
904        // Check that arrays are present and correctly parsed
905        assert!(rule_config.values.contains_key("levels"));
906        assert!(rule_config.values.contains_key("tags"));
907        assert!(rule_config.values.contains_key("mixed"));
908
909        // Verify array contents
910        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
911            assert_eq!(levels.len(), 3);
912            assert_eq!(levels[0], toml::Value::Integer(1));
913            assert_eq!(levels[1], toml::Value::Integer(2));
914            assert_eq!(levels[2], toml::Value::Integer(3));
915        } else {
916            panic!("levels should be an array");
917        }
918
919        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
920            assert_eq!(tags.len(), 2);
921            assert_eq!(tags[0], toml::Value::String("important".to_string()));
922            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
923        } else {
924            panic!("tags should be an array");
925        }
926
927        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
928            assert_eq!(mixed.len(), 3);
929            assert_eq!(mixed[0], toml::Value::Integer(1));
930            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
931            assert_eq!(mixed[2], toml::Value::Boolean(true));
932        } else {
933            panic!("mixed should be an array");
934        }
935    }
936
937    #[test]
938    fn test_normalize_key_edge_cases() {
939        // Rule names
940        assert_eq!(normalize_key("MD001"), "MD001");
941        assert_eq!(normalize_key("md001"), "MD001");
942        assert_eq!(normalize_key("Md001"), "MD001");
943        assert_eq!(normalize_key("mD001"), "MD001");
944
945        // Non-rule names
946        assert_eq!(normalize_key("line_length"), "line-length");
947        assert_eq!(normalize_key("line-length"), "line-length");
948        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
949        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
950
951        // Edge cases
952        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
953        assert_eq!(normalize_key("MD00"), "md00"); // Too short
954        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
955        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
956        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
957        assert_eq!(normalize_key(""), "");
958        assert_eq!(normalize_key("_"), "-");
959        assert_eq!(normalize_key("___"), "---");
960    }
961
962    #[test]
963    fn test_missing_config_file() {
964        let temp_dir = tempdir().unwrap();
965        let config_path = temp_dir.path().join("nonexistent.toml");
966
967        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
968        assert!(result.is_err());
969        match result.unwrap_err() {
970            ConfigError::IoError { .. } => {}
971            _ => panic!("Expected IoError for missing file"),
972        }
973    }
974
975    #[test]
976    #[cfg(unix)]
977    fn test_permission_denied_config() {
978        use std::os::unix::fs::PermissionsExt;
979
980        let temp_dir = tempdir().unwrap();
981        let config_path = temp_dir.path().join(".rumdl.toml");
982
983        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
984
985        // Remove read permissions
986        let mut perms = fs::metadata(&config_path).unwrap().permissions();
987        perms.set_mode(0o000);
988        fs::set_permissions(&config_path, perms).unwrap();
989
990        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
991
992        // Restore permissions for cleanup
993        let mut perms = fs::metadata(&config_path).unwrap().permissions();
994        perms.set_mode(0o644);
995        fs::set_permissions(&config_path, perms).unwrap();
996
997        assert!(result.is_err());
998        match result.unwrap_err() {
999            ConfigError::IoError { .. } => {}
1000            _ => panic!("Expected IoError for permission denied"),
1001        }
1002    }
1003
1004    #[test]
1005    fn test_circular_reference_detection() {
1006        // This test is more conceptual since TOML doesn't support circular references
1007        // But we test that deeply nested structures don't cause stack overflow
1008        let temp_dir = tempdir().unwrap();
1009        let config_path = temp_dir.path().join(".rumdl.toml");
1010
1011        let mut config_content = String::from("[MD001]\n");
1012        for i in 0..100 {
1013            config_content.push_str(&format!("key{i} = {i}\n"));
1014        }
1015
1016        fs::write(&config_path, config_content).unwrap();
1017
1018        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1019        let config: Config = sourced.into();
1020
1021        let rule_config = config.rules.get("MD001").unwrap();
1022        assert_eq!(rule_config.values.len(), 100);
1023    }
1024
1025    #[test]
1026    fn test_special_toml_values() {
1027        let temp_dir = tempdir().unwrap();
1028        let config_path = temp_dir.path().join(".rumdl.toml");
1029
1030        let config_content = r#"
1031[MD001]
1032infinity = inf
1033neg_infinity = -inf
1034not_a_number = nan
1035datetime = 1979-05-27T07:32:00Z
1036local_date = 1979-05-27
1037local_time = 07:32:00
1038"#;
1039        fs::write(&config_path, config_content).unwrap();
1040
1041        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1042        let config: Config = sourced.into();
1043
1044        // Some values might not be parsed due to parser limitations
1045        if let Some(rule_config) = config.rules.get("MD001") {
1046            // Check special float values if present
1047            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1048                assert!(f.is_infinite() && f.is_sign_positive());
1049            }
1050            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1051                assert!(f.is_infinite() && f.is_sign_negative());
1052            }
1053            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1054                assert!(f.is_nan());
1055            }
1056
1057            // Check datetime values if present
1058            if let Some(val) = rule_config.values.get("datetime") {
1059                assert!(matches!(val, toml::Value::Datetime(_)));
1060            }
1061            // Note: local_date and local_time might not be parsed by the current implementation
1062        }
1063    }
1064
1065    #[test]
1066    fn test_default_config_passes_validation() {
1067        use crate::rules;
1068
1069        let temp_dir = tempdir().unwrap();
1070        let config_path = temp_dir.path().join(".rumdl.toml");
1071        let config_path_str = config_path.to_str().unwrap();
1072
1073        // Create the default config using the same function that `rumdl init` uses
1074        create_default_config(config_path_str).unwrap();
1075
1076        // Load it back as a SourcedConfig
1077        let sourced =
1078            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1079
1080        // Create the rule registry
1081        let all_rules = rules::all_rules(&Config::default());
1082        let registry = RuleRegistry::from_rules(&all_rules);
1083
1084        // Validate the config
1085        let warnings = validate_config_sourced(&sourced, &registry);
1086
1087        // The default config should have no warnings
1088        if !warnings.is_empty() {
1089            for warning in &warnings {
1090                eprintln!("Config validation warning: {}", warning.message);
1091                if let Some(rule) = &warning.rule {
1092                    eprintln!("  Rule: {rule}");
1093                }
1094                if let Some(key) = &warning.key {
1095                    eprintln!("  Key: {key}");
1096                }
1097            }
1098        }
1099        assert!(
1100            warnings.is_empty(),
1101            "Default config from rumdl init should pass validation without warnings"
1102        );
1103    }
1104
1105    #[test]
1106    fn test_per_file_ignores_config_parsing() {
1107        let temp_dir = tempdir().unwrap();
1108        let config_path = temp_dir.path().join(".rumdl.toml");
1109        let config_content = r#"
1110[per-file-ignores]
1111"README.md" = ["MD033"]
1112"docs/**/*.md" = ["MD013", "MD033"]
1113"test/*.md" = ["MD041"]
1114"#;
1115        fs::write(&config_path, config_content).unwrap();
1116
1117        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1118        let config: Config = sourced.into();
1119
1120        // Verify per-file-ignores was loaded
1121        assert_eq!(config.per_file_ignores.len(), 3);
1122        assert_eq!(
1123            config.per_file_ignores.get("README.md"),
1124            Some(&vec!["MD033".to_string()])
1125        );
1126        assert_eq!(
1127            config.per_file_ignores.get("docs/**/*.md"),
1128            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1129        );
1130        assert_eq!(
1131            config.per_file_ignores.get("test/*.md"),
1132            Some(&vec!["MD041".to_string()])
1133        );
1134    }
1135
1136    #[test]
1137    fn test_per_file_ignores_glob_matching() {
1138        use std::path::PathBuf;
1139
1140        let temp_dir = tempdir().unwrap();
1141        let config_path = temp_dir.path().join(".rumdl.toml");
1142        let config_content = r#"
1143[per-file-ignores]
1144"README.md" = ["MD033"]
1145"docs/**/*.md" = ["MD013"]
1146"**/test_*.md" = ["MD041"]
1147"#;
1148        fs::write(&config_path, config_content).unwrap();
1149
1150        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1151        let config: Config = sourced.into();
1152
1153        // Test exact match
1154        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1155        assert!(ignored.contains("MD033"));
1156        assert_eq!(ignored.len(), 1);
1157
1158        // Test glob pattern matching
1159        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1160        assert!(ignored.contains("MD013"));
1161        assert_eq!(ignored.len(), 1);
1162
1163        // Test recursive glob pattern
1164        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1165        assert!(ignored.contains("MD041"));
1166        assert_eq!(ignored.len(), 1);
1167
1168        // Test non-matching path
1169        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1170        assert!(ignored.is_empty());
1171    }
1172
1173    #[test]
1174    fn test_per_file_ignores_pyproject_toml() {
1175        let temp_dir = tempdir().unwrap();
1176        let config_path = temp_dir.path().join("pyproject.toml");
1177        let config_content = r#"
1178[tool.rumdl]
1179[tool.rumdl.per-file-ignores]
1180"README.md" = ["MD033", "MD013"]
1181"generated/*.md" = ["MD041"]
1182"#;
1183        fs::write(&config_path, config_content).unwrap();
1184
1185        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1186        let config: Config = sourced.into();
1187
1188        // Verify per-file-ignores was loaded from pyproject.toml
1189        assert_eq!(config.per_file_ignores.len(), 2);
1190        assert_eq!(
1191            config.per_file_ignores.get("README.md"),
1192            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1193        );
1194        assert_eq!(
1195            config.per_file_ignores.get("generated/*.md"),
1196            Some(&vec!["MD041".to_string()])
1197        );
1198    }
1199
1200    #[test]
1201    fn test_per_file_ignores_multiple_patterns_match() {
1202        use std::path::PathBuf;
1203
1204        let temp_dir = tempdir().unwrap();
1205        let config_path = temp_dir.path().join(".rumdl.toml");
1206        let config_content = r#"
1207[per-file-ignores]
1208"docs/**/*.md" = ["MD013"]
1209"**/api/*.md" = ["MD033"]
1210"docs/api/overview.md" = ["MD041"]
1211"#;
1212        fs::write(&config_path, config_content).unwrap();
1213
1214        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1215        let config: Config = sourced.into();
1216
1217        // File matches multiple patterns - should get union of all rules
1218        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1219        assert_eq!(ignored.len(), 3);
1220        assert!(ignored.contains("MD013"));
1221        assert!(ignored.contains("MD033"));
1222        assert!(ignored.contains("MD041"));
1223    }
1224
1225    #[test]
1226    fn test_per_file_ignores_rule_name_normalization() {
1227        use std::path::PathBuf;
1228
1229        let temp_dir = tempdir().unwrap();
1230        let config_path = temp_dir.path().join(".rumdl.toml");
1231        let config_content = r#"
1232[per-file-ignores]
1233"README.md" = ["md033", "MD013", "Md041"]
1234"#;
1235        fs::write(&config_path, config_content).unwrap();
1236
1237        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1238        let config: Config = sourced.into();
1239
1240        // All rule names should be normalized to uppercase
1241        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1242        assert_eq!(ignored.len(), 3);
1243        assert!(ignored.contains("MD033"));
1244        assert!(ignored.contains("MD013"));
1245        assert!(ignored.contains("MD041"));
1246    }
1247
1248    #[test]
1249    fn test_per_file_ignores_invalid_glob_pattern() {
1250        use std::path::PathBuf;
1251
1252        let temp_dir = tempdir().unwrap();
1253        let config_path = temp_dir.path().join(".rumdl.toml");
1254        let config_content = r#"
1255[per-file-ignores]
1256"[invalid" = ["MD033"]
1257"valid/*.md" = ["MD013"]
1258"#;
1259        fs::write(&config_path, config_content).unwrap();
1260
1261        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1262        let config: Config = sourced.into();
1263
1264        // Invalid pattern should be skipped, valid pattern should work
1265        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1266        assert!(ignored.contains("MD013"));
1267
1268        // Invalid pattern should not cause issues
1269        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1270        assert!(ignored2.is_empty());
1271    }
1272
1273    #[test]
1274    fn test_per_file_ignores_empty_section() {
1275        use std::path::PathBuf;
1276
1277        let temp_dir = tempdir().unwrap();
1278        let config_path = temp_dir.path().join(".rumdl.toml");
1279        let config_content = r#"
1280[global]
1281disable = ["MD001"]
1282
1283[per-file-ignores]
1284"#;
1285        fs::write(&config_path, config_content).unwrap();
1286
1287        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1288        let config: Config = sourced.into();
1289
1290        // Empty per-file-ignores should work fine
1291        assert_eq!(config.per_file_ignores.len(), 0);
1292        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1293        assert!(ignored.is_empty());
1294    }
1295
1296    #[test]
1297    fn test_per_file_ignores_with_underscores_in_pyproject() {
1298        let temp_dir = tempdir().unwrap();
1299        let config_path = temp_dir.path().join("pyproject.toml");
1300        let config_content = r#"
1301[tool.rumdl]
1302[tool.rumdl.per_file_ignores]
1303"README.md" = ["MD033"]
1304"#;
1305        fs::write(&config_path, config_content).unwrap();
1306
1307        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1308        let config: Config = sourced.into();
1309
1310        // Should support both per-file-ignores and per_file_ignores
1311        assert_eq!(config.per_file_ignores.len(), 1);
1312        assert_eq!(
1313            config.per_file_ignores.get("README.md"),
1314            Some(&vec!["MD033".to_string()])
1315        );
1316    }
1317
1318    #[test]
1319    fn test_generate_json_schema() {
1320        use schemars::schema_for;
1321        use std::env;
1322
1323        let schema = schema_for!(Config);
1324        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1325
1326        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1327        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1328            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1329            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1330            println!("Schema written to: {}", schema_path.display());
1331        }
1332
1333        // Basic validation that schema was generated
1334        assert!(schema_json.contains("\"title\": \"Config\""));
1335        assert!(schema_json.contains("\"global\""));
1336        assert!(schema_json.contains("\"per-file-ignores\""));
1337    }
1338}
1339
/// Identifies where a configuration value came from.
///
/// `SourcedValue::merge_override` ranks these, lowest to highest precedence, as
/// `Default`, `PyprojectToml`, `Markdownlint`, `RumdlToml`, `Cli`. Note that this
/// is not the declaration order used below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default; the `Default` impls in this module tag their values with it.
    Default,
    /// Value attributed to a rumdl TOML config file (e.g. `.rumdl.toml`).
    RumdlToml,
    /// Value attributed to a `pyproject.toml` file.
    PyprojectToml,
    /// Value attributed to command-line overrides.
    Cli,
    /// Value was loaded from a markdownlint config file (e.g. .markdownlint.json, .markdownlint.yaml)
    Markdownlint,
}
1349
/// One entry in the override history kept by `SourcedValue::overrides`.
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    /// The value that was set by this entry.
    pub value: T,
    /// The source this entry is attributed to.
    pub source: ConfigSource,
    /// File associated with this entry, when one was recorded.
    pub file: Option<String>,
    /// Line associated with this entry, when one was recorded.
    /// NOTE(review): presumably a line number within `file` — confirm against the loaders.
    pub line: Option<usize>,
}
1357
/// A configuration value together with its provenance.
///
/// `value` and `source` hold the currently effective value; `overrides` records
/// the entries that were applied, in the order they were pushed.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    /// The currently effective value.
    pub value: T,
    /// The source of the currently effective value.
    pub source: ConfigSource,
    /// History of applied entries; `SourcedValue::new` seeds it with one entry.
    pub overrides: Vec<ConfigOverride<T>>,
}
1364
1365impl<T: Clone> SourcedValue<T> {
1366    pub fn new(value: T, source: ConfigSource) -> Self {
1367        Self {
1368            value: value.clone(),
1369            source,
1370            overrides: vec![ConfigOverride {
1371                value,
1372                source,
1373                file: None,
1374                line: None,
1375            }],
1376        }
1377    }
1378
1379    /// Merges a new override into this SourcedValue based on source precedence.
1380    /// If the new source has higher or equal precedence, the value and source are updated,
1381    /// and the new override is added to the history.
1382    pub fn merge_override(
1383        &mut self,
1384        new_value: T,
1385        new_source: ConfigSource,
1386        new_file: Option<String>,
1387        new_line: Option<usize>,
1388    ) {
1389        // Helper function to get precedence, defined locally or globally
1390        fn source_precedence(src: ConfigSource) -> u8 {
1391            match src {
1392                ConfigSource::Default => 0,
1393                ConfigSource::PyprojectToml => 1,
1394                ConfigSource::Markdownlint => 2,
1395                ConfigSource::RumdlToml => 3,
1396                ConfigSource::Cli => 4,
1397            }
1398        }
1399
1400        if source_precedence(new_source) >= source_precedence(self.source) {
1401            self.value = new_value.clone();
1402            self.source = new_source;
1403            self.overrides.push(ConfigOverride {
1404                value: new_value,
1405                source: new_source,
1406                file: new_file,
1407                line: new_line,
1408            });
1409        }
1410    }
1411
1412    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1413        // This is essentially merge_override without the precedence check
1414        // We might consolidate these later, but keep separate for now during refactor
1415        self.value = value.clone();
1416        self.source = source;
1417        self.overrides.push(ConfigOverride {
1418            value,
1419            source,
1420            file,
1421            line,
1422        });
1423    }
1424}
1425
/// Global settings, each wrapped in `SourcedValue` so its provenance is kept.
///
/// The defaults noted on the fields are the ones set by the `Default` impl.
#[derive(Debug, Clone)]
pub struct SourcedGlobalConfig {
    /// Defaults to an empty list.
    pub enable: SourcedValue<Vec<String>>,
    /// Defaults to an empty list.
    pub disable: SourcedValue<Vec<String>>,
    /// Defaults to an empty list.
    pub exclude: SourcedValue<Vec<String>>,
    /// Defaults to an empty list.
    pub include: SourcedValue<Vec<String>>,
    /// Defaults to `true`.
    pub respect_gitignore: SourcedValue<bool>,
    /// Defaults to `80`.
    pub line_length: SourcedValue<u64>,
    /// The only optional setting: `None` by default.
    pub output_format: Option<SourcedValue<String>>,
    /// Defaults to an empty list.
    pub fixable: SourcedValue<Vec<String>>,
    /// Defaults to an empty list.
    pub unfixable: SourcedValue<Vec<String>>,
    /// Defaults to `MarkdownFlavor::default()`.
    pub flavor: SourcedValue<MarkdownFlavor>,
    /// Defaults to `false`.
    pub force_exclude: SourcedValue<bool>,
}
1440
1441impl Default for SourcedGlobalConfig {
1442    fn default() -> Self {
1443        SourcedGlobalConfig {
1444            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1445            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1446            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1447            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1448            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1449            line_length: SourcedValue::new(80, ConfigSource::Default),
1450            output_format: None,
1451            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1452            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1453            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1454            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1455        }
1456    }
1457}
1458
/// Settings for a single rule, with provenance for every value.
#[derive(Debug, Default, Clone)]
pub struct SourcedRuleConfig {
    /// Option values keyed by option name (e.g. `line-length`), each carrying
    /// its source and override history.
    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
}
1463
/// Represents configuration loaded from a single source file, with provenance.
/// Used as an intermediate step before merging into the final SourcedConfig.
///
/// A fragment has no `loaded_files` list; that is tracked globally in `SourcedConfig`.
#[derive(Debug, Clone)]
pub struct SourcedConfigFragment {
    /// Global settings read from this source.
    pub global: SourcedGlobalConfig,
    /// Map from file pattern to the rule names listed for it.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Per-rule settings keyed by rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Unknown keys recorded as `(section, key, file_path)` tuples.
    pub unknown_keys: Vec<(String, String, Option<String>)>,
}
1474
1475impl Default for SourcedConfigFragment {
1476    fn default() -> Self {
1477        Self {
1478            global: SourcedGlobalConfig::default(),
1479            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1480            rules: BTreeMap::new(),
1481            unknown_keys: Vec::new(),
1482        }
1483    }
1484}
1485
/// The merged configuration with provenance for every value.
///
/// Fragments (`SourcedConfigFragment`) are folded into it by `SourcedConfig::merge`.
#[derive(Debug, Clone)]
pub struct SourcedConfig {
    /// Global settings.
    pub global: SourcedGlobalConfig,
    /// Map from file pattern to the rule names listed for it.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Per-rule settings keyed by rule name (`merge` uppercases the names it inserts).
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// NOTE(review): presumably the paths of the config files that were loaded — confirm.
    pub loaded_files: Vec<String>,
    /// Unknown keys recorded as `(section, key, file_path)` tuples.
    pub unknown_keys: Vec<(String, String, Option<String>)>,
}
1494
1495impl Default for SourcedConfig {
1496    fn default() -> Self {
1497        Self {
1498            global: SourcedGlobalConfig::default(),
1499            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1500            rules: BTreeMap::new(),
1501            loaded_files: Vec::new(),
1502            unknown_keys: Vec::new(),
1503        }
1504    }
1505}
1506
1507impl SourcedConfig {
1508    /// Merges another SourcedConfigFragment into this SourcedConfig.
1509    /// Uses source precedence to determine which values take effect.
1510    fn merge(&mut self, fragment: SourcedConfigFragment) {
1511        // Merge global config
1512        self.global.enable.merge_override(
1513            fragment.global.enable.value,
1514            fragment.global.enable.source,
1515            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1516            fragment.global.enable.overrides.first().and_then(|o| o.line),
1517        );
1518        self.global.disable.merge_override(
1519            fragment.global.disable.value,
1520            fragment.global.disable.source,
1521            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1522            fragment.global.disable.overrides.first().and_then(|o| o.line),
1523        );
1524        self.global.include.merge_override(
1525            fragment.global.include.value,
1526            fragment.global.include.source,
1527            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1528            fragment.global.include.overrides.first().and_then(|o| o.line),
1529        );
1530        self.global.exclude.merge_override(
1531            fragment.global.exclude.value,
1532            fragment.global.exclude.source,
1533            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1534            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1535        );
1536        self.global.respect_gitignore.merge_override(
1537            fragment.global.respect_gitignore.value,
1538            fragment.global.respect_gitignore.source,
1539            fragment
1540                .global
1541                .respect_gitignore
1542                .overrides
1543                .first()
1544                .and_then(|o| o.file.clone()),
1545            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1546        );
1547        self.global.line_length.merge_override(
1548            fragment.global.line_length.value,
1549            fragment.global.line_length.source,
1550            fragment
1551                .global
1552                .line_length
1553                .overrides
1554                .first()
1555                .and_then(|o| o.file.clone()),
1556            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1557        );
1558        self.global.fixable.merge_override(
1559            fragment.global.fixable.value,
1560            fragment.global.fixable.source,
1561            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1562            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1563        );
1564        self.global.unfixable.merge_override(
1565            fragment.global.unfixable.value,
1566            fragment.global.unfixable.source,
1567            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1568            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1569        );
1570
1571        // Merge flavor
1572        self.global.flavor.merge_override(
1573            fragment.global.flavor.value,
1574            fragment.global.flavor.source,
1575            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1576            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1577        );
1578
1579        // Merge force_exclude
1580        self.global.force_exclude.merge_override(
1581            fragment.global.force_exclude.value,
1582            fragment.global.force_exclude.source,
1583            fragment
1584                .global
1585                .force_exclude
1586                .overrides
1587                .first()
1588                .and_then(|o| o.file.clone()),
1589            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1590        );
1591
1592        // Merge output_format if present
1593        if let Some(output_format_fragment) = fragment.global.output_format {
1594            if let Some(ref mut output_format) = self.global.output_format {
1595                output_format.merge_override(
1596                    output_format_fragment.value,
1597                    output_format_fragment.source,
1598                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1599                    output_format_fragment.overrides.first().and_then(|o| o.line),
1600                );
1601            } else {
1602                self.global.output_format = Some(output_format_fragment);
1603            }
1604        }
1605
1606        // Merge per_file_ignores
1607        self.per_file_ignores.merge_override(
1608            fragment.per_file_ignores.value,
1609            fragment.per_file_ignores.source,
1610            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1611            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1612        );
1613
1614        // Merge rule configs
1615        for (rule_name, rule_fragment) in fragment.rules {
1616            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1617            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1618            for (key, sourced_value_fragment) in rule_fragment.values {
1619                let sv_entry = rule_entry
1620                    .values
1621                    .entry(key.clone())
1622                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1623                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1624                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1625                sv_entry.merge_override(
1626                    sourced_value_fragment.value,  // Use the value from the fragment
1627                    sourced_value_fragment.source, // Use the source from the fragment
1628                    file_from_fragment,            // Pass the file path from the fragment override
1629                    line_from_fragment,            // Pass the line number from the fragment override
1630                );
1631            }
1632        }
1633
1634        // Merge unknown_keys from fragment
1635        for (section, key, file_path) in fragment.unknown_keys {
1636            // Deduplicate: only add if not already present
1637            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
1638                self.unknown_keys.push((section, key, file_path));
1639            }
1640        }
1641    }
1642
1643    /// Load and merge configurations from files and CLI overrides.
1644    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
1645        Self::load_with_discovery(config_path, cli_overrides, false)
1646    }
1647
1648    /// Discover configuration file by traversing up the directory tree.
1649    /// Returns the first configuration file found.
1650    fn discover_config_upward() -> Option<std::path::PathBuf> {
1651        use std::env;
1652
1653        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1654        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
1655
1656        let start_dir = match env::current_dir() {
1657            Ok(dir) => dir,
1658            Err(e) => {
1659                log::debug!("[rumdl-config] Failed to get current directory: {e}");
1660                return None;
1661            }
1662        };
1663
1664        let mut current_dir = start_dir.clone();
1665        let mut depth = 0;
1666
1667        loop {
1668            if depth >= MAX_DEPTH {
1669                log::debug!("[rumdl-config] Maximum traversal depth reached");
1670                break;
1671            }
1672
1673            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
1674
1675            // Check for config files in order of precedence
1676            for config_name in CONFIG_FILES {
1677                let config_path = current_dir.join(config_name);
1678
1679                if config_path.exists() {
1680                    // For pyproject.toml, verify it contains [tool.rumdl] section
1681                    if *config_name == "pyproject.toml" {
1682                        if let Ok(content) = std::fs::read_to_string(&config_path) {
1683                            if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1684                                log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1685                                return Some(config_path);
1686                            }
1687                            log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
1688                            continue;
1689                        }
1690                    } else {
1691                        log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1692                        return Some(config_path);
1693                    }
1694                }
1695            }
1696
1697            // Check for .git directory (stop boundary)
1698            if current_dir.join(".git").exists() {
1699                log::debug!("[rumdl-config] Stopping at .git directory");
1700                break;
1701            }
1702
1703            // Move to parent directory
1704            match current_dir.parent() {
1705                Some(parent) => {
1706                    current_dir = parent.to_owned();
1707                    depth += 1;
1708                }
1709                None => {
1710                    log::debug!("[rumdl-config] Reached filesystem root");
1711                    break;
1712                }
1713            }
1714        }
1715
1716        None
1717    }
1718
1719    /// Internal implementation that accepts config directory for testing
1720    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
1721        let config_dir = config_dir.join("rumdl");
1722
1723        // Check for config files in precedence order (same as project discovery)
1724        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1725
1726        log::debug!(
1727            "[rumdl-config] Checking for user configuration in: {}",
1728            config_dir.display()
1729        );
1730
1731        for filename in USER_CONFIG_FILES {
1732            let config_path = config_dir.join(filename);
1733
1734            if config_path.exists() {
1735                // For pyproject.toml, verify it contains [tool.rumdl] section
1736                if *filename == "pyproject.toml" {
1737                    if let Ok(content) = std::fs::read_to_string(&config_path) {
1738                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1739                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1740                            return Some(config_path);
1741                        }
1742                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
1743                        continue;
1744                    }
1745                } else {
1746                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1747                    return Some(config_path);
1748                }
1749            }
1750        }
1751
1752        log::debug!(
1753            "[rumdl-config] No user configuration found in: {}",
1754            config_dir.display()
1755        );
1756        None
1757    }
1758
1759    /// Discover user-level configuration file from platform-specific config directory.
1760    /// Returns the first configuration file found in the user config directory.
1761    fn user_configuration_path() -> Option<std::path::PathBuf> {
1762        use etcetera::{BaseStrategy, choose_base_strategy};
1763
1764        match choose_base_strategy() {
1765            Ok(strategy) => {
1766                let config_dir = strategy.config_dir();
1767                Self::user_configuration_path_impl(&config_dir)
1768            }
1769            Err(e) => {
1770                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
1771                None
1772            }
1773        }
1774    }
1775
1776    /// Internal implementation that accepts user config directory for testing
1777    #[doc(hidden)]
1778    pub fn load_with_discovery_impl(
1779        config_path: Option<&str>,
1780        cli_overrides: Option<&SourcedGlobalConfig>,
1781        skip_auto_discovery: bool,
1782        user_config_dir: Option<&Path>,
1783    ) -> Result<Self, ConfigError> {
1784        use std::env;
1785        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
1786        if config_path.is_none() {
1787            if skip_auto_discovery {
1788                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
1789            } else {
1790                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
1791            }
1792        } else {
1793            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
1794        }
1795        let mut sourced_config = SourcedConfig::default();
1796
1797        // 1. Load explicit config path if provided
1798        if let Some(path) = config_path {
1799            let path_obj = Path::new(path);
1800            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
1801            log::debug!("[rumdl-config] Trying to load config file: {filename}");
1802            let path_str = path.to_string();
1803
1804            // Known markdownlint config files
1805            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
1806
1807            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
1808                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1809                    source: e,
1810                    path: path_str.clone(),
1811                })?;
1812                if filename == "pyproject.toml" {
1813                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1814                        sourced_config.merge(fragment);
1815                        sourced_config.loaded_files.push(path_str.clone());
1816                    }
1817                } else {
1818                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1819                    sourced_config.merge(fragment);
1820                    sourced_config.loaded_files.push(path_str.clone());
1821                }
1822            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
1823                || path_str.ends_with(".json")
1824                || path_str.ends_with(".jsonc")
1825                || path_str.ends_with(".yaml")
1826                || path_str.ends_with(".yml")
1827            {
1828                // Parse as markdownlint config (JSON/YAML)
1829                let fragment = load_from_markdownlint(&path_str)?;
1830                sourced_config.merge(fragment);
1831                sourced_config.loaded_files.push(path_str.clone());
1832                // markdownlint is fallback only
1833            } else {
1834                // Try TOML only
1835                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1836                    source: e,
1837                    path: path_str.clone(),
1838                })?;
1839                let fragment = parse_rumdl_toml(&content, &path_str)?;
1840                sourced_config.merge(fragment);
1841                sourced_config.loaded_files.push(path_str.clone());
1842            }
1843        }
1844
1845        // Only perform auto-discovery if not skipped AND no explicit config path provided
1846        if !skip_auto_discovery && config_path.is_none() {
1847            // Step 1: Load user configuration first (as a base)
1848            let user_config_path = if let Some(dir) = user_config_dir {
1849                Self::user_configuration_path_impl(dir)
1850            } else {
1851                Self::user_configuration_path()
1852            };
1853
1854            if let Some(user_config_path) = user_config_path {
1855                let path_str = user_config_path.display().to_string();
1856                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1857
1858                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
1859
1860                if filename == "pyproject.toml" {
1861                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1862                        source: e,
1863                        path: path_str.clone(),
1864                    })?;
1865                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1866                        sourced_config.merge(fragment);
1867                        sourced_config.loaded_files.push(path_str);
1868                    }
1869                } else {
1870                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1871                        source: e,
1872                        path: path_str.clone(),
1873                    })?;
1874                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1875                    sourced_config.merge(fragment);
1876                    sourced_config.loaded_files.push(path_str);
1877                }
1878            } else {
1879                log::debug!("[rumdl-config] No user configuration file found");
1880            }
1881
1882            // Step 2: Look for project configuration files (override user config)
1883            if let Some(config_file) = Self::discover_config_upward() {
1884                let path_str = config_file.display().to_string();
1885                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
1886
1887                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
1888
1889                if filename == "pyproject.toml" {
1890                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1891                        source: e,
1892                        path: path_str.clone(),
1893                    })?;
1894                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1895                        sourced_config.merge(fragment);
1896                        sourced_config.loaded_files.push(path_str);
1897                    }
1898                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
1899                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1900                        source: e,
1901                        path: path_str.clone(),
1902                    })?;
1903                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1904                    sourced_config.merge(fragment);
1905                    sourced_config.loaded_files.push(path_str);
1906                }
1907            } else {
1908                log::debug!("[rumdl-config] No configuration file found via upward traversal");
1909
1910                // Step 3: If no project config found, fallback to markdownlint config in current directory
1911                let mut found_markdownlint = false;
1912                for filename in MARKDOWNLINT_CONFIG_FILES {
1913                    if std::path::Path::new(filename).exists() {
1914                        match load_from_markdownlint(filename) {
1915                            Ok(fragment) => {
1916                                sourced_config.merge(fragment);
1917                                sourced_config.loaded_files.push(filename.to_string());
1918                                found_markdownlint = true;
1919                                break; // Load only the first one found
1920                            }
1921                            Err(_e) => {
1922                                // Log error but continue (it's just a fallback)
1923                            }
1924                        }
1925                    }
1926                }
1927
1928                if !found_markdownlint {
1929                    log::debug!("[rumdl-config] No markdownlint configuration file found");
1930                }
1931            }
1932        }
1933
1934        // 5. Apply CLI overrides (highest precedence)
1935        if let Some(cli) = cli_overrides {
1936            sourced_config
1937                .global
1938                .enable
1939                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
1940            sourced_config
1941                .global
1942                .disable
1943                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
1944            sourced_config
1945                .global
1946                .exclude
1947                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
1948            sourced_config
1949                .global
1950                .include
1951                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
1952            sourced_config.global.respect_gitignore.merge_override(
1953                cli.respect_gitignore.value,
1954                ConfigSource::Cli,
1955                None,
1956                None,
1957            );
1958            sourced_config
1959                .global
1960                .fixable
1961                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
1962            sourced_config
1963                .global
1964                .unfixable
1965                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
1966            // No rule-specific CLI overrides implemented yet
1967        }
1968
1969        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
1970
1971        Ok(sourced_config)
1972    }
1973
1974    /// Load and merge configurations from files and CLI overrides.
1975    /// If skip_auto_discovery is true, only explicit config paths are loaded.
1976    pub fn load_with_discovery(
1977        config_path: Option<&str>,
1978        cli_overrides: Option<&SourcedGlobalConfig>,
1979        skip_auto_discovery: bool,
1980    ) -> Result<Self, ConfigError> {
1981        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
1982    }
1983}
1984
1985impl From<SourcedConfig> for Config {
1986    fn from(sourced: SourcedConfig) -> Self {
1987        let mut rules = BTreeMap::new();
1988        for (rule_name, sourced_rule_cfg) in sourced.rules {
1989            // Normalize rule name to uppercase for case-insensitive lookup
1990            let normalized_rule_name = rule_name.to_ascii_uppercase();
1991            let mut values = BTreeMap::new();
1992            for (key, sourced_val) in sourced_rule_cfg.values {
1993                values.insert(key, sourced_val.value);
1994            }
1995            rules.insert(normalized_rule_name, RuleConfig { values });
1996        }
1997        #[allow(deprecated)]
1998        let global = GlobalConfig {
1999            enable: sourced.global.enable.value,
2000            disable: sourced.global.disable.value,
2001            exclude: sourced.global.exclude.value,
2002            include: sourced.global.include.value,
2003            respect_gitignore: sourced.global.respect_gitignore.value,
2004            line_length: sourced.global.line_length.value,
2005            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2006            fixable: sourced.global.fixable.value,
2007            unfixable: sourced.global.unfixable.value,
2008            flavor: sourced.global.flavor.value,
2009            force_exclude: sourced.global.force_exclude.value,
2010        };
2011        Config {
2012            global,
2013            per_file_ignores: sourced.per_file_ignores.value,
2014            rules,
2015        }
2016    }
2017}
2018
2019/// Registry of all known rules and their config schemas
2020pub struct RuleRegistry {
2021    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types
2022    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
2023    /// Map of rule name to config key aliases
2024    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
2025}
2026
2027impl RuleRegistry {
2028    /// Build a registry from a list of rules
2029    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2030        let mut rule_schemas = std::collections::BTreeMap::new();
2031        let mut rule_aliases = std::collections::BTreeMap::new();
2032
2033        for rule in rules {
2034            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2035                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2036                rule_schemas.insert(norm_name.clone(), table);
2037                norm_name
2038            } else {
2039                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2040                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2041                norm_name
2042            };
2043
2044            // Store aliases if the rule provides them
2045            if let Some(aliases) = rule.config_aliases() {
2046                rule_aliases.insert(norm_name, aliases);
2047            }
2048        }
2049
2050        RuleRegistry {
2051            rule_schemas,
2052            rule_aliases,
2053        }
2054    }
2055
2056    /// Get all known rule names
2057    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2058        self.rule_schemas.keys().cloned().collect()
2059    }
2060
2061    /// Get the valid configuration keys for a rule, including both original and normalized variants
2062    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2063        self.rule_schemas.get(rule).map(|schema| {
2064            let mut all_keys = std::collections::BTreeSet::new();
2065
2066            // Add original keys from schema
2067            for key in schema.keys() {
2068                all_keys.insert(key.clone());
2069            }
2070
2071            // Add normalized variants for markdownlint compatibility
2072            for key in schema.keys() {
2073                // Add kebab-case variant
2074                all_keys.insert(key.replace('_', "-"));
2075                // Add snake_case variant
2076                all_keys.insert(key.replace('-', "_"));
2077                // Add normalized variant
2078                all_keys.insert(normalize_key(key));
2079            }
2080
2081            // Add any aliases defined by the rule
2082            if let Some(aliases) = self.rule_aliases.get(rule) {
2083                for alias_key in aliases.keys() {
2084                    all_keys.insert(alias_key.clone());
2085                    // Also add normalized variants of the alias
2086                    all_keys.insert(alias_key.replace('_', "-"));
2087                    all_keys.insert(alias_key.replace('-', "_"));
2088                    all_keys.insert(normalize_key(alias_key));
2089                }
2090            }
2091
2092            all_keys
2093        })
2094    }
2095
2096    /// Get the expected value type for a rule's configuration key, trying variants
2097    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2098        if let Some(schema) = self.rule_schemas.get(rule) {
2099            // Check if this key is an alias
2100            if let Some(aliases) = self.rule_aliases.get(rule)
2101                && let Some(canonical_key) = aliases.get(key)
2102            {
2103                // Use the canonical key for schema lookup
2104                if let Some(value) = schema.get(canonical_key) {
2105                    return Some(value);
2106                }
2107            }
2108
2109            // Try the original key
2110            if let Some(value) = schema.get(key) {
2111                return Some(value);
2112            }
2113
2114            // Try key variants
2115            let key_variants = [
2116                key.replace('-', "_"), // Convert kebab-case to snake_case
2117                key.replace('_', "-"), // Convert snake_case to kebab-case
2118                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2119            ];
2120
2121            for variant in &key_variants {
2122                if let Some(value) = schema.get(variant) {
2123                    return Some(value);
2124                }
2125            }
2126        }
2127        None
2128    }
2129}
2130
/// A single finding produced while validating a configuration.
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem.
    pub message: String,

    /// Name of the rule the finding refers to, when it is tied to one.
    pub rule: Option<String>,

    /// Configuration key the finding refers to, when it is tied to one.
    pub key: Option<String>,
}
2138
2139/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking
2140pub fn validate_config_sourced(sourced: &SourcedConfig, registry: &RuleRegistry) -> Vec<ConfigValidationWarning> {
2141    let mut warnings = Vec::new();
2142    let known_rules = registry.rule_names();
2143    // 1. Unknown rules
2144    for rule in sourced.rules.keys() {
2145        if !known_rules.contains(rule) {
2146            warnings.push(ConfigValidationWarning {
2147                message: format!("Unknown rule in config: {rule}"),
2148                rule: Some(rule.clone()),
2149                key: None,
2150            });
2151        }
2152    }
2153    // 2. Unknown options and type mismatches
2154    for (rule, rule_cfg) in &sourced.rules {
2155        if let Some(valid_keys) = registry.config_keys_for(rule) {
2156            for key in rule_cfg.values.keys() {
2157                if !valid_keys.contains(key) {
2158                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2159                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2160                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2161                    } else {
2162                        format!("Unknown option for rule {rule}: {key}")
2163                    };
2164                    warnings.push(ConfigValidationWarning {
2165                        message,
2166                        rule: Some(rule.clone()),
2167                        key: Some(key.clone()),
2168                    });
2169                } else {
2170                    // Type check: compare type of value to type of default
2171                    if let Some(expected) = registry.expected_value_for(rule, key) {
2172                        let actual = &rule_cfg.values[key].value;
2173                        if !toml_value_type_matches(expected, actual) {
2174                            warnings.push(ConfigValidationWarning {
2175                                message: format!(
2176                                    "Type mismatch for {}.{}: expected {}, got {}",
2177                                    rule,
2178                                    key,
2179                                    toml_type_name(expected),
2180                                    toml_type_name(actual)
2181                                ),
2182                                rule: Some(rule.clone()),
2183                                key: Some(key.clone()),
2184                            });
2185                        }
2186                    }
2187                }
2188            }
2189        }
2190    }
2191    // 3. Unknown global options (from unknown_keys)
2192    let known_global_keys = vec![
2193        "enable".to_string(),
2194        "disable".to_string(),
2195        "include".to_string(),
2196        "exclude".to_string(),
2197        "respect-gitignore".to_string(),
2198        "line-length".to_string(),
2199        "fixable".to_string(),
2200        "unfixable".to_string(),
2201        "flavor".to_string(),
2202        "force-exclude".to_string(),
2203        "output-format".to_string(),
2204    ];
2205
2206    for (section, key, file_path) in &sourced.unknown_keys {
2207        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2208            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2209                if let Some(path) = file_path {
2210                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2211                } else {
2212                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2213                }
2214            } else if let Some(path) = file_path {
2215                format!("Unknown global option in {path}: {key}")
2216            } else {
2217                format!("Unknown global option: {key}")
2218            };
2219            warnings.push(ConfigValidationWarning {
2220                message,
2221                rule: None,
2222                key: Some(key.clone()),
2223            });
2224        } else if !key.is_empty() {
2225            // This is an unknown rule section (key is empty means it's a section header)
2226            // No suggestions for rule names - just warn
2227            continue;
2228        } else {
2229            // Unknown rule section
2230            let message = if let Some(path) = file_path {
2231                format!(
2232                    "Unknown rule in {path}: {}",
2233                    section.trim_matches(|c| c == '[' || c == ']')
2234                )
2235            } else {
2236                format!(
2237                    "Unknown rule in config: {}",
2238                    section.trim_matches(|c| c == '[' || c == ']')
2239                )
2240            };
2241            warnings.push(ConfigValidationWarning {
2242                message,
2243                rule: None,
2244                key: None,
2245            });
2246        }
2247    }
2248    warnings
2249}
2250
2251fn toml_type_name(val: &toml::Value) -> &'static str {
2252    match val {
2253        toml::Value::String(_) => "string",
2254        toml::Value::Integer(_) => "integer",
2255        toml::Value::Float(_) => "float",
2256        toml::Value::Boolean(_) => "boolean",
2257        toml::Value::Array(_) => "array",
2258        toml::Value::Table(_) => "table",
2259        toml::Value::Datetime(_) => "datetime",
2260    }
2261}
2262
/// Calculate the Levenshtein distance between two strings, counted in `char`s.
///
/// Uses the two-row dynamic-programming formulation: `prev` holds the distances for the
/// previous character of `s1`, `curr` is filled for the current one, then the rows swap.
///
/// All lengths and indices are taken from the collected `char` vectors. Using the
/// byte length (`str::len`) here would index past the end of those vectors for any
/// input containing multi-byte characters.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let a: Vec<char> = s1.chars().collect();
    let b: Vec<char> = s2.chars().collect();

    // Distance to or from the empty string is the other string's character count.
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }

    let mut prev: Vec<usize> = (0..=b.len()).collect();
    let mut curr = vec![0; b.len() + 1];

    for (i, ca) in a.iter().enumerate() {
        curr[0] = i + 1;
        for (j, cb) in b.iter().enumerate() {
            let cost = usize::from(ca != cb);
            curr[j + 1] = (prev[j + 1] + 1) // deletion
                .min(curr[j] + 1) // insertion
                .min(prev[j] + cost); // substitution
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    prev[b.len()]
}
2294
2295/// Suggest a similar key from a list of valid keys using fuzzy matching
2296fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2297    let unknown_lower = unknown.to_lowercase();
2298    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2299
2300    let mut best_match: Option<(String, usize)> = None;
2301
2302    for valid in valid_keys {
2303        let valid_lower = valid.to_lowercase();
2304        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2305
2306        if distance <= max_distance {
2307            if let Some((_, best_dist)) = &best_match {
2308                if distance < *best_dist {
2309                    best_match = Some((valid.clone(), distance));
2310                }
2311            } else {
2312                best_match = Some((valid.clone(), distance));
2313            }
2314        }
2315    }
2316
2317    best_match.map(|(key, _)| key)
2318}
2319
2320fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2321    use toml::Value::*;
2322    match (expected, actual) {
2323        (String(_), String(_)) => true,
2324        (Integer(_), Integer(_)) => true,
2325        (Float(_), Float(_)) => true,
2326        (Boolean(_), Boolean(_)) => true,
2327        (Array(_), Array(_)) => true,
2328        (Table(_), Table(_)) => true,
2329        (Datetime(_), Datetime(_)) => true,
2330        // Allow integer for float
2331        (Float(_), Integer(_)) => true,
2332        _ => false,
2333    }
2334}
2335
2336/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2337fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2338    let doc: toml::Value =
2339        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2340    let mut fragment = SourcedConfigFragment::default();
2341    let source = ConfigSource::PyprojectToml;
2342    let file = Some(path.to_string());
2343
2344    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2345    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2346        && let Some(rumdl_table) = rumdl_config.as_table()
2347    {
2348        // Helper function to extract global config from a table
2349        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2350            // Extract global options from the given table
2351            if let Some(enable) = table.get("enable")
2352                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2353            {
2354                // Normalize rule names in the list
2355                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2356                fragment
2357                    .global
2358                    .enable
2359                    .push_override(normalized_values, source, file.clone(), None);
2360            }
2361
2362            if let Some(disable) = table.get("disable")
2363                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2364            {
2365                // Re-enable normalization
2366                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2367                fragment
2368                    .global
2369                    .disable
2370                    .push_override(normalized_values, source, file.clone(), None);
2371            }
2372
2373            if let Some(include) = table.get("include")
2374                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2375            {
2376                fragment
2377                    .global
2378                    .include
2379                    .push_override(values, source, file.clone(), None);
2380            }
2381
2382            if let Some(exclude) = table.get("exclude")
2383                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2384            {
2385                fragment
2386                    .global
2387                    .exclude
2388                    .push_override(values, source, file.clone(), None);
2389            }
2390
2391            if let Some(respect_gitignore) = table
2392                .get("respect-gitignore")
2393                .or_else(|| table.get("respect_gitignore"))
2394                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2395            {
2396                fragment
2397                    .global
2398                    .respect_gitignore
2399                    .push_override(value, source, file.clone(), None);
2400            }
2401
2402            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2403                && let Ok(value) = bool::deserialize(force_exclude.clone())
2404            {
2405                fragment
2406                    .global
2407                    .force_exclude
2408                    .push_override(value, source, file.clone(), None);
2409            }
2410
2411            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2412                && let Ok(value) = String::deserialize(output_format.clone())
2413            {
2414                if fragment.global.output_format.is_none() {
2415                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2416                } else {
2417                    fragment
2418                        .global
2419                        .output_format
2420                        .as_mut()
2421                        .unwrap()
2422                        .push_override(value, source, file.clone(), None);
2423                }
2424            }
2425
2426            if let Some(fixable) = table.get("fixable")
2427                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2428            {
2429                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2430                fragment
2431                    .global
2432                    .fixable
2433                    .push_override(normalized_values, source, file.clone(), None);
2434            }
2435
2436            if let Some(unfixable) = table.get("unfixable")
2437                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2438            {
2439                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2440                fragment
2441                    .global
2442                    .unfixable
2443                    .push_override(normalized_values, source, file.clone(), None);
2444            }
2445
2446            if let Some(flavor) = table.get("flavor")
2447                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2448            {
2449                fragment.global.flavor.push_override(value, source, file.clone(), None);
2450            }
2451
2452            // Handle line-length special case - this should set the global line_length
2453            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
2454                && let Ok(value) = u64::deserialize(line_length.clone())
2455            {
2456                fragment
2457                    .global
2458                    .line_length
2459                    .push_override(value, source, file.clone(), None);
2460
2461                // Also add to MD013 rule config for backward compatibility
2462                let norm_md013_key = normalize_key("MD013");
2463                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
2464                let norm_line_length_key = normalize_key("line-length");
2465                let sv = rule_entry
2466                    .values
2467                    .entry(norm_line_length_key)
2468                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
2469                sv.push_override(line_length.clone(), source, file.clone(), None);
2470            }
2471        };
2472
2473        // First, check for [tool.rumdl.global] section
2474        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
2475            extract_global_config(&mut fragment, global_table);
2476        }
2477
2478        // Also extract global options from [tool.rumdl] directly (for flat structure)
2479        extract_global_config(&mut fragment, rumdl_table);
2480
2481        // --- Extract per-file-ignores configurations ---
2482        // Check both hyphenated and underscored versions for compatibility
2483        let per_file_ignores_key = rumdl_table
2484            .get("per-file-ignores")
2485            .or_else(|| rumdl_table.get("per_file_ignores"));
2486
2487        if let Some(per_file_ignores_value) = per_file_ignores_key
2488            && let Some(per_file_table) = per_file_ignores_value.as_table()
2489        {
2490            let mut per_file_map = HashMap::new();
2491            for (pattern, rules_value) in per_file_table {
2492                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
2493                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
2494                    per_file_map.insert(pattern.clone(), normalized_rules);
2495                } else {
2496                    log::warn!(
2497                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
2498                    );
2499                }
2500            }
2501            fragment
2502                .per_file_ignores
2503                .push_override(per_file_map, source, file.clone(), None);
2504        }
2505
2506        // --- Extract rule-specific configurations ---
2507        for (key, value) in rumdl_table {
2508            let norm_rule_key = normalize_key(key);
2509
2510            // Skip keys already handled as global or special cases
2511            if [
2512                "enable",
2513                "disable",
2514                "include",
2515                "exclude",
2516                "respect_gitignore",
2517                "respect-gitignore", // Added kebab-case here too
2518                "force_exclude",
2519                "force-exclude",
2520                "line_length",
2521                "line-length",
2522                "output_format",
2523                "output-format",
2524                "fixable",
2525                "unfixable",
2526                "per-file-ignores",
2527                "per_file_ignores",
2528                "global",
2529            ]
2530            .contains(&norm_rule_key.as_str())
2531            {
2532                continue;
2533            }
2534
2535            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
2536            // AND if the value is actually a TOML table before processing as rule config.
2537            // This prevents misinterpreting other top-level keys under [tool.rumdl]
2538            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
2539            if norm_rule_key_upper.len() == 5
2540                && norm_rule_key_upper.starts_with("MD")
2541                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
2542                && value.is_table()
2543            {
2544                if let Some(rule_config_table) = value.as_table() {
2545                    // Get the entry for this rule (e.g., "md013")
2546                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
2547                    for (rk, rv) in rule_config_table {
2548                        let norm_rk = normalize_key(rk); // Normalize the config key itself
2549
2550                        let toml_val = rv.clone();
2551
2552                        let sv = rule_entry
2553                            .values
2554                            .entry(norm_rk.clone())
2555                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2556                        sv.push_override(toml_val, source, file.clone(), None);
2557                    }
2558                }
2559            } else {
2560                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
2561                // Track unknown keys under [tool.rumdl] for validation
2562                fragment
2563                    .unknown_keys
2564                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
2565            }
2566        }
2567    }
2568
2569    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
2570    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
2571        for (key, value) in tool_table.iter() {
2572            if let Some(rule_name) = key.strip_prefix("rumdl.") {
2573                let norm_rule_name = normalize_key(rule_name);
2574                if norm_rule_name.len() == 5
2575                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2576                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2577                    && let Some(rule_table) = value.as_table()
2578                {
2579                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2580                    for (rk, rv) in rule_table {
2581                        let norm_rk = normalize_key(rk);
2582                        let toml_val = rv.clone();
2583                        let sv = rule_entry
2584                            .values
2585                            .entry(norm_rk.clone())
2586                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2587                        sv.push_override(toml_val, source, file.clone(), None);
2588                    }
2589                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2590                    // Track unknown rule sections like [tool.rumdl.MD999]
2591                    fragment.unknown_keys.push((
2592                        format!("[tool.rumdl.{rule_name}]"),
2593                        String::new(),
2594                        Some(path.to_string()),
2595                    ));
2596                }
2597            }
2598        }
2599    }
2600
2601    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
2602    if let Some(doc_table) = doc.as_table() {
2603        for (key, value) in doc_table.iter() {
2604            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
2605                let norm_rule_name = normalize_key(rule_name);
2606                if norm_rule_name.len() == 5
2607                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2608                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2609                    && let Some(rule_table) = value.as_table()
2610                {
2611                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2612                    for (rk, rv) in rule_table {
2613                        let norm_rk = normalize_key(rk);
2614                        let toml_val = rv.clone();
2615                        let sv = rule_entry
2616                            .values
2617                            .entry(norm_rk.clone())
2618                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2619                        sv.push_override(toml_val, source, file.clone(), None);
2620                    }
2621                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2622                    // Track unknown rule sections like [tool.rumdl.MD999]
2623                    fragment.unknown_keys.push((
2624                        format!("[tool.rumdl.{rule_name}]"),
2625                        String::new(),
2626                        Some(path.to_string()),
2627                    ));
2628                }
2629            }
2630        }
2631    }
2632
2633    // Only return Some(fragment) if any config was found
2634    let has_any = !fragment.global.enable.value.is_empty()
2635        || !fragment.global.disable.value.is_empty()
2636        || !fragment.global.include.value.is_empty()
2637        || !fragment.global.exclude.value.is_empty()
2638        || !fragment.global.fixable.value.is_empty()
2639        || !fragment.global.unfixable.value.is_empty()
2640        || fragment.global.output_format.is_some()
2641        || !fragment.per_file_ignores.value.is_empty()
2642        || !fragment.rules.is_empty();
2643    if has_any { Ok(Some(fragment)) } else { Ok(None) }
2644}
2645
2646/// Parses rumdl.toml / .rumdl.toml content.
2647fn parse_rumdl_toml(content: &str, path: &str) -> Result<SourcedConfigFragment, ConfigError> {
2648    let doc = content
2649        .parse::<DocumentMut>()
2650        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2651    let mut fragment = SourcedConfigFragment::default();
2652    let source = ConfigSource::RumdlToml;
2653    let file = Some(path.to_string());
2654
2655    // Define known rules before the loop
2656    let all_rules = rules::all_rules(&Config::default());
2657    let registry = RuleRegistry::from_rules(&all_rules);
2658    let known_rule_names: BTreeSet<String> = registry
2659        .rule_names()
2660        .into_iter()
2661        .map(|s| s.to_ascii_uppercase())
2662        .collect();
2663
2664    // Handle [global] section
2665    if let Some(global_item) = doc.get("global")
2666        && let Some(global_table) = global_item.as_table()
2667    {
2668        for (key, value_item) in global_table.iter() {
2669            let norm_key = normalize_key(key);
2670            match norm_key.as_str() {
2671                "enable" | "disable" | "include" | "exclude" => {
2672                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2673                        // Corrected: Iterate directly over the Formatted<Array>
2674                        let values: Vec<String> = formatted_array
2675                                .iter()
2676                                .filter_map(|item| item.as_str()) // Extract strings
2677                                .map(|s| s.to_string())
2678                                .collect();
2679
2680                        // Normalize rule names for enable/disable
2681                        let final_values = if norm_key == "enable" || norm_key == "disable" {
2682                            // Corrected: Pass &str to normalize_key
2683                            values.into_iter().map(|s| normalize_key(&s)).collect()
2684                        } else {
2685                            values
2686                        };
2687
2688                        match norm_key.as_str() {
2689                            "enable" => fragment
2690                                .global
2691                                .enable
2692                                .push_override(final_values, source, file.clone(), None),
2693                            "disable" => {
2694                                fragment
2695                                    .global
2696                                    .disable
2697                                    .push_override(final_values, source, file.clone(), None)
2698                            }
2699                            "include" => {
2700                                fragment
2701                                    .global
2702                                    .include
2703                                    .push_override(final_values, source, file.clone(), None)
2704                            }
2705                            "exclude" => {
2706                                fragment
2707                                    .global
2708                                    .exclude
2709                                    .push_override(final_values, source, file.clone(), None)
2710                            }
2711                            _ => unreachable!(), // Should not happen due to outer match
2712                        }
2713                    } else {
2714                        log::warn!(
2715                            "[WARN] Expected array for global key '{}' in {}, found {}",
2716                            key,
2717                            path,
2718                            value_item.type_name()
2719                        );
2720                    }
2721                }
2722                "respect_gitignore" | "respect-gitignore" => {
2723                    // Handle both cases
2724                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2725                        let val = *formatted_bool.value();
2726                        fragment
2727                            .global
2728                            .respect_gitignore
2729                            .push_override(val, source, file.clone(), None);
2730                    } else {
2731                        log::warn!(
2732                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2733                            key,
2734                            path,
2735                            value_item.type_name()
2736                        );
2737                    }
2738                }
2739                "force_exclude" | "force-exclude" => {
2740                    // Handle both cases
2741                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2742                        let val = *formatted_bool.value();
2743                        fragment
2744                            .global
2745                            .force_exclude
2746                            .push_override(val, source, file.clone(), None);
2747                    } else {
2748                        log::warn!(
2749                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2750                            key,
2751                            path,
2752                            value_item.type_name()
2753                        );
2754                    }
2755                }
2756                "line_length" | "line-length" => {
2757                    // Handle both cases
2758                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
2759                        let val = *formatted_int.value() as u64;
2760                        fragment
2761                            .global
2762                            .line_length
2763                            .push_override(val, source, file.clone(), None);
2764                    } else {
2765                        log::warn!(
2766                            "[WARN] Expected integer for global key '{}' in {}, found {}",
2767                            key,
2768                            path,
2769                            value_item.type_name()
2770                        );
2771                    }
2772                }
2773                "output_format" | "output-format" => {
2774                    // Handle both cases
2775                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2776                        let val = formatted_string.value().clone();
2777                        if fragment.global.output_format.is_none() {
2778                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
2779                        } else {
2780                            fragment.global.output_format.as_mut().unwrap().push_override(
2781                                val,
2782                                source,
2783                                file.clone(),
2784                                None,
2785                            );
2786                        }
2787                    } else {
2788                        log::warn!(
2789                            "[WARN] Expected string for global key '{}' in {}, found {}",
2790                            key,
2791                            path,
2792                            value_item.type_name()
2793                        );
2794                    }
2795                }
2796                "fixable" => {
2797                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2798                        let values: Vec<String> = formatted_array
2799                            .iter()
2800                            .filter_map(|item| item.as_str())
2801                            .map(normalize_key)
2802                            .collect();
2803                        fragment
2804                            .global
2805                            .fixable
2806                            .push_override(values, source, file.clone(), None);
2807                    } else {
2808                        log::warn!(
2809                            "[WARN] Expected array for global key '{}' in {}, found {}",
2810                            key,
2811                            path,
2812                            value_item.type_name()
2813                        );
2814                    }
2815                }
2816                "unfixable" => {
2817                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2818                        let values: Vec<String> = formatted_array
2819                            .iter()
2820                            .filter_map(|item| item.as_str())
2821                            .map(normalize_key)
2822                            .collect();
2823                        fragment
2824                            .global
2825                            .unfixable
2826                            .push_override(values, source, file.clone(), None);
2827                    } else {
2828                        log::warn!(
2829                            "[WARN] Expected array for global key '{}' in {}, found {}",
2830                            key,
2831                            path,
2832                            value_item.type_name()
2833                        );
2834                    }
2835                }
2836                "flavor" => {
2837                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2838                        let val = formatted_string.value();
2839                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
2840                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
2841                        } else {
2842                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
2843                        }
2844                    } else {
2845                        log::warn!(
2846                            "[WARN] Expected string for global key '{}' in {}, found {}",
2847                            key,
2848                            path,
2849                            value_item.type_name()
2850                        );
2851                    }
2852                }
2853                _ => {
2854                    // Track unknown global keys for validation
2855                    fragment
2856                        .unknown_keys
2857                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
2858                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
2859                }
2860            }
2861        }
2862    }
2863
2864    // Handle [per-file-ignores] section
2865    if let Some(per_file_item) = doc.get("per-file-ignores")
2866        && let Some(per_file_table) = per_file_item.as_table()
2867    {
2868        let mut per_file_map = HashMap::new();
2869        for (pattern, value_item) in per_file_table.iter() {
2870            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2871                let rules: Vec<String> = formatted_array
2872                    .iter()
2873                    .filter_map(|item| item.as_str())
2874                    .map(normalize_key)
2875                    .collect();
2876                per_file_map.insert(pattern.to_string(), rules);
2877            } else {
2878                let type_name = value_item.type_name();
2879                log::warn!(
2880                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
2881                );
2882            }
2883        }
2884        fragment
2885            .per_file_ignores
2886            .push_override(per_file_map, source, file.clone(), None);
2887    }
2888
2889    // Rule-specific: all other top-level tables
2890    for (key, item) in doc.iter() {
2891        let norm_rule_name = key.to_ascii_uppercase();
2892
2893        // Skip known special sections
2894        if key == "global" || key == "per-file-ignores" {
2895            continue;
2896        }
2897
2898        // Track unknown rule sections (like [MD999])
2899        if !known_rule_names.contains(&norm_rule_name) {
2900            // Only track if it looks like a rule section (starts with MD or is uppercase)
2901            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
2902                fragment
2903                    .unknown_keys
2904                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
2905            }
2906            continue;
2907        }
2908
2909        if let Some(tbl) = item.as_table() {
2910            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
2911            for (rk, rv_item) in tbl.iter() {
2912                let norm_rk = normalize_key(rk);
2913                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
2914                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
2915                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
2916                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
2917                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
2918                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
2919                    Some(toml_edit::Value::Array(formatted_array)) => {
2920                        // Convert toml_edit Array to toml::Value::Array
2921                        let mut values = Vec::new();
2922                        for item in formatted_array.iter() {
2923                            match item {
2924                                toml_edit::Value::String(formatted) => {
2925                                    values.push(toml::Value::String(formatted.value().clone()))
2926                                }
2927                                toml_edit::Value::Integer(formatted) => {
2928                                    values.push(toml::Value::Integer(*formatted.value()))
2929                                }
2930                                toml_edit::Value::Float(formatted) => {
2931                                    values.push(toml::Value::Float(*formatted.value()))
2932                                }
2933                                toml_edit::Value::Boolean(formatted) => {
2934                                    values.push(toml::Value::Boolean(*formatted.value()))
2935                                }
2936                                toml_edit::Value::Datetime(formatted) => {
2937                                    values.push(toml::Value::Datetime(*formatted.value()))
2938                                }
2939                                _ => {
2940                                    log::warn!(
2941                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
2942                                    );
2943                                }
2944                            }
2945                        }
2946                        Some(toml::Value::Array(values))
2947                    }
2948                    Some(toml_edit::Value::InlineTable(_)) => {
2949                        log::warn!(
2950                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
2951                        );
2952                        None
2953                    }
2954                    None => {
2955                        log::warn!(
2956                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
2957                        );
2958                        None
2959                    }
2960                };
2961                if let Some(toml_val) = maybe_toml_val {
2962                    let sv = rule_entry
2963                        .values
2964                        .entry(norm_rk.clone())
2965                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2966                    sv.push_override(toml_val, source, file.clone(), None);
2967                }
2968            }
2969        } else if item.is_value() {
2970            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
2971        }
2972    }
2973
2974    Ok(fragment)
2975}
2976
2977/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
2978fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
2979    // Use the unified loader from markdownlint_config.rs
2980    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
2981        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
2982    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
2983}