rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use crate::types::LineLength;
8use log;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::fs;
14use std::io;
15use std::marker::PhantomData;
16use std::path::Path;
17use std::str::FromStr;
18use toml_edit::DocumentMut;
19
20// ============================================================================
21// Typestate markers for configuration pipeline
22// ============================================================================
23
/// Marker type for configuration that has been loaded but not yet validated.
/// This is the initial state after `load_with_discovery()`.
///
/// It is a unit struct: it carries no data and only tags the pipeline state
/// at the type level.
#[derive(Debug, Clone, Copy, Default)]
pub struct ConfigLoaded;
28
/// Marker type for configuration that has been validated.
/// Only validated configs can be converted to `Config`.
///
/// It is a unit struct: it carries no data and only tags the pipeline state
/// at the type level.
#[derive(Debug, Clone, Copy, Default)]
pub struct ConfigValidated;
33
/// Markdown flavor/dialect enumeration
///
/// Each variant is (de)serialized under the lowercase name given in its
/// `#[serde(rename = ...)]` attribute. `Standard` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments.
    /// Also deserializes from the aliases `"none"` and `""`.
    #[serde(rename = "standard", alias = "none", alias = "")]
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    #[serde(rename = "mkdocs")]
    MkDocs,
    /// MDX flavor with JSX and ESM support (.mdx files)
    #[serde(rename = "mdx")]
    MDX,
    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
    #[serde(rename = "quarto")]
    Quarto,
    // Future flavors can be added here when they have actual implementation differences.
    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
    // Planned: CommonMark - for strict CommonMark compliance
    // Until then, the `FromStr` impl accepts "gfm", "github" and "commonmark" as aliases for `Standard`.
}
55
56impl fmt::Display for MarkdownFlavor {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        match self {
59            MarkdownFlavor::Standard => write!(f, "standard"),
60            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
61            MarkdownFlavor::MDX => write!(f, "mdx"),
62            MarkdownFlavor::Quarto => write!(f, "quarto"),
63        }
64    }
65}
66
67impl FromStr for MarkdownFlavor {
68    type Err = String;
69
70    fn from_str(s: &str) -> Result<Self, Self::Err> {
71        match s.to_lowercase().as_str() {
72            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
73            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
74            "mdx" => Ok(MarkdownFlavor::MDX),
75            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
76            // GFM and CommonMark are aliases for Standard since the base parser
77            // (pulldown-cmark) already supports GFM extensions (tables, task lists,
78            // strikethrough, autolinks, etc.) which are a superset of CommonMark
79            "gfm" | "github" | "commonmark" => Ok(MarkdownFlavor::Standard),
80            _ => Err(format!("Unknown markdown flavor: {s}")),
81        }
82    }
83}
84
85impl MarkdownFlavor {
86    /// Detect flavor from file extension
87    pub fn from_extension(ext: &str) -> Self {
88        match ext.to_lowercase().as_str() {
89            "mdx" => Self::MDX,
90            "qmd" => Self::Quarto,
91            "rmd" => Self::Quarto,
92            _ => Self::Standard,
93        }
94    }
95
96    /// Detect flavor from file path
97    pub fn from_path(path: &std::path::Path) -> Self {
98        path.extension()
99            .and_then(|e| e.to_str())
100            .map(Self::from_extension)
101            .unwrap_or(Self::Standard)
102    }
103
104    /// Check if this flavor supports ESM imports/exports (MDX-specific)
105    pub fn supports_esm_blocks(self) -> bool {
106        matches!(self, Self::MDX)
107    }
108
109    /// Check if this flavor supports JSX components (MDX-specific)
110    pub fn supports_jsx(self) -> bool {
111        matches!(self, Self::MDX)
112    }
113
114    /// Check if this flavor supports auto-references (MkDocs-specific)
115    pub fn supports_auto_references(self) -> bool {
116        matches!(self, Self::MkDocs)
117    }
118
119    /// Get a human-readable name for this flavor
120    pub fn name(self) -> &'static str {
121        match self {
122            Self::Standard => "Standard",
123            Self::MkDocs => "MkDocs",
124            Self::MDX => "MDX",
125            Self::Quarto => "Quarto",
126        }
127    }
128}
129
/// Normalizes a configuration key.
///
/// * Rule identifiers (exactly `MD` in any case followed by three ASCII
///   digits, e.g. `md013`) are returned in uppercase: `MD013`.
/// * Any other key has `_` replaced by `-` and is ASCII-lowercased, giving
///   lowercase kebab-case (`Line_Length` becomes `line-length`).
pub fn normalize_key(key: &str) -> String {
    let bytes = key.as_bytes();
    // The length check comes first so the fixed-offset slices below cannot go out of bounds.
    let looks_like_rule_id = bytes.len() == 5
        && bytes[..2].eq_ignore_ascii_case(b"md")
        && bytes[2..].iter().all(u8::is_ascii_digit);

    if looks_like_rule_id {
        return key.to_ascii_uppercase();
    }
    key.replace('_', "-").to_ascii_lowercase()
}
139
/// Represents a rule-specific configuration
///
/// The struct is a thin wrapper around a sorted map of option name to raw
/// TOML value; no per-rule typing is applied at this level.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    ///
    /// Flattened by serde, so the options appear directly inside the rule's
    /// table rather than under a nested `values` key.
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
148
/// Generate a JSON schema for arbitrary configuration values
///
/// The schema describes an object that allows any additional properties.
/// The generator argument is unused.
fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
    schemars::json_schema!({
        "type": "object",
        "additionalProperties": true
    })
}
156
/// Represents the complete configuration loaded from rumdl.toml
///
/// Only `global`, `per_file_ignores` and `rules` take part in
/// (de)serialization; `rule_severities` and `project_root` are marked
/// `#[serde(skip)]`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    ///
    /// Serialized under the key `per-file-ignores`.
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    // Flattened: rule sections sit at the top level of the document, keyed by rule name.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,

    /// Per-rule severity overrides
    /// Maps rule name (e.g., "MD001") to severity level (Error or Warning)
    ///
    /// Not (de)serialized.
    #[serde(skip)]
    pub rule_severities: BTreeMap<String, crate::rule::Severity>,

    /// Project root directory, used for resolving relative paths in per-file-ignores
    ///
    /// Not (de)serialized.
    #[serde(skip)]
    pub project_root: Option<std::path::PathBuf>,
}
194
195impl Config {
196    /// Check if the Markdown flavor is set to MkDocs
197    pub fn is_mkdocs_flavor(&self) -> bool {
198        self.global.flavor == MarkdownFlavor::MkDocs
199    }
200
201    // Future methods for when GFM and CommonMark are implemented:
202    // pub fn is_gfm_flavor(&self) -> bool
203    // pub fn is_commonmark_flavor(&self) -> bool
204
205    /// Get the configured Markdown flavor
206    pub fn markdown_flavor(&self) -> MarkdownFlavor {
207        self.global.flavor
208    }
209
210    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
211    pub fn is_mkdocs_project(&self) -> bool {
212        self.is_mkdocs_flavor()
213    }
214
215    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
216    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
217    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
218        use globset::{Glob, GlobSetBuilder};
219
220        let mut ignored_rules = HashSet::new();
221
222        if self.per_file_ignores.is_empty() {
223            return ignored_rules;
224        }
225
226        // Normalize the file path to be relative to project_root for pattern matching
227        // This ensures patterns like ".github/file.md" work with absolute paths
228        let path_for_matching: std::borrow::Cow<'_, Path> = if let Some(ref root) = self.project_root {
229            if let Ok(canonical_path) = file_path.canonicalize() {
230                if let Ok(canonical_root) = root.canonicalize() {
231                    if let Ok(relative) = canonical_path.strip_prefix(&canonical_root) {
232                        std::borrow::Cow::Owned(relative.to_path_buf())
233                    } else {
234                        std::borrow::Cow::Borrowed(file_path)
235                    }
236                } else {
237                    std::borrow::Cow::Borrowed(file_path)
238                }
239            } else {
240                std::borrow::Cow::Borrowed(file_path)
241            }
242        } else {
243            std::borrow::Cow::Borrowed(file_path)
244        };
245
246        // Build a globset for efficient matching
247        let mut builder = GlobSetBuilder::new();
248        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
249
250        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
251            if let Ok(glob) = Glob::new(pattern) {
252                builder.add(glob);
253                pattern_to_rules.push((idx, rules));
254            } else {
255                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
256            }
257        }
258
259        let globset = match builder.build() {
260            Ok(gs) => gs,
261            Err(e) => {
262                log::error!("Failed to build globset for per-file-ignores: {e}");
263                return ignored_rules;
264            }
265        };
266
267        // Match the file path against all patterns
268        for match_idx in globset.matches(path_for_matching.as_ref()) {
269            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
270                for rule in rules.iter() {
271                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
272                    ignored_rules.insert(normalize_key(rule));
273                }
274            }
275        }
276
277        ignored_rules
278    }
279}
280
/// Global configuration options
///
/// Fields are (de)serialized in kebab-case (`respect-gitignore`, `line-length`, ...).
/// Several fields also accept their snake_case spelling through a serde `alias`.
/// Missing fields fall back to the values of the `Default` impl.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default, rename_all = "kebab-case")]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    /// (default: true; also accepted as `respect_gitignore`)
    #[serde(default = "default_respect_gitignore", alias = "respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    /// (also accepted as `line_length`)
    #[serde(default, alias = "line_length")]
    pub line_length: LineLength,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    /// Omitted from serialized output when unset; also accepted as `output_format`.
    #[serde(skip_serializing_if = "Option::is_none", alias = "output_format")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    #[serde(default, alias = "force_exclude")]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,

    /// Directory to store cache files (default: .rumdl_cache)
    /// Can also be set via --cache-dir CLI flag or RUMDL_CACHE_DIR environment variable
    /// Omitted from serialized output when unset; also accepted as `cache_dir`.
    #[serde(default, alias = "cache_dir", skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<String>,

    /// Whether caching is enabled (default: true)
    /// Can also be disabled via --no-cache CLI flag
    #[serde(default = "default_true")]
    pub cache: bool,
}
346
/// Serde default for `GlobalConfig::respect_gitignore`: `.gitignore` files are respected unless configured otherwise.
fn default_respect_gitignore() -> bool {
    true
}
350
/// Serde default helper for boolean options that default to `true` (used by `GlobalConfig::cache`).
fn default_true() -> bool {
    true
}
354
355// Add the Default impl
356impl Default for GlobalConfig {
357    #[allow(deprecated)]
358    fn default() -> Self {
359        Self {
360            enable: Vec::new(),
361            disable: Vec::new(),
362            exclude: Vec::new(),
363            include: Vec::new(),
364            respect_gitignore: true,
365            line_length: LineLength::default(),
366            output_format: None,
367            fixable: Vec::new(),
368            unfixable: Vec::new(),
369            flavor: MarkdownFlavor::default(),
370            force_exclude: false,
371            cache_dir: None,
372            cache: true,
373        }
374    }
375}
376
/// File names recognised as markdownlint configuration files: the dotted and
/// undotted spellings, each with `.json`, `.jsonc`, `.yaml` and `.yml` extensions.
// NOTE(review): the consumer of this list is outside the visible part of the
// file; presumably it is probed in this order during config discovery. Confirm.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
387
388/// Create a default configuration file at the specified path
389pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
390    // Check if file already exists
391    if Path::new(path).exists() {
392        return Err(ConfigError::FileExists { path: path.to_string() });
393    }
394
395    // Default configuration content
396    let default_config = r#"# rumdl configuration file
397
398# Global configuration options
399[global]
400# List of rules to disable (uncomment and modify as needed)
401# disable = ["MD013", "MD033"]
402
403# List of rules to enable exclusively (if provided, only these rules will run)
404# enable = ["MD001", "MD003", "MD004"]
405
406# List of file/directory patterns to include for linting (if provided, only these will be linted)
407# include = [
408#    "docs/*.md",
409#    "src/**/*.md",
410#    "README.md"
411# ]
412
413# List of file/directory patterns to exclude from linting
414exclude = [
415    # Common directories to exclude
416    ".git",
417    ".github",
418    "node_modules",
419    "vendor",
420    "dist",
421    "build",
422
423    # Specific files or patterns
424    "CHANGELOG.md",
425    "LICENSE.md",
426]
427
428# Respect .gitignore files when scanning directories (default: true)
429respect-gitignore = true
430
431# Markdown flavor/dialect (uncomment to enable)
432# Options: standard (default), gfm, commonmark, mkdocs, mdx, quarto
433# flavor = "mkdocs"
434
435# Rule-specific configurations (uncomment and modify as needed)
436
437# [MD003]
438# style = "atx"  # Heading style (atx, atx_closed, setext)
439
440# [MD004]
441# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
442
443# [MD007]
444# indent = 4  # Unordered list indentation
445
446# [MD013]
447# line-length = 100  # Line length
448# code-blocks = false  # Exclude code blocks from line length check
449# tables = false  # Exclude tables from line length check
450# headings = true  # Include headings in line length check
451
452# [MD044]
453# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
454# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
455"#;
456
457    // Write the default configuration to the file
458    match fs::write(path, default_config) {
459        Ok(_) => Ok(()),
460        Err(err) => Err(ConfigError::IoError {
461            source: err,
462            path: path.to_string(),
463        }),
464    }
465}
466
/// Errors that can occur when loading configuration
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// Failed to read the configuration file
    ///
    /// `source` is the underlying I/O error and `path` the file it occurred on.
    /// `create_default_config` also returns this variant when writing fails,
    /// even though the message says "read".
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON)
    ///
    /// Carries the parser's message as plain text.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists
    ///
    /// Returned by `create_default_config` to avoid overwriting `path`.
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
482
483/// Get a rule-specific configuration value
484/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
485/// for better markdownlint compatibility
486pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
487    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
488
489    let rule_config = config.rules.get(&norm_rule_name)?;
490
491    // Try multiple key variants to support both underscore and kebab-case formats
492    let key_variants = [
493        key.to_string(),       // Original key as provided
494        normalize_key(key),    // Normalized key (lowercase, kebab-case)
495        key.replace('-', "_"), // Convert kebab-case to snake_case
496        key.replace('_', "-"), // Convert snake_case to kebab-case
497    ];
498
499    // Try each variant until we find a match
500    for variant in &key_variants {
501        if let Some(value) = rule_config.values.get(variant)
502            && let Ok(result) = T::deserialize(value.clone())
503        {
504            return Some(result);
505        }
506    }
507
508    None
509}
510
/// Returns the default `[tool.rumdl]` section to paste into a `pyproject.toml`.
///
/// The text starts with a blank line, sets a few global options, and lists
/// commented-out examples for some rule sections.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}
553
554#[cfg(test)]
555mod tests {
556    use super::*;
557    use std::fs;
558    use tempfile::tempdir;
559
560    #[test]
561    fn test_flavor_loading() {
562        let temp_dir = tempdir().unwrap();
563        let config_path = temp_dir.path().join(".rumdl.toml");
564        let config_content = r#"
565[global]
566flavor = "mkdocs"
567disable = ["MD001"]
568"#;
569        fs::write(&config_path, config_content).unwrap();
570
571        // Load the config
572        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
573        let config: Config = sourced.into_validated_unchecked().into();
574
575        // Check that flavor was loaded
576        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
577        assert!(config.is_mkdocs_flavor());
578        assert!(config.is_mkdocs_project()); // Test backwards compatibility
579        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
580    }
581
582    #[test]
583    fn test_pyproject_toml_root_level_config() {
584        let temp_dir = tempdir().unwrap();
585        let config_path = temp_dir.path().join("pyproject.toml");
586
587        // Create a test pyproject.toml with root-level configuration
588        let content = r#"
589[tool.rumdl]
590line-length = 120
591disable = ["MD033"]
592enable = ["MD001", "MD004"]
593include = ["docs/*.md"]
594exclude = ["node_modules"]
595respect-gitignore = true
596        "#;
597
598        fs::write(&config_path, content).unwrap();
599
600        // Load the config with skip_auto_discovery to avoid environment config files
601        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
602        let config: Config = sourced.into_validated_unchecked().into(); // Convert to plain config for assertions
603
604        // Check global settings
605        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
606        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
607        // Should now contain only the configured pattern since auto-discovery is disabled
608        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
609        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
610        assert!(config.global.respect_gitignore);
611
612        // Check line-length was correctly added to MD013
613        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
614        assert_eq!(line_length, Some(120));
615    }
616
617    #[test]
618    fn test_pyproject_toml_snake_case_and_kebab_case() {
619        let temp_dir = tempdir().unwrap();
620        let config_path = temp_dir.path().join("pyproject.toml");
621
622        // Test with both kebab-case and snake_case variants
623        let content = r#"
624[tool.rumdl]
625line-length = 150
626respect_gitignore = true
627        "#;
628
629        fs::write(&config_path, content).unwrap();
630
631        // Load the config with skip_auto_discovery to avoid environment config files
632        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
633        let config: Config = sourced.into_validated_unchecked().into(); // Convert to plain config for assertions
634
635        // Check settings were correctly loaded
636        assert!(config.global.respect_gitignore);
637        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
638        assert_eq!(line_length, Some(150));
639    }
640
641    #[test]
642    fn test_md013_key_normalization_in_rumdl_toml() {
643        let temp_dir = tempdir().unwrap();
644        let config_path = temp_dir.path().join(".rumdl.toml");
645        let config_content = r#"
646[MD013]
647line_length = 111
648line-length = 222
649"#;
650        fs::write(&config_path, config_content).unwrap();
651        // Load the config with skip_auto_discovery to avoid environment config files
652        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
653        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
654        // Now we should only get the explicitly configured key
655        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
656        assert_eq!(keys, vec!["line-length"]);
657        let val = &rule_cfg.values["line-length"].value;
658        assert_eq!(val.as_integer(), Some(222));
659        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
660        let config: Config = sourced.clone().into_validated_unchecked().into();
661        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
662        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
663        assert_eq!(v1, Some(222));
664        assert_eq!(v2, Some(222));
665    }
666
667    #[test]
668    fn test_md013_section_case_insensitivity() {
669        let temp_dir = tempdir().unwrap();
670        let config_path = temp_dir.path().join(".rumdl.toml");
671        let config_content = r#"
672[md013]
673line-length = 101
674
675[Md013]
676line-length = 102
677
678[MD013]
679line-length = 103
680"#;
681        fs::write(&config_path, config_content).unwrap();
682        // Load the config with skip_auto_discovery to avoid environment config files
683        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
684        let config: Config = sourced.clone().into_validated_unchecked().into();
685        // Only the last section should win, and be present
686        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
687        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
688        assert_eq!(keys, vec!["line-length"]);
689        let val = &rule_cfg.values["line-length"].value;
690        assert_eq!(val.as_integer(), Some(103));
691        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
692        assert_eq!(v, Some(103));
693    }
694
695    #[test]
696    fn test_md013_key_snake_and_kebab_case() {
697        let temp_dir = tempdir().unwrap();
698        let config_path = temp_dir.path().join(".rumdl.toml");
699        let config_content = r#"
700[MD013]
701line_length = 201
702line-length = 202
703"#;
704        fs::write(&config_path, config_content).unwrap();
705        // Load the config with skip_auto_discovery to avoid environment config files
706        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
707        let config: Config = sourced.clone().into_validated_unchecked().into();
708        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
709        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
710        assert_eq!(keys, vec!["line-length"]);
711        let val = &rule_cfg.values["line-length"].value;
712        assert_eq!(val.as_integer(), Some(202));
713        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
714        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
715        assert_eq!(v1, Some(202));
716        assert_eq!(v2, Some(202));
717    }
718
719    #[test]
720    fn test_unknown_rule_section_is_ignored() {
721        let temp_dir = tempdir().unwrap();
722        let config_path = temp_dir.path().join(".rumdl.toml");
723        let config_content = r#"
724[MD999]
725foo = 1
726bar = 2
727[MD013]
728line-length = 303
729"#;
730        fs::write(&config_path, config_content).unwrap();
731        // Load the config with skip_auto_discovery to avoid environment config files
732        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
733        let config: Config = sourced.clone().into_validated_unchecked().into();
734        // MD999 should not be present
735        assert!(!sourced.rules.contains_key("MD999"));
736        // MD013 should be present and correct
737        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
738        assert_eq!(v, Some(303));
739    }
740
741    #[test]
742    fn test_invalid_toml_syntax() {
743        let temp_dir = tempdir().unwrap();
744        let config_path = temp_dir.path().join(".rumdl.toml");
745
746        // Invalid TOML with unclosed string
747        let config_content = r#"
748[MD013]
749line-length = "unclosed string
750"#;
751        fs::write(&config_path, config_content).unwrap();
752
753        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
754        assert!(result.is_err());
755        match result.unwrap_err() {
756            ConfigError::ParseError(msg) => {
757                // The actual error message from toml parser might vary
758                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
759            }
760            _ => panic!("Expected ParseError"),
761        }
762    }
763
764    #[test]
765    fn test_wrong_type_for_config_value() {
766        let temp_dir = tempdir().unwrap();
767        let config_path = temp_dir.path().join(".rumdl.toml");
768
769        // line-length should be a number, not a string
770        let config_content = r#"
771[MD013]
772line-length = "not a number"
773"#;
774        fs::write(&config_path, config_content).unwrap();
775
776        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
777        let config: Config = sourced.into_validated_unchecked().into();
778
779        // The value should be loaded as a string, not converted
780        let rule_config = config.rules.get("MD013").unwrap();
781        let value = rule_config.values.get("line-length").unwrap();
782        assert!(matches!(value, toml::Value::String(_)));
783    }
784
785    #[test]
786    fn test_empty_config_file() {
787        let temp_dir = tempdir().unwrap();
788        let config_path = temp_dir.path().join(".rumdl.toml");
789
790        // Empty file
791        fs::write(&config_path, "").unwrap();
792
793        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
794        let config: Config = sourced.into_validated_unchecked().into();
795
796        // Should have default values
797        assert_eq!(config.global.line_length.get(), 80);
798        assert!(config.global.respect_gitignore);
799        assert!(config.rules.is_empty());
800    }
801
802    #[test]
803    fn test_malformed_pyproject_toml() {
804        let temp_dir = tempdir().unwrap();
805        let config_path = temp_dir.path().join("pyproject.toml");
806
807        // Missing closing bracket
808        let content = r#"
809[tool.rumdl
810line-length = 120
811"#;
812        fs::write(&config_path, content).unwrap();
813
814        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
815        assert!(result.is_err());
816    }
817
818    #[test]
819    fn test_conflicting_config_values() {
820        let temp_dir = tempdir().unwrap();
821        let config_path = temp_dir.path().join(".rumdl.toml");
822
823        // Both enable and disable the same rule - these need to be in a global section
824        let config_content = r#"
825[global]
826enable = ["MD013"]
827disable = ["MD013"]
828"#;
829        fs::write(&config_path, config_content).unwrap();
830
831        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
832        let config: Config = sourced.into_validated_unchecked().into();
833
834        // Conflict resolution: enable wins over disable
835        assert!(config.global.enable.contains(&"MD013".to_string()));
836        assert!(!config.global.disable.contains(&"MD013".to_string()));
837    }
838
839    #[test]
840    fn test_invalid_rule_names() {
841        let temp_dir = tempdir().unwrap();
842        let config_path = temp_dir.path().join(".rumdl.toml");
843
844        let config_content = r#"
845[global]
846enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
847disable = ["MD-001", "MD_002"]
848"#;
849        fs::write(&config_path, config_content).unwrap();
850
851        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
852        let config: Config = sourced.into_validated_unchecked().into();
853
854        // All values should be preserved as-is
855        assert_eq!(config.global.enable.len(), 4);
856        assert_eq!(config.global.disable.len(), 2);
857    }
858
859    #[test]
860    fn test_deeply_nested_config() {
861        let temp_dir = tempdir().unwrap();
862        let config_path = temp_dir.path().join(".rumdl.toml");
863
864        // This should be ignored as we don't support nested tables within rule configs
865        let config_content = r#"
866[MD013]
867line-length = 100
868[MD013.nested]
869value = 42
870"#;
871        fs::write(&config_path, config_content).unwrap();
872
873        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
874        let config: Config = sourced.into_validated_unchecked().into();
875
876        let rule_config = config.rules.get("MD013").unwrap();
877        assert_eq!(
878            rule_config.values.get("line-length").unwrap(),
879            &toml::Value::Integer(100)
880        );
881        // Nested table should not be present
882        assert!(!rule_config.values.contains_key("nested"));
883    }
884
885    #[test]
886    fn test_unicode_in_config() {
887        let temp_dir = tempdir().unwrap();
888        let config_path = temp_dir.path().join(".rumdl.toml");
889
890        let config_content = r#"
891[global]
892include = ["文档/*.md", "ドキュメント/*.md"]
893exclude = ["测试/*", "🚀/*"]
894
895[MD013]
896line-length = 80
897message = "行太长了 🚨"
898"#;
899        fs::write(&config_path, config_content).unwrap();
900
901        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
902        let config: Config = sourced.into_validated_unchecked().into();
903
904        assert_eq!(config.global.include.len(), 2);
905        assert_eq!(config.global.exclude.len(), 2);
906        assert!(config.global.include[0].contains("文档"));
907        assert!(config.global.exclude[1].contains("🚀"));
908
909        let rule_config = config.rules.get("MD013").unwrap();
910        let message = rule_config.values.get("message").unwrap();
911        if let toml::Value::String(s) = message {
912            assert!(s.contains("行太长了"));
913            assert!(s.contains("🚨"));
914        }
915    }
916
917    #[test]
918    fn test_extremely_long_values() {
919        let temp_dir = tempdir().unwrap();
920        let config_path = temp_dir.path().join(".rumdl.toml");
921
922        let long_string = "a".repeat(10000);
923        let config_content = format!(
924            r#"
925[global]
926exclude = ["{long_string}"]
927
928[MD013]
929line-length = 999999999
930"#
931        );
932
933        fs::write(&config_path, config_content).unwrap();
934
935        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
936        let config: Config = sourced.into_validated_unchecked().into();
937
938        assert_eq!(config.global.exclude[0].len(), 10000);
939        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
940        assert_eq!(line_length, Some(999999999));
941    }
942
943    #[test]
944    fn test_config_with_comments() {
945        let temp_dir = tempdir().unwrap();
946        let config_path = temp_dir.path().join(".rumdl.toml");
947
948        let config_content = r#"
949[global]
950# This is a comment
951enable = ["MD001"] # Enable MD001
952# disable = ["MD002"] # This is commented out
953
954[MD013] # Line length rule
955line-length = 100 # Set to 100 characters
956# ignored = true # This setting is commented out
957"#;
958        fs::write(&config_path, config_content).unwrap();
959
960        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
961        let config: Config = sourced.into_validated_unchecked().into();
962
963        assert_eq!(config.global.enable, vec!["MD001"]);
964        assert!(config.global.disable.is_empty()); // Commented out
965
966        let rule_config = config.rules.get("MD013").unwrap();
967        assert_eq!(rule_config.values.len(), 1); // Only line-length
968        assert!(!rule_config.values.contains_key("ignored"));
969    }
970
971    #[test]
972    fn test_arrays_in_rule_config() {
973        let temp_dir = tempdir().unwrap();
974        let config_path = temp_dir.path().join(".rumdl.toml");
975
976        let config_content = r#"
977[MD003]
978levels = [1, 2, 3]
979tags = ["important", "critical"]
980mixed = [1, "two", true]
981"#;
982        fs::write(&config_path, config_content).unwrap();
983
984        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
985        let config: Config = sourced.into_validated_unchecked().into();
986
987        // Arrays should now be properly parsed
988        let rule_config = config.rules.get("MD003").expect("MD003 config should exist");
989
990        // Check that arrays are present and correctly parsed
991        assert!(rule_config.values.contains_key("levels"));
992        assert!(rule_config.values.contains_key("tags"));
993        assert!(rule_config.values.contains_key("mixed"));
994
995        // Verify array contents
996        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
997            assert_eq!(levels.len(), 3);
998            assert_eq!(levels[0], toml::Value::Integer(1));
999            assert_eq!(levels[1], toml::Value::Integer(2));
1000            assert_eq!(levels[2], toml::Value::Integer(3));
1001        } else {
1002            panic!("levels should be an array");
1003        }
1004
1005        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
1006            assert_eq!(tags.len(), 2);
1007            assert_eq!(tags[0], toml::Value::String("important".to_string()));
1008            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
1009        } else {
1010            panic!("tags should be an array");
1011        }
1012
1013        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
1014            assert_eq!(mixed.len(), 3);
1015            assert_eq!(mixed[0], toml::Value::Integer(1));
1016            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
1017            assert_eq!(mixed[2], toml::Value::Boolean(true));
1018        } else {
1019            panic!("mixed should be an array");
1020        }
1021    }
1022
1023    #[test]
1024    fn test_normalize_key_edge_cases() {
1025        // Rule names
1026        assert_eq!(normalize_key("MD001"), "MD001");
1027        assert_eq!(normalize_key("md001"), "MD001");
1028        assert_eq!(normalize_key("Md001"), "MD001");
1029        assert_eq!(normalize_key("mD001"), "MD001");
1030
1031        // Non-rule names
1032        assert_eq!(normalize_key("line_length"), "line-length");
1033        assert_eq!(normalize_key("line-length"), "line-length");
1034        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
1035        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
1036
1037        // Edge cases
1038        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
1039        assert_eq!(normalize_key("MD00"), "md00"); // Too short
1040        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
1041        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
1042        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
1043        assert_eq!(normalize_key(""), "");
1044        assert_eq!(normalize_key("_"), "-");
1045        assert_eq!(normalize_key("___"), "---");
1046    }
1047
1048    #[test]
1049    fn test_missing_config_file() {
1050        let temp_dir = tempdir().unwrap();
1051        let config_path = temp_dir.path().join("nonexistent.toml");
1052
1053        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1054        assert!(result.is_err());
1055        match result.unwrap_err() {
1056            ConfigError::IoError { .. } => {}
1057            _ => panic!("Expected IoError for missing file"),
1058        }
1059    }
1060
1061    #[test]
1062    #[cfg(unix)]
1063    fn test_permission_denied_config() {
1064        use std::os::unix::fs::PermissionsExt;
1065
1066        let temp_dir = tempdir().unwrap();
1067        let config_path = temp_dir.path().join(".rumdl.toml");
1068
1069        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1070
1071        // Remove read permissions
1072        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1073        perms.set_mode(0o000);
1074        fs::set_permissions(&config_path, perms).unwrap();
1075
1076        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1077
1078        // Restore permissions for cleanup
1079        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1080        perms.set_mode(0o644);
1081        fs::set_permissions(&config_path, perms).unwrap();
1082
1083        assert!(result.is_err());
1084        match result.unwrap_err() {
1085            ConfigError::IoError { .. } => {}
1086            _ => panic!("Expected IoError for permission denied"),
1087        }
1088    }
1089
1090    #[test]
1091    fn test_circular_reference_detection() {
1092        // This test is more conceptual since TOML doesn't support circular references
1093        // But we test that deeply nested structures don't cause stack overflow
1094        let temp_dir = tempdir().unwrap();
1095        let config_path = temp_dir.path().join(".rumdl.toml");
1096
1097        let mut config_content = String::from("[MD001]\n");
1098        for i in 0..100 {
1099            config_content.push_str(&format!("key{i} = {i}\n"));
1100        }
1101
1102        fs::write(&config_path, config_content).unwrap();
1103
1104        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1105        let config: Config = sourced.into_validated_unchecked().into();
1106
1107        let rule_config = config.rules.get("MD001").unwrap();
1108        assert_eq!(rule_config.values.len(), 100);
1109    }
1110
1111    #[test]
1112    fn test_special_toml_values() {
1113        let temp_dir = tempdir().unwrap();
1114        let config_path = temp_dir.path().join(".rumdl.toml");
1115
1116        let config_content = r#"
1117[MD001]
1118infinity = inf
1119neg_infinity = -inf
1120not_a_number = nan
1121datetime = 1979-05-27T07:32:00Z
1122local_date = 1979-05-27
1123local_time = 07:32:00
1124"#;
1125        fs::write(&config_path, config_content).unwrap();
1126
1127        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1128        let config: Config = sourced.into_validated_unchecked().into();
1129
1130        // Some values might not be parsed due to parser limitations
1131        if let Some(rule_config) = config.rules.get("MD001") {
1132            // Check special float values if present
1133            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1134                assert!(f.is_infinite() && f.is_sign_positive());
1135            }
1136            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1137                assert!(f.is_infinite() && f.is_sign_negative());
1138            }
1139            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1140                assert!(f.is_nan());
1141            }
1142
1143            // Check datetime values if present
1144            if let Some(val) = rule_config.values.get("datetime") {
1145                assert!(matches!(val, toml::Value::Datetime(_)));
1146            }
1147            // Note: local_date and local_time might not be parsed by the current implementation
1148        }
1149    }
1150
1151    #[test]
1152    fn test_default_config_passes_validation() {
1153        use crate::rules;
1154
1155        let temp_dir = tempdir().unwrap();
1156        let config_path = temp_dir.path().join(".rumdl.toml");
1157        let config_path_str = config_path.to_str().unwrap();
1158
1159        // Create the default config using the same function that `rumdl init` uses
1160        create_default_config(config_path_str).unwrap();
1161
1162        // Load it back as a SourcedConfig
1163        let sourced =
1164            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1165
1166        // Create the rule registry
1167        let all_rules = rules::all_rules(&Config::default());
1168        let registry = RuleRegistry::from_rules(&all_rules);
1169
1170        // Validate the config
1171        let warnings = validate_config_sourced(&sourced, &registry);
1172
1173        // The default config should have no warnings
1174        if !warnings.is_empty() {
1175            for warning in &warnings {
1176                eprintln!("Config validation warning: {}", warning.message);
1177                if let Some(rule) = &warning.rule {
1178                    eprintln!("  Rule: {rule}");
1179                }
1180                if let Some(key) = &warning.key {
1181                    eprintln!("  Key: {key}");
1182                }
1183            }
1184        }
1185        assert!(
1186            warnings.is_empty(),
1187            "Default config from rumdl init should pass validation without warnings"
1188        );
1189    }
1190
1191    #[test]
1192    fn test_per_file_ignores_config_parsing() {
1193        let temp_dir = tempdir().unwrap();
1194        let config_path = temp_dir.path().join(".rumdl.toml");
1195        let config_content = r#"
1196[per-file-ignores]
1197"README.md" = ["MD033"]
1198"docs/**/*.md" = ["MD013", "MD033"]
1199"test/*.md" = ["MD041"]
1200"#;
1201        fs::write(&config_path, config_content).unwrap();
1202
1203        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1204        let config: Config = sourced.into_validated_unchecked().into();
1205
1206        // Verify per-file-ignores was loaded
1207        assert_eq!(config.per_file_ignores.len(), 3);
1208        assert_eq!(
1209            config.per_file_ignores.get("README.md"),
1210            Some(&vec!["MD033".to_string()])
1211        );
1212        assert_eq!(
1213            config.per_file_ignores.get("docs/**/*.md"),
1214            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1215        );
1216        assert_eq!(
1217            config.per_file_ignores.get("test/*.md"),
1218            Some(&vec!["MD041".to_string()])
1219        );
1220    }
1221
1222    #[test]
1223    fn test_per_file_ignores_glob_matching() {
1224        use std::path::PathBuf;
1225
1226        let temp_dir = tempdir().unwrap();
1227        let config_path = temp_dir.path().join(".rumdl.toml");
1228        let config_content = r#"
1229[per-file-ignores]
1230"README.md" = ["MD033"]
1231"docs/**/*.md" = ["MD013"]
1232"**/test_*.md" = ["MD041"]
1233"#;
1234        fs::write(&config_path, config_content).unwrap();
1235
1236        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1237        let config: Config = sourced.into_validated_unchecked().into();
1238
1239        // Test exact match
1240        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1241        assert!(ignored.contains("MD033"));
1242        assert_eq!(ignored.len(), 1);
1243
1244        // Test glob pattern matching
1245        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1246        assert!(ignored.contains("MD013"));
1247        assert_eq!(ignored.len(), 1);
1248
1249        // Test recursive glob pattern
1250        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1251        assert!(ignored.contains("MD041"));
1252        assert_eq!(ignored.len(), 1);
1253
1254        // Test non-matching path
1255        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1256        assert!(ignored.is_empty());
1257    }
1258
1259    #[test]
1260    fn test_per_file_ignores_pyproject_toml() {
1261        let temp_dir = tempdir().unwrap();
1262        let config_path = temp_dir.path().join("pyproject.toml");
1263        let config_content = r#"
1264[tool.rumdl]
1265[tool.rumdl.per-file-ignores]
1266"README.md" = ["MD033", "MD013"]
1267"generated/*.md" = ["MD041"]
1268"#;
1269        fs::write(&config_path, config_content).unwrap();
1270
1271        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1272        let config: Config = sourced.into_validated_unchecked().into();
1273
1274        // Verify per-file-ignores was loaded from pyproject.toml
1275        assert_eq!(config.per_file_ignores.len(), 2);
1276        assert_eq!(
1277            config.per_file_ignores.get("README.md"),
1278            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1279        );
1280        assert_eq!(
1281            config.per_file_ignores.get("generated/*.md"),
1282            Some(&vec!["MD041".to_string()])
1283        );
1284    }
1285
1286    #[test]
1287    fn test_per_file_ignores_multiple_patterns_match() {
1288        use std::path::PathBuf;
1289
1290        let temp_dir = tempdir().unwrap();
1291        let config_path = temp_dir.path().join(".rumdl.toml");
1292        let config_content = r#"
1293[per-file-ignores]
1294"docs/**/*.md" = ["MD013"]
1295"**/api/*.md" = ["MD033"]
1296"docs/api/overview.md" = ["MD041"]
1297"#;
1298        fs::write(&config_path, config_content).unwrap();
1299
1300        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1301        let config: Config = sourced.into_validated_unchecked().into();
1302
1303        // File matches multiple patterns - should get union of all rules
1304        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1305        assert_eq!(ignored.len(), 3);
1306        assert!(ignored.contains("MD013"));
1307        assert!(ignored.contains("MD033"));
1308        assert!(ignored.contains("MD041"));
1309    }
1310
1311    #[test]
1312    fn test_per_file_ignores_rule_name_normalization() {
1313        use std::path::PathBuf;
1314
1315        let temp_dir = tempdir().unwrap();
1316        let config_path = temp_dir.path().join(".rumdl.toml");
1317        let config_content = r#"
1318[per-file-ignores]
1319"README.md" = ["md033", "MD013", "Md041"]
1320"#;
1321        fs::write(&config_path, config_content).unwrap();
1322
1323        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1324        let config: Config = sourced.into_validated_unchecked().into();
1325
1326        // All rule names should be normalized to uppercase
1327        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1328        assert_eq!(ignored.len(), 3);
1329        assert!(ignored.contains("MD033"));
1330        assert!(ignored.contains("MD013"));
1331        assert!(ignored.contains("MD041"));
1332    }
1333
1334    #[test]
1335    fn test_per_file_ignores_invalid_glob_pattern() {
1336        use std::path::PathBuf;
1337
1338        let temp_dir = tempdir().unwrap();
1339        let config_path = temp_dir.path().join(".rumdl.toml");
1340        let config_content = r#"
1341[per-file-ignores]
1342"[invalid" = ["MD033"]
1343"valid/*.md" = ["MD013"]
1344"#;
1345        fs::write(&config_path, config_content).unwrap();
1346
1347        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1348        let config: Config = sourced.into_validated_unchecked().into();
1349
1350        // Invalid pattern should be skipped, valid pattern should work
1351        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1352        assert!(ignored.contains("MD013"));
1353
1354        // Invalid pattern should not cause issues
1355        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1356        assert!(ignored2.is_empty());
1357    }
1358
1359    #[test]
1360    fn test_per_file_ignores_empty_section() {
1361        use std::path::PathBuf;
1362
1363        let temp_dir = tempdir().unwrap();
1364        let config_path = temp_dir.path().join(".rumdl.toml");
1365        let config_content = r#"
1366[global]
1367disable = ["MD001"]
1368
1369[per-file-ignores]
1370"#;
1371        fs::write(&config_path, config_content).unwrap();
1372
1373        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1374        let config: Config = sourced.into_validated_unchecked().into();
1375
1376        // Empty per-file-ignores should work fine
1377        assert_eq!(config.per_file_ignores.len(), 0);
1378        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1379        assert!(ignored.is_empty());
1380    }
1381
1382    #[test]
1383    fn test_per_file_ignores_with_underscores_in_pyproject() {
1384        let temp_dir = tempdir().unwrap();
1385        let config_path = temp_dir.path().join("pyproject.toml");
1386        let config_content = r#"
1387[tool.rumdl]
1388[tool.rumdl.per_file_ignores]
1389"README.md" = ["MD033"]
1390"#;
1391        fs::write(&config_path, config_content).unwrap();
1392
1393        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1394        let config: Config = sourced.into_validated_unchecked().into();
1395
1396        // Should support both per-file-ignores and per_file_ignores
1397        assert_eq!(config.per_file_ignores.len(), 1);
1398        assert_eq!(
1399            config.per_file_ignores.get("README.md"),
1400            Some(&vec!["MD033".to_string()])
1401        );
1402    }
1403
1404    #[test]
1405    fn test_per_file_ignores_absolute_path_matching() {
1406        // Regression test for issue #208: per-file-ignores should work with absolute paths
1407        // This is critical for GitHub Actions which uses absolute paths like $GITHUB_WORKSPACE
1408        use std::path::PathBuf;
1409
1410        let temp_dir = tempdir().unwrap();
1411        let config_path = temp_dir.path().join(".rumdl.toml");
1412
1413        // Create a subdirectory and file to match against
1414        let github_dir = temp_dir.path().join(".github");
1415        fs::create_dir_all(&github_dir).unwrap();
1416        let test_file = github_dir.join("pull_request_template.md");
1417        fs::write(&test_file, "Test content").unwrap();
1418
1419        let config_content = r#"
1420[per-file-ignores]
1421".github/pull_request_template.md" = ["MD041"]
1422"docs/**/*.md" = ["MD013"]
1423"#;
1424        fs::write(&config_path, config_content).unwrap();
1425
1426        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1427        let config: Config = sourced.into_validated_unchecked().into();
1428
1429        // Test with absolute path (like GitHub Actions would use)
1430        let absolute_path = test_file.canonicalize().unwrap();
1431        let ignored = config.get_ignored_rules_for_file(&absolute_path);
1432        assert!(
1433            ignored.contains("MD041"),
1434            "Should match absolute path {absolute_path:?} against relative pattern"
1435        );
1436        assert_eq!(ignored.len(), 1);
1437
1438        // Also verify relative path still works
1439        let relative_path = PathBuf::from(".github/pull_request_template.md");
1440        let ignored = config.get_ignored_rules_for_file(&relative_path);
1441        assert!(ignored.contains("MD041"), "Should match relative path");
1442    }
1443
1444    #[test]
1445    fn test_generate_json_schema() {
1446        use schemars::schema_for;
1447        use std::env;
1448
1449        let schema = schema_for!(Config);
1450        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1451
1452        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1453        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1454            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1455            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1456            println!("Schema written to: {}", schema_path.display());
1457        }
1458
1459        // Basic validation that schema was generated
1460        assert!(schema_json.contains("\"title\": \"Config\""));
1461        assert!(schema_json.contains("\"global\""));
1462        assert!(schema_json.contains("\"per-file-ignores\""));
1463    }
1464
1465    #[test]
1466    fn test_user_config_loaded_with_explicit_project_config() {
1467        // Regression test for issue #131: User config should always be loaded as base layer,
1468        // even when an explicit project config path is provided
1469        let temp_dir = tempdir().unwrap();
1470
1471        // Create a fake user config directory
1472        // Note: user_configuration_path_impl adds /rumdl to the config dir
1473        let user_config_dir = temp_dir.path().join("user_config");
1474        let rumdl_config_dir = user_config_dir.join("rumdl");
1475        fs::create_dir_all(&rumdl_config_dir).unwrap();
1476        let user_config_path = rumdl_config_dir.join("rumdl.toml");
1477
1478        // User config disables MD013 and MD041
1479        let user_config_content = r#"
1480[global]
1481disable = ["MD013", "MD041"]
1482line-length = 100
1483"#;
1484        fs::write(&user_config_path, user_config_content).unwrap();
1485
1486        // Create a project config that enables MD001
1487        let project_config_path = temp_dir.path().join("project").join("pyproject.toml");
1488        fs::create_dir_all(project_config_path.parent().unwrap()).unwrap();
1489        let project_config_content = r#"
1490[tool.rumdl]
1491enable = ["MD001"]
1492"#;
1493        fs::write(&project_config_path, project_config_content).unwrap();
1494
1495        // Load config with explicit project path, passing user_config_dir
1496        let sourced = SourcedConfig::load_with_discovery_impl(
1497            Some(project_config_path.to_str().unwrap()),
1498            None,
1499            false,
1500            Some(&user_config_dir),
1501        )
1502        .unwrap();
1503
1504        let config: Config = sourced.into_validated_unchecked().into();
1505
1506        // User config settings should be preserved
1507        assert!(
1508            config.global.disable.contains(&"MD013".to_string()),
1509            "User config disabled rules should be preserved"
1510        );
1511        assert!(
1512            config.global.disable.contains(&"MD041".to_string()),
1513            "User config disabled rules should be preserved"
1514        );
1515
1516        // Project config settings should also be applied (merged on top)
1517        assert!(
1518            config.global.enable.contains(&"MD001".to_string()),
1519            "Project config enabled rules should be applied"
1520        );
1521    }
1522
1523    #[test]
1524    fn test_typestate_validate_method() {
1525        use tempfile::tempdir;
1526
1527        let temp_dir = tempdir().expect("Failed to create temporary directory");
1528        let config_path = temp_dir.path().join("test.toml");
1529
1530        // Create config with an unknown rule option to trigger a validation warning
1531        let config_content = r#"
1532[global]
1533enable = ["MD001"]
1534
1535[MD013]
1536line_length = 80
1537unknown_option = true
1538"#;
1539        std::fs::write(&config_path, config_content).expect("Failed to write config");
1540
1541        // Load config - this returns SourcedConfig<ConfigLoaded>
1542        let loaded = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true)
1543            .expect("Should load config");
1544
1545        // Create a rule registry for validation
1546        let default_config = Config::default();
1547        let all_rules = crate::rules::all_rules(&default_config);
1548        let registry = RuleRegistry::from_rules(&all_rules);
1549
1550        // Validate - this transitions to SourcedConfig<ConfigValidated>
1551        let validated = loaded.validate(&registry).expect("Should validate config");
1552
1553        // Check that validation warnings were captured for the unknown option
1554        // Note: The validation checks rule options against the rule's schema
1555        let has_unknown_option_warning = validated
1556            .validation_warnings
1557            .iter()
1558            .any(|w| w.message.contains("unknown_option") || w.message.contains("Unknown option"));
1559
1560        // Print warnings for debugging if assertion fails
1561        if !has_unknown_option_warning {
1562            for w in &validated.validation_warnings {
1563                eprintln!("Warning: {}", w.message);
1564            }
1565        }
1566        assert!(
1567            has_unknown_option_warning,
1568            "Should have warning for unknown option. Got {} warnings: {:?}",
1569            validated.validation_warnings.len(),
1570            validated
1571                .validation_warnings
1572                .iter()
1573                .map(|w| &w.message)
1574                .collect::<Vec<_>>()
1575        );
1576
1577        // Now we can convert to Config (this would be a compile error with ConfigLoaded)
1578        let config: Config = validated.into();
1579
1580        // Verify the config values are correct
1581        assert!(config.global.enable.contains(&"MD001".to_string()));
1582    }
1583
1584    #[test]
1585    fn test_typestate_validate_into_convenience_method() {
1586        use tempfile::tempdir;
1587
1588        let temp_dir = tempdir().expect("Failed to create temporary directory");
1589        let config_path = temp_dir.path().join("test.toml");
1590
1591        let config_content = r#"
1592[global]
1593enable = ["MD022"]
1594
1595[MD022]
1596lines_above = 2
1597"#;
1598        std::fs::write(&config_path, config_content).expect("Failed to write config");
1599
1600        let loaded = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true)
1601            .expect("Should load config");
1602
1603        let default_config = Config::default();
1604        let all_rules = crate::rules::all_rules(&default_config);
1605        let registry = RuleRegistry::from_rules(&all_rules);
1606
1607        // Use the convenience method that validates and converts in one step
1608        let (config, warnings) = loaded.validate_into(&registry).expect("Should validate and convert");
1609
1610        // Should have no warnings for valid config
1611        assert!(warnings.is_empty(), "Should have no warnings for valid config");
1612
1613        // Config should be usable
1614        assert!(config.global.enable.contains(&"MD022".to_string()));
1615    }
1616}
1617
/// Configuration source with clear precedence hierarchy.
///
/// Precedence order (higher values override lower values; a value from a source of
/// equal precedence also replaces the current one):
/// - Default (0): Built-in defaults
/// - UserConfig (1): User-level ~/.config/rumdl/rumdl.toml
/// - PyprojectToml (2): Project-level pyproject.toml
/// - ProjectConfig (3): Project-level .rumdl.toml (most specific)
/// - Cli (4): Command-line flags (highest priority)
///
/// The numeric ranks are not stored on the enum; they are assigned by the
/// `source_precedence` helpers inside `SourcedValue::merge_override` and
/// `SourcedValue::merge_union`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default configuration
    Default,
    /// User-level configuration from ~/.config/rumdl/rumdl.toml
    UserConfig,
    /// Project-level configuration from pyproject.toml
    PyprojectToml,
    /// Project-level configuration from .rumdl.toml or rumdl.toml
    ProjectConfig,
    /// Command-line flags (highest precedence)
    Cli,
}
1639
/// One recorded assignment of a configuration value, kept as an entry in
/// `SourcedValue::overrides`.
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    /// The value that was supplied by this override.
    pub value: T,
    /// The kind of source that supplied the value.
    pub source: ConfigSource,
    /// Path of the file the value came from, when one was recorded (`None` otherwise).
    pub file: Option<String>,
    /// Line number associated with the value, when one was recorded (`None` otherwise).
    pub line: Option<usize>,
}
1647
/// A configuration value together with the source that set it and the history of
/// overrides that were applied to it.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    /// The currently effective value.
    pub value: T,
    /// The source that supplied the currently effective value.
    pub source: ConfigSource,
    /// Overrides that have been applied, in the order they were pushed.
    pub overrides: Vec<ConfigOverride<T>>,
}
1654
1655impl<T: Clone> SourcedValue<T> {
1656    pub fn new(value: T, source: ConfigSource) -> Self {
1657        Self {
1658            value: value.clone(),
1659            source,
1660            overrides: vec![ConfigOverride {
1661                value,
1662                source,
1663                file: None,
1664                line: None,
1665            }],
1666        }
1667    }
1668
1669    /// Merges a new override into this SourcedValue based on source precedence.
1670    /// If the new source has higher or equal precedence, the value and source are updated,
1671    /// and the new override is added to the history.
1672    pub fn merge_override(
1673        &mut self,
1674        new_value: T,
1675        new_source: ConfigSource,
1676        new_file: Option<String>,
1677        new_line: Option<usize>,
1678    ) {
1679        // Helper function to get precedence, defined locally or globally
1680        fn source_precedence(src: ConfigSource) -> u8 {
1681            match src {
1682                ConfigSource::Default => 0,
1683                ConfigSource::UserConfig => 1,
1684                ConfigSource::PyprojectToml => 2,
1685                ConfigSource::ProjectConfig => 3,
1686                ConfigSource::Cli => 4,
1687            }
1688        }
1689
1690        if source_precedence(new_source) >= source_precedence(self.source) {
1691            self.value = new_value.clone();
1692            self.source = new_source;
1693            self.overrides.push(ConfigOverride {
1694                value: new_value,
1695                source: new_source,
1696                file: new_file,
1697                line: new_line,
1698            });
1699        }
1700    }
1701
1702    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1703        // This is essentially merge_override without the precedence check
1704        // We might consolidate these later, but keep separate for now during refactor
1705        self.value = value.clone();
1706        self.source = source;
1707        self.overrides.push(ConfigOverride {
1708            value,
1709            source,
1710            file,
1711            line,
1712        });
1713    }
1714}
1715
1716impl<T: Clone + Eq + std::hash::Hash> SourcedValue<Vec<T>> {
1717    /// Merges a new value using union semantics (for arrays like `disable`)
1718    /// Values from both sources are combined, with deduplication
1719    pub fn merge_union(
1720        &mut self,
1721        new_value: Vec<T>,
1722        new_source: ConfigSource,
1723        new_file: Option<String>,
1724        new_line: Option<usize>,
1725    ) {
1726        fn source_precedence(src: ConfigSource) -> u8 {
1727            match src {
1728                ConfigSource::Default => 0,
1729                ConfigSource::UserConfig => 1,
1730                ConfigSource::PyprojectToml => 2,
1731                ConfigSource::ProjectConfig => 3,
1732                ConfigSource::Cli => 4,
1733            }
1734        }
1735
1736        if source_precedence(new_source) >= source_precedence(self.source) {
1737            // Union: combine values from both sources with deduplication
1738            let mut combined = self.value.clone();
1739            for item in new_value.iter() {
1740                if !combined.contains(item) {
1741                    combined.push(item.clone());
1742                }
1743            }
1744
1745            self.value = combined;
1746            self.source = new_source;
1747            self.overrides.push(ConfigOverride {
1748                value: new_value,
1749                source: new_source,
1750                file: new_file,
1751                line: new_line,
1752            });
1753        }
1754    }
1755}
1756
/// Global (non-rule-specific) settings, each wrapped in a [`SourcedValue`] so that the
/// origin of the effective value is retained.
///
/// `output_format` and `cache_dir` are optional: the `Default` implementation leaves
/// them as `None`, so they are only present once some source has provided them.
#[derive(Debug, Clone)]
pub struct SourcedGlobalConfig {
    /// Rule names to enable. Merged with replace semantics in `SourcedConfig::merge`.
    pub enable: SourcedValue<Vec<String>>,
    /// Rule names to disable. Merged with union semantics in `SourcedConfig::merge`.
    pub disable: SourcedValue<Vec<String>>,
    /// Exclusion entries (presumably path/glob patterns; confirm against the parser).
    pub exclude: SourcedValue<Vec<String>>,
    /// Inclusion entries (presumably path/glob patterns; confirm against the parser).
    pub include: SourcedValue<Vec<String>>,
    /// Defaults to `true`.
    pub respect_gitignore: SourcedValue<bool>,
    /// Defaults to `LineLength::default()`.
    pub line_length: SourcedValue<LineLength>,
    /// No built-in default; `None` until a source sets it.
    pub output_format: Option<SourcedValue<String>>,
    /// Rule names listed as fixable. Empty by default.
    pub fixable: SourcedValue<Vec<String>>,
    /// Rule names listed as unfixable. Empty by default.
    pub unfixable: SourcedValue<Vec<String>>,
    /// Markdown flavor. Defaults to `MarkdownFlavor::default()`.
    pub flavor: SourcedValue<MarkdownFlavor>,
    /// Defaults to `false`.
    pub force_exclude: SourcedValue<bool>,
    /// No built-in default; `None` until a source sets it.
    pub cache_dir: Option<SourcedValue<String>>,
    /// Defaults to `true`.
    pub cache: SourcedValue<bool>,
}
1773
1774impl Default for SourcedGlobalConfig {
1775    fn default() -> Self {
1776        SourcedGlobalConfig {
1777            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1778            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1779            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1780            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1781            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1782            line_length: SourcedValue::new(LineLength::default(), ConfigSource::Default),
1783            output_format: None,
1784            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1785            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1786            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1787            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1788            cache_dir: None,
1789            cache: SourcedValue::new(true, ConfigSource::Default),
1790        }
1791    }
1792}
1793
/// Options configured for a single rule, with provenance for each option.
#[derive(Debug, Default, Clone)]
pub struct SourcedRuleConfig {
    /// Option name mapped to its TOML value and the source that set it.
    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
}
1798
/// Represents configuration loaded from a single source file, with provenance.
/// Used as an intermediate step before merging into the final SourcedConfig.
///
/// Note: `loaded_files` is not part of a fragment; it is tracked globally in `SourcedConfig`.
#[derive(Debug, Clone)]
pub struct SourcedConfigFragment {
    /// Global settings found in this source.
    pub global: SourcedGlobalConfig,
    /// Per-file ignore table (presumably file pattern -> rule names; confirm against the parser).
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Rule-specific option tables, keyed by rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Severity settings, keyed by rule name.
    pub rule_severities: BTreeMap<String, SourcedValue<crate::rule::Severity>>,
    /// Unrecognized keys, each stored as (section, key, file_path).
    pub unknown_keys: Vec<(String, String, Option<String>)>,
}
1810
1811impl Default for SourcedConfigFragment {
1812    fn default() -> Self {
1813        Self {
1814            global: SourcedGlobalConfig::default(),
1815            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1816            rules: BTreeMap::new(),
1817            rule_severities: BTreeMap::new(),
1818            unknown_keys: Vec::new(),
1819        }
1820    }
1821}
1822
/// Configuration with provenance tracking for values.
///
/// The `State` type parameter encodes the validation state:
/// - `ConfigLoaded`: Config has been loaded but not validated
/// - `ConfigValidated`: Config has been validated and can be converted to `Config`
///
/// # Typestate Pattern
///
/// This uses the typestate pattern to ensure validation happens before conversion:
///
/// ```ignore
/// let loaded: SourcedConfig<ConfigLoaded> = SourcedConfig::load_with_discovery(...)?;
/// let validated: SourcedConfig<ConfigValidated> = loaded.validate(&registry)?;
/// let config: Config = validated.into();  // Only works on ConfigValidated!
/// ```
///
/// Attempting to convert a `ConfigLoaded` config directly to `Config` is a compile error.
#[derive(Debug, Clone)]
pub struct SourcedConfig<State = ConfigLoaded> {
    /// Global settings, each with its own provenance.
    pub global: SourcedGlobalConfig,
    /// Per-file ignore table (presumably file pattern -> rule names; confirm against the parser).
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Rule-specific option tables. `merge` stores them under the upper-cased rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Severity settings, keyed by rule name.
    pub rule_severities: BTreeMap<String, SourcedValue<crate::rule::Severity>>,
    /// Paths of the configuration files that were read and merged.
    pub loaded_files: Vec<String>,
    /// Unrecognized keys, each stored as (section, key, file_path).
    pub unknown_keys: Vec<(String, String, Option<String>)>,
    /// Project root directory (parent of config file), used for resolving relative paths
    pub project_root: Option<std::path::PathBuf>,
    /// Validation warnings (populated after validate() is called)
    pub validation_warnings: Vec<ConfigValidationWarning>,
    /// Phantom data for the state type parameter
    _state: PhantomData<State>,
}
1855
1856impl Default for SourcedConfig<ConfigLoaded> {
1857    fn default() -> Self {
1858        Self {
1859            global: SourcedGlobalConfig::default(),
1860            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1861            rules: BTreeMap::new(),
1862            rule_severities: BTreeMap::new(),
1863            loaded_files: Vec::new(),
1864            unknown_keys: Vec::new(),
1865            project_root: None,
1866            validation_warnings: Vec::new(),
1867            _state: PhantomData,
1868        }
1869    }
1870}
1871
1872impl SourcedConfig<ConfigLoaded> {
1873    /// Merges another SourcedConfigFragment into this SourcedConfig.
1874    /// Uses source precedence to determine which values take effect.
1875    fn merge(&mut self, fragment: SourcedConfigFragment) {
1876        // Merge global config
1877        // Enable uses replace semantics (project can enforce rules)
1878        self.global.enable.merge_override(
1879            fragment.global.enable.value,
1880            fragment.global.enable.source,
1881            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1882            fragment.global.enable.overrides.first().and_then(|o| o.line),
1883        );
1884
1885        // Disable uses union semantics (user can add to project disables)
1886        self.global.disable.merge_union(
1887            fragment.global.disable.value,
1888            fragment.global.disable.source,
1889            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1890            fragment.global.disable.overrides.first().and_then(|o| o.line),
1891        );
1892
1893        // Conflict resolution: Enable overrides disable
1894        // Remove any rules from disable that appear in enable
1895        self.global
1896            .disable
1897            .value
1898            .retain(|rule| !self.global.enable.value.contains(rule));
1899        self.global.include.merge_override(
1900            fragment.global.include.value,
1901            fragment.global.include.source,
1902            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1903            fragment.global.include.overrides.first().and_then(|o| o.line),
1904        );
1905        self.global.exclude.merge_override(
1906            fragment.global.exclude.value,
1907            fragment.global.exclude.source,
1908            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1909            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1910        );
1911        self.global.respect_gitignore.merge_override(
1912            fragment.global.respect_gitignore.value,
1913            fragment.global.respect_gitignore.source,
1914            fragment
1915                .global
1916                .respect_gitignore
1917                .overrides
1918                .first()
1919                .and_then(|o| o.file.clone()),
1920            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1921        );
1922        self.global.line_length.merge_override(
1923            fragment.global.line_length.value,
1924            fragment.global.line_length.source,
1925            fragment
1926                .global
1927                .line_length
1928                .overrides
1929                .first()
1930                .and_then(|o| o.file.clone()),
1931            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1932        );
1933        self.global.fixable.merge_override(
1934            fragment.global.fixable.value,
1935            fragment.global.fixable.source,
1936            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1937            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1938        );
1939        self.global.unfixable.merge_override(
1940            fragment.global.unfixable.value,
1941            fragment.global.unfixable.source,
1942            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1943            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1944        );
1945
1946        // Merge flavor
1947        self.global.flavor.merge_override(
1948            fragment.global.flavor.value,
1949            fragment.global.flavor.source,
1950            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1951            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1952        );
1953
1954        // Merge force_exclude
1955        self.global.force_exclude.merge_override(
1956            fragment.global.force_exclude.value,
1957            fragment.global.force_exclude.source,
1958            fragment
1959                .global
1960                .force_exclude
1961                .overrides
1962                .first()
1963                .and_then(|o| o.file.clone()),
1964            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1965        );
1966
1967        // Merge output_format if present
1968        if let Some(output_format_fragment) = fragment.global.output_format {
1969            if let Some(ref mut output_format) = self.global.output_format {
1970                output_format.merge_override(
1971                    output_format_fragment.value,
1972                    output_format_fragment.source,
1973                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1974                    output_format_fragment.overrides.first().and_then(|o| o.line),
1975                );
1976            } else {
1977                self.global.output_format = Some(output_format_fragment);
1978            }
1979        }
1980
1981        // Merge cache_dir if present
1982        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
1983            if let Some(ref mut cache_dir) = self.global.cache_dir {
1984                cache_dir.merge_override(
1985                    cache_dir_fragment.value,
1986                    cache_dir_fragment.source,
1987                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
1988                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
1989                );
1990            } else {
1991                self.global.cache_dir = Some(cache_dir_fragment);
1992            }
1993        }
1994
1995        // Merge cache if not default (only override when explicitly set)
1996        if fragment.global.cache.source != ConfigSource::Default {
1997            self.global.cache.merge_override(
1998                fragment.global.cache.value,
1999                fragment.global.cache.source,
2000                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
2001                fragment.global.cache.overrides.first().and_then(|o| o.line),
2002            );
2003        }
2004
2005        // Merge per_file_ignores
2006        self.per_file_ignores.merge_override(
2007            fragment.per_file_ignores.value,
2008            fragment.per_file_ignores.source,
2009            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
2010            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
2011        );
2012
2013        // Merge rule configs
2014        for (rule_name, rule_fragment) in fragment.rules {
2015            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
2016            let rule_entry = self.rules.entry(norm_rule_name).or_default();
2017            for (key, sourced_value_fragment) in rule_fragment.values {
2018                let sv_entry = rule_entry
2019                    .values
2020                    .entry(key.clone())
2021                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
2022                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
2023                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
2024                sv_entry.merge_override(
2025                    sourced_value_fragment.value,  // Use the value from the fragment
2026                    sourced_value_fragment.source, // Use the source from the fragment
2027                    file_from_fragment,            // Pass the file path from the fragment override
2028                    line_from_fragment,            // Pass the line number from the fragment override
2029                );
2030            }
2031        }
2032
2033        // Merge rule_severities
2034        for (rule_name, severity) in fragment.rule_severities {
2035            if let Some(ref mut existing) = self.rule_severities.get_mut(&rule_name) {
2036                existing.merge_override(
2037                    severity.value,
2038                    severity.source,
2039                    severity.overrides.first().and_then(|o| o.file.clone()),
2040                    severity.overrides.first().and_then(|o| o.line),
2041                );
2042            } else {
2043                self.rule_severities.insert(rule_name, severity);
2044            }
2045        }
2046
2047        // Merge unknown_keys from fragment
2048        for (section, key, file_path) in fragment.unknown_keys {
2049            // Deduplicate: only add if not already present
2050            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
2051                self.unknown_keys.push((section, key, file_path));
2052            }
2053        }
2054    }
2055
2056    /// Load and merge configurations from files and CLI overrides.
2057    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
2058        Self::load_with_discovery(config_path, cli_overrides, false)
2059    }
2060
2061    /// Finds project root by walking up from start_dir looking for .git directory.
2062    /// Falls back to start_dir if no .git found.
2063    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
2064        let mut current = start_dir.to_path_buf();
2065        const MAX_DEPTH: usize = 100;
2066
2067        for _ in 0..MAX_DEPTH {
2068            if current.join(".git").exists() {
2069                log::debug!("[rumdl-config] Found .git at: {}", current.display());
2070                return current;
2071            }
2072
2073            match current.parent() {
2074                Some(parent) => current = parent.to_path_buf(),
2075                None => break,
2076            }
2077        }
2078
2079        // No .git found, use start_dir as project root
2080        log::debug!(
2081            "[rumdl-config] No .git found, using config location as project root: {}",
2082            start_dir.display()
2083        );
2084        start_dir.to_path_buf()
2085    }
2086
2087    /// Discover configuration file by traversing up the directory tree.
2088    /// Returns the first configuration file found.
2089    /// Discovers config file and returns both the config path and project root.
2090    /// Returns: (config_file_path, project_root_path)
2091    /// Project root is the directory containing .git, or config parent as fallback.
2092    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
2093        use std::env;
2094
2095        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
2096        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
2097
2098        let start_dir = match env::current_dir() {
2099            Ok(dir) => dir,
2100            Err(e) => {
2101                log::debug!("[rumdl-config] Failed to get current directory: {e}");
2102                return None;
2103            }
2104        };
2105
2106        let mut current_dir = start_dir.clone();
2107        let mut depth = 0;
2108        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
2109
2110        loop {
2111            if depth >= MAX_DEPTH {
2112                log::debug!("[rumdl-config] Maximum traversal depth reached");
2113                break;
2114            }
2115
2116            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
2117
2118            // Check for config files in order of precedence (only if not already found)
2119            if found_config.is_none() {
2120                for config_name in CONFIG_FILES {
2121                    let config_path = current_dir.join(config_name);
2122
2123                    if config_path.exists() {
2124                        // For pyproject.toml, verify it contains [tool.rumdl] section
2125                        if *config_name == "pyproject.toml" {
2126                            if let Ok(content) = std::fs::read_to_string(&config_path) {
2127                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
2128                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
2129                                    // Store config, but continue looking for .git
2130                                    found_config = Some((config_path.clone(), current_dir.clone()));
2131                                    break;
2132                                }
2133                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
2134                                continue;
2135                            }
2136                        } else {
2137                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
2138                            // Store config, but continue looking for .git
2139                            found_config = Some((config_path.clone(), current_dir.clone()));
2140                            break;
2141                        }
2142                    }
2143                }
2144            }
2145
2146            // Check for .git directory (stop boundary)
2147            if current_dir.join(".git").exists() {
2148                log::debug!("[rumdl-config] Stopping at .git directory");
2149                break;
2150            }
2151
2152            // Move to parent directory
2153            match current_dir.parent() {
2154                Some(parent) => {
2155                    current_dir = parent.to_owned();
2156                    depth += 1;
2157                }
2158                None => {
2159                    log::debug!("[rumdl-config] Reached filesystem root");
2160                    break;
2161                }
2162            }
2163        }
2164
2165        // If config found, determine project root by walking up from config location
2166        if let Some((config_path, config_dir)) = found_config {
2167            let project_root = Self::find_project_root_from(&config_dir);
2168            return Some((config_path, project_root));
2169        }
2170
2171        None
2172    }
2173
2174    /// Discover markdownlint configuration file by traversing up the directory tree.
2175    /// Similar to discover_config_upward but for .markdownlint.yaml/json files.
2176    /// Returns the path to the config file if found.
2177    fn discover_markdownlint_config_upward() -> Option<std::path::PathBuf> {
2178        use std::env;
2179
2180        const MAX_DEPTH: usize = 100;
2181
2182        let start_dir = match env::current_dir() {
2183            Ok(dir) => dir,
2184            Err(e) => {
2185                log::debug!("[rumdl-config] Failed to get current directory for markdownlint discovery: {e}");
2186                return None;
2187            }
2188        };
2189
2190        let mut current_dir = start_dir.clone();
2191        let mut depth = 0;
2192
2193        loop {
2194            if depth >= MAX_DEPTH {
2195                log::debug!("[rumdl-config] Maximum traversal depth reached for markdownlint discovery");
2196                break;
2197            }
2198
2199            log::debug!(
2200                "[rumdl-config] Searching for markdownlint config in: {}",
2201                current_dir.display()
2202            );
2203
2204            // Check for markdownlint config files in order of precedence
2205            for config_name in MARKDOWNLINT_CONFIG_FILES {
2206                let config_path = current_dir.join(config_name);
2207                if config_path.exists() {
2208                    log::debug!("[rumdl-config] Found markdownlint config: {}", config_path.display());
2209                    return Some(config_path);
2210                }
2211            }
2212
2213            // Check for .git directory (stop boundary)
2214            if current_dir.join(".git").exists() {
2215                log::debug!("[rumdl-config] Stopping markdownlint search at .git directory");
2216                break;
2217            }
2218
2219            // Move to parent directory
2220            match current_dir.parent() {
2221                Some(parent) => {
2222                    current_dir = parent.to_owned();
2223                    depth += 1;
2224                }
2225                None => {
2226                    log::debug!("[rumdl-config] Reached filesystem root during markdownlint search");
2227                    break;
2228                }
2229            }
2230        }
2231
2232        None
2233    }
2234
2235    /// Internal implementation that accepts config directory for testing
2236    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
2237        let config_dir = config_dir.join("rumdl");
2238
2239        // Check for config files in precedence order (same as project discovery)
2240        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
2241
2242        log::debug!(
2243            "[rumdl-config] Checking for user configuration in: {}",
2244            config_dir.display()
2245        );
2246
2247        for filename in USER_CONFIG_FILES {
2248            let config_path = config_dir.join(filename);
2249
2250            if config_path.exists() {
2251                // For pyproject.toml, verify it contains [tool.rumdl] section
2252                if *filename == "pyproject.toml" {
2253                    if let Ok(content) = std::fs::read_to_string(&config_path) {
2254                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
2255                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
2256                            return Some(config_path);
2257                        }
2258                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
2259                        continue;
2260                    }
2261                } else {
2262                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
2263                    return Some(config_path);
2264                }
2265            }
2266        }
2267
2268        log::debug!(
2269            "[rumdl-config] No user configuration found in: {}",
2270            config_dir.display()
2271        );
2272        None
2273    }
2274
2275    /// Discover user-level configuration file from platform-specific config directory.
2276    /// Returns the first configuration file found in the user config directory.
2277    #[cfg(feature = "native")]
2278    fn user_configuration_path() -> Option<std::path::PathBuf> {
2279        use etcetera::{BaseStrategy, choose_base_strategy};
2280
2281        match choose_base_strategy() {
2282            Ok(strategy) => {
2283                let config_dir = strategy.config_dir();
2284                Self::user_configuration_path_impl(&config_dir)
2285            }
2286            Err(e) => {
2287                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
2288                None
2289            }
2290        }
2291    }
2292
    /// Stub for WASM builds - user config not supported.
    ///
    /// Compiled only when the `native` feature is disabled; always returns `None`, so
    /// callers never see a user-level configuration file in that build.
    #[cfg(not(feature = "native"))]
    fn user_configuration_path() -> Option<std::path::PathBuf> {
        None
    }
2298
2299    /// Internal implementation that accepts user config directory for testing
2300    #[doc(hidden)]
2301    pub fn load_with_discovery_impl(
2302        config_path: Option<&str>,
2303        cli_overrides: Option<&SourcedGlobalConfig>,
2304        skip_auto_discovery: bool,
2305        user_config_dir: Option<&Path>,
2306    ) -> Result<Self, ConfigError> {
2307        use std::env;
2308        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
2309        if config_path.is_none() {
2310            if skip_auto_discovery {
2311                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
2312            } else {
2313                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
2314            }
2315        } else {
2316            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
2317        }
2318        let mut sourced_config = SourcedConfig::default();
2319
2320        // 1. Always load user configuration first (unless auto-discovery is disabled)
2321        // User config serves as the base layer that project configs build upon
2322        if !skip_auto_discovery {
2323            let user_config_path = if let Some(dir) = user_config_dir {
2324                Self::user_configuration_path_impl(dir)
2325            } else {
2326                Self::user_configuration_path()
2327            };
2328
2329            if let Some(user_config_path) = user_config_path {
2330                let path_str = user_config_path.display().to_string();
2331                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
2332
2333                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
2334
2335                if filename == "pyproject.toml" {
2336                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2337                        source: e,
2338                        path: path_str.clone(),
2339                    })?;
2340                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2341                        sourced_config.merge(fragment);
2342                        sourced_config.loaded_files.push(path_str);
2343                    }
2344                } else {
2345                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2346                        source: e,
2347                        path: path_str.clone(),
2348                    })?;
2349                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::UserConfig)?;
2350                    sourced_config.merge(fragment);
2351                    sourced_config.loaded_files.push(path_str);
2352                }
2353            } else {
2354                log::debug!("[rumdl-config] No user configuration file found");
2355            }
2356        }
2357
2358        // 2. Load explicit config path if provided (overrides user config)
2359        if let Some(path) = config_path {
2360            let path_obj = Path::new(path);
2361            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
2362            log::debug!("[rumdl-config] Trying to load config file: {filename}");
2363            let path_str = path.to_string();
2364
2365            // Find project root by walking up from config location looking for .git
2366            if let Some(config_parent) = path_obj.parent() {
2367                let project_root = Self::find_project_root_from(config_parent);
2368                log::debug!(
2369                    "[rumdl-config] Project root (from explicit config): {}",
2370                    project_root.display()
2371                );
2372                sourced_config.project_root = Some(project_root);
2373            }
2374
2375            // Known markdownlint config files
2376            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
2377
2378            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
2379                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2380                    source: e,
2381                    path: path_str.clone(),
2382                })?;
2383                if filename == "pyproject.toml" {
2384                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2385                        sourced_config.merge(fragment);
2386                        sourced_config.loaded_files.push(path_str.clone());
2387                    }
2388                } else {
2389                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2390                    sourced_config.merge(fragment);
2391                    sourced_config.loaded_files.push(path_str.clone());
2392                }
2393            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
2394                || path_str.ends_with(".json")
2395                || path_str.ends_with(".jsonc")
2396                || path_str.ends_with(".yaml")
2397                || path_str.ends_with(".yml")
2398            {
2399                // Parse as markdownlint config (JSON/YAML)
2400                let fragment = load_from_markdownlint(&path_str)?;
2401                sourced_config.merge(fragment);
2402                sourced_config.loaded_files.push(path_str.clone());
2403                // markdownlint is fallback only
2404            } else {
2405                // Try TOML only
2406                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2407                    source: e,
2408                    path: path_str.clone(),
2409                })?;
2410                let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2411                sourced_config.merge(fragment);
2412                sourced_config.loaded_files.push(path_str.clone());
2413            }
2414        }
2415
2416        // 3. Perform auto-discovery for project config if not skipped AND no explicit config path
2417        if !skip_auto_discovery && config_path.is_none() {
2418            // Look for project configuration files (override user config)
2419            if let Some((config_file, project_root)) = Self::discover_config_upward() {
2420                let path_str = config_file.display().to_string();
2421                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
2422
2423                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
2424                log::debug!("[rumdl-config] Project root: {}", project_root.display());
2425
2426                // Store project root for cache directory resolution
2427                sourced_config.project_root = Some(project_root);
2428
2429                if filename == "pyproject.toml" {
2430                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2431                        source: e,
2432                        path: path_str.clone(),
2433                    })?;
2434                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2435                        sourced_config.merge(fragment);
2436                        sourced_config.loaded_files.push(path_str);
2437                    }
2438                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
2439                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2440                        source: e,
2441                        path: path_str.clone(),
2442                    })?;
2443                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2444                    sourced_config.merge(fragment);
2445                    sourced_config.loaded_files.push(path_str);
2446                }
2447            } else {
2448                log::debug!("[rumdl-config] No configuration file found via upward traversal");
2449
2450                // If no project config found, fallback to markdownlint config via upward traversal
2451                if let Some(config_path) = Self::discover_markdownlint_config_upward() {
2452                    let path_str = config_path.display().to_string();
2453                    match load_from_markdownlint(&path_str) {
2454                        Ok(fragment) => {
2455                            sourced_config.merge(fragment);
2456                            sourced_config.loaded_files.push(path_str);
2457                        }
2458                        Err(_e) => {
2459                            log::debug!("[rumdl-config] Failed to load markdownlint config");
2460                        }
2461                    }
2462                } else {
2463                    log::debug!("[rumdl-config] No markdownlint configuration file found");
2464                }
2465            }
2466        }
2467
2468        // 4. Apply CLI overrides (highest precedence)
2469        if let Some(cli) = cli_overrides {
2470            sourced_config
2471                .global
2472                .enable
2473                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
2474            sourced_config
2475                .global
2476                .disable
2477                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
2478            sourced_config
2479                .global
2480                .exclude
2481                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
2482            sourced_config
2483                .global
2484                .include
2485                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
2486            sourced_config.global.respect_gitignore.merge_override(
2487                cli.respect_gitignore.value,
2488                ConfigSource::Cli,
2489                None,
2490                None,
2491            );
2492            sourced_config
2493                .global
2494                .fixable
2495                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
2496            sourced_config
2497                .global
2498                .unfixable
2499                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2500            // No rule-specific CLI overrides implemented yet
2501        }
2502
2503        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2504
2505        Ok(sourced_config)
2506    }
2507
2508    /// Load and merge configurations from files and CLI overrides.
2509    /// If skip_auto_discovery is true, only explicit config paths are loaded.
2510    pub fn load_with_discovery(
2511        config_path: Option<&str>,
2512        cli_overrides: Option<&SourcedGlobalConfig>,
2513        skip_auto_discovery: bool,
2514    ) -> Result<Self, ConfigError> {
2515        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
2516    }
2517
2518    /// Validate the configuration against a rule registry.
2519    ///
2520    /// This method transitions the config from `ConfigLoaded` to `ConfigValidated` state,
2521    /// enabling conversion to `Config`. Validation warnings are stored in the config
2522    /// and can be displayed to the user.
2523    ///
2524    /// # Example
2525    ///
2526    /// ```ignore
2527    /// let loaded = SourcedConfig::load_with_discovery(path, None, false)?;
2528    /// let validated = loaded.validate(&registry)?;
2529    /// let config: Config = validated.into();
2530    /// ```
2531    pub fn validate(self, registry: &RuleRegistry) -> Result<SourcedConfig<ConfigValidated>, ConfigError> {
2532        let warnings = validate_config_sourced_internal(&self, registry);
2533
2534        Ok(SourcedConfig {
2535            global: self.global,
2536            per_file_ignores: self.per_file_ignores,
2537            rules: self.rules,
2538            rule_severities: self.rule_severities,
2539            loaded_files: self.loaded_files,
2540            unknown_keys: self.unknown_keys,
2541            project_root: self.project_root,
2542            validation_warnings: warnings,
2543            _state: PhantomData,
2544        })
2545    }
2546
2547    /// Validate and convert to Config in one step (convenience method).
2548    ///
2549    /// This combines `validate()` and `into()` for callers who want the
2550    /// validation warnings separately.
2551    pub fn validate_into(self, registry: &RuleRegistry) -> Result<(Config, Vec<ConfigValidationWarning>), ConfigError> {
2552        let validated = self.validate(registry)?;
2553        let warnings = validated.validation_warnings.clone();
2554        Ok((validated.into(), warnings))
2555    }
2556
2557    /// Skip validation and convert directly to ConfigValidated state.
2558    ///
2559    /// # Safety
2560    ///
2561    /// This method bypasses validation. Use only when:
2562    /// - You've already validated via `validate_config_sourced()`
2563    /// - You're in test code that doesn't need validation
2564    /// - You're migrating legacy code and will add proper validation later
2565    ///
2566    /// Prefer `validate()` for new code.
2567    pub fn into_validated_unchecked(self) -> SourcedConfig<ConfigValidated> {
2568        SourcedConfig {
2569            global: self.global,
2570            per_file_ignores: self.per_file_ignores,
2571            rules: self.rules,
2572            rule_severities: self.rule_severities,
2573            loaded_files: self.loaded_files,
2574            unknown_keys: self.unknown_keys,
2575            project_root: self.project_root,
2576            validation_warnings: Vec::new(),
2577            _state: PhantomData,
2578        }
2579    }
2580}
2581
2582/// Convert a validated configuration to the final Config type.
2583///
2584/// This implementation only exists for `SourcedConfig<ConfigValidated>`,
2585/// ensuring that validation must occur before conversion.
2586impl From<SourcedConfig<ConfigValidated>> for Config {
2587    fn from(sourced: SourcedConfig<ConfigValidated>) -> Self {
2588        let mut rules = BTreeMap::new();
2589        for (rule_name, sourced_rule_cfg) in sourced.rules {
2590            // Normalize rule name to uppercase for case-insensitive lookup
2591            let normalized_rule_name = rule_name.to_ascii_uppercase();
2592            let mut values = BTreeMap::new();
2593            for (key, sourced_val) in sourced_rule_cfg.values {
2594                values.insert(key, sourced_val.value);
2595            }
2596            rules.insert(normalized_rule_name, RuleConfig { values });
2597        }
2598        #[allow(deprecated)]
2599        let global = GlobalConfig {
2600            enable: sourced.global.enable.value,
2601            disable: sourced.global.disable.value,
2602            exclude: sourced.global.exclude.value,
2603            include: sourced.global.include.value,
2604            respect_gitignore: sourced.global.respect_gitignore.value,
2605            line_length: sourced.global.line_length.value,
2606            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2607            fixable: sourced.global.fixable.value,
2608            unfixable: sourced.global.unfixable.value,
2609            flavor: sourced.global.flavor.value,
2610            force_exclude: sourced.global.force_exclude.value,
2611            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
2612            cache: sourced.global.cache.value,
2613        };
2614        Config {
2615            global,
2616            per_file_ignores: sourced.per_file_ignores.value,
2617            rules,
2618            rule_severities: sourced.rule_severities.into_iter().map(|(k, v)| (k, v.value)).collect(),
2619            project_root: sourced.project_root,
2620        }
2621    }
2622}
2623
2624/// Registry of all known rules and their config schemas
2625pub struct RuleRegistry {
2626    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types
2627    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
2628    /// Map of rule name to config key aliases
2629    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
2630}
2631
2632impl RuleRegistry {
2633    /// Build a registry from a list of rules
2634    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2635        let mut rule_schemas = std::collections::BTreeMap::new();
2636        let mut rule_aliases = std::collections::BTreeMap::new();
2637
2638        for rule in rules {
2639            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2640                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2641                rule_schemas.insert(norm_name.clone(), table);
2642                norm_name
2643            } else {
2644                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2645                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2646                norm_name
2647            };
2648
2649            // Store aliases if the rule provides them
2650            if let Some(aliases) = rule.config_aliases() {
2651                rule_aliases.insert(norm_name, aliases);
2652            }
2653        }
2654
2655        RuleRegistry {
2656            rule_schemas,
2657            rule_aliases,
2658        }
2659    }
2660
2661    /// Get all known rule names
2662    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2663        self.rule_schemas.keys().cloned().collect()
2664    }
2665
2666    /// Get the valid configuration keys for a rule, including both original and normalized variants
2667    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2668        self.rule_schemas.get(rule).map(|schema| {
2669            let mut all_keys = std::collections::BTreeSet::new();
2670
2671            // Always allow 'severity' for any rule
2672            all_keys.insert("severity".to_string());
2673
2674            // Add original keys from schema
2675            for key in schema.keys() {
2676                all_keys.insert(key.clone());
2677            }
2678
2679            // Add normalized variants for markdownlint compatibility
2680            for key in schema.keys() {
2681                // Add kebab-case variant
2682                all_keys.insert(key.replace('_', "-"));
2683                // Add snake_case variant
2684                all_keys.insert(key.replace('-', "_"));
2685                // Add normalized variant
2686                all_keys.insert(normalize_key(key));
2687            }
2688
2689            // Add any aliases defined by the rule
2690            if let Some(aliases) = self.rule_aliases.get(rule) {
2691                for alias_key in aliases.keys() {
2692                    all_keys.insert(alias_key.clone());
2693                    // Also add normalized variants of the alias
2694                    all_keys.insert(alias_key.replace('_', "-"));
2695                    all_keys.insert(alias_key.replace('-', "_"));
2696                    all_keys.insert(normalize_key(alias_key));
2697                }
2698            }
2699
2700            all_keys
2701        })
2702    }
2703
2704    /// Get the expected value type for a rule's configuration key, trying variants
2705    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2706        if let Some(schema) = self.rule_schemas.get(rule) {
2707            // Check if this key is an alias
2708            if let Some(aliases) = self.rule_aliases.get(rule)
2709                && let Some(canonical_key) = aliases.get(key)
2710            {
2711                // Use the canonical key for schema lookup
2712                if let Some(value) = schema.get(canonical_key) {
2713                    return Some(value);
2714                }
2715            }
2716
2717            // Try the original key
2718            if let Some(value) = schema.get(key) {
2719                return Some(value);
2720            }
2721
2722            // Try key variants
2723            let key_variants = [
2724                key.replace('-', "_"), // Convert kebab-case to snake_case
2725                key.replace('_', "-"), // Convert snake_case to kebab-case
2726                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2727            ];
2728
2729            for variant in &key_variants {
2730                if let Some(value) = schema.get(variant) {
2731                    return Some(value);
2732                }
2733            }
2734        }
2735        None
2736    }
2737
2738    /// Resolve any rule name (canonical or alias) to its canonical form
2739    /// Returns None if the rule name is not recognized
2740    ///
2741    /// Resolution order:
2742    /// 1. Direct canonical name match
2743    /// 2. Static aliases (built-in markdownlint aliases)
2744    pub fn resolve_rule_name(&self, name: &str) -> Option<String> {
2745        // Try normalized canonical name first
2746        let normalized = normalize_key(name);
2747        if self.rule_schemas.contains_key(&normalized) {
2748            return Some(normalized);
2749        }
2750
2751        // Try static alias resolution (O(1) perfect hash lookup)
2752        resolve_rule_name_alias(name).map(|s| s.to_string())
2753    }
2754}
2755
/// Compile-time perfect hash map from rule names and aliases to canonical rule IDs.
///
/// Built with the `phf_map!` macro. Every key is upper-case and uses hyphens as separators;
/// `resolve_rule_name_alias` upper-cases its input and replaces underscores with hyphens
/// before looking it up here, so lookups are case-insensitive and accept snake_case.
///
/// Canonical IDs map to themselves. Some IDs have no alias (e.g. MD008) and some have
/// more than one (e.g. MD025, MD041).
static RULE_ALIAS_MAP: phf::Map<&'static str, &'static str> = phf::phf_map! {
    // Canonical names (identity mapping for consistency)
    "MD001" => "MD001",
    "MD003" => "MD003",
    "MD004" => "MD004",
    "MD005" => "MD005",
    "MD007" => "MD007",
    "MD008" => "MD008",
    "MD009" => "MD009",
    "MD010" => "MD010",
    "MD011" => "MD011",
    "MD012" => "MD012",
    "MD013" => "MD013",
    "MD014" => "MD014",
    "MD015" => "MD015",
    "MD018" => "MD018",
    "MD019" => "MD019",
    "MD020" => "MD020",
    "MD021" => "MD021",
    "MD022" => "MD022",
    "MD023" => "MD023",
    "MD024" => "MD024",
    "MD025" => "MD025",
    "MD026" => "MD026",
    "MD027" => "MD027",
    "MD028" => "MD028",
    "MD029" => "MD029",
    "MD030" => "MD030",
    "MD031" => "MD031",
    "MD032" => "MD032",
    "MD033" => "MD033",
    "MD034" => "MD034",
    "MD035" => "MD035",
    "MD036" => "MD036",
    "MD037" => "MD037",
    "MD038" => "MD038",
    "MD039" => "MD039",
    "MD040" => "MD040",
    "MD041" => "MD041",
    "MD042" => "MD042",
    "MD043" => "MD043",
    "MD044" => "MD044",
    "MD045" => "MD045",
    "MD046" => "MD046",
    "MD047" => "MD047",
    "MD048" => "MD048",
    "MD049" => "MD049",
    "MD050" => "MD050",
    "MD051" => "MD051",
    "MD052" => "MD052",
    "MD053" => "MD053",
    "MD054" => "MD054",
    "MD055" => "MD055",
    "MD056" => "MD056",
    "MD057" => "MD057",
    "MD058" => "MD058",
    "MD059" => "MD059",
    "MD060" => "MD060",
    "MD061" => "MD061",

    // Aliases (hyphen format)
    "HEADING-INCREMENT" => "MD001",
    "HEADING-STYLE" => "MD003",
    "UL-STYLE" => "MD004",
    "LIST-INDENT" => "MD005",
    "UL-INDENT" => "MD007",
    "NO-TRAILING-SPACES" => "MD009",
    "NO-HARD-TABS" => "MD010",
    "NO-REVERSED-LINKS" => "MD011",
    "NO-MULTIPLE-BLANKS" => "MD012",
    "LINE-LENGTH" => "MD013",
    "COMMANDS-SHOW-OUTPUT" => "MD014",
    "NO-MISSING-SPACE-AFTER-LIST-MARKER" => "MD015",
    "NO-MISSING-SPACE-ATX" => "MD018",
    "NO-MULTIPLE-SPACE-ATX" => "MD019",
    "NO-MISSING-SPACE-CLOSED-ATX" => "MD020",
    "NO-MULTIPLE-SPACE-CLOSED-ATX" => "MD021",
    "BLANKS-AROUND-HEADINGS" => "MD022",
    "HEADING-START-LEFT" => "MD023",
    "NO-DUPLICATE-HEADING" => "MD024",
    // MD025 has two aliases
    "SINGLE-TITLE" => "MD025",
    "SINGLE-H1" => "MD025",
    "NO-TRAILING-PUNCTUATION" => "MD026",
    "NO-MULTIPLE-SPACE-BLOCKQUOTE" => "MD027",
    "NO-BLANKS-BLOCKQUOTE" => "MD028",
    "OL-PREFIX" => "MD029",
    "LIST-MARKER-SPACE" => "MD030",
    "BLANKS-AROUND-FENCES" => "MD031",
    "BLANKS-AROUND-LISTS" => "MD032",
    "NO-INLINE-HTML" => "MD033",
    "NO-BARE-URLS" => "MD034",
    "HR-STYLE" => "MD035",
    "NO-EMPHASIS-AS-HEADING" => "MD036",
    "NO-SPACE-IN-EMPHASIS" => "MD037",
    "NO-SPACE-IN-CODE" => "MD038",
    "NO-SPACE-IN-LINKS" => "MD039",
    "FENCED-CODE-LANGUAGE" => "MD040",
    // MD041 has two aliases
    "FIRST-LINE-HEADING" => "MD041",
    "FIRST-LINE-H1" => "MD041",
    "NO-EMPTY-LINKS" => "MD042",
    "REQUIRED-HEADINGS" => "MD043",
    "PROPER-NAMES" => "MD044",
    "NO-ALT-TEXT" => "MD045",
    "CODE-BLOCK-STYLE" => "MD046",
    "SINGLE-TRAILING-NEWLINE" => "MD047",
    "CODE-FENCE-STYLE" => "MD048",
    "EMPHASIS-STYLE" => "MD049",
    "STRONG-STYLE" => "MD050",
    "LINK-FRAGMENTS" => "MD051",
    "REFERENCE-LINKS-IMAGES" => "MD052",
    "LINK-IMAGE-REFERENCE-DEFINITIONS" => "MD053",
    "LINK-IMAGE-STYLE" => "MD054",
    "TABLE-PIPE-STYLE" => "MD055",
    "TABLE-COLUMN-COUNT" => "MD056",
    "EXISTING-RELATIVE-LINKS" => "MD057",
    "BLANKS-AROUND-TABLES" => "MD058",
    "TABLE-CELL-ALIGNMENT" => "MD059",
    "TABLE-FORMAT" => "MD060",
    "FORBIDDEN-TERMS" => "MD061",
};
2878
2879/// Resolve a rule name alias to its canonical form with O(1) perfect hash lookup
2880/// Converts rule aliases (like "ul-style", "line-length") to canonical IDs (like "MD004", "MD013")
2881/// Returns None if the rule name is not recognized
2882pub(crate) fn resolve_rule_name_alias(key: &str) -> Option<&'static str> {
2883    // Normalize: uppercase and replace underscores with hyphens
2884    let normalized_key = key.to_ascii_uppercase().replace('_', "-");
2885
2886    // O(1) perfect hash lookup
2887    RULE_ALIAS_MAP.get(normalized_key.as_str()).copied()
2888}
2889
/// A single problem found while validating a configuration (warning or error).
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem
    pub message: String,
    /// Rule the problem relates to, when there is one
    pub rule: Option<String>,
    /// Config key the problem relates to, when there is one
    pub key: Option<String>,
}
2897
2898/// Internal validation function that works with any SourcedConfig state.
2899/// This is used by both the public `validate_config_sourced` and the typestate `validate()` method.
2900fn validate_config_sourced_internal<S>(
2901    sourced: &SourcedConfig<S>,
2902    registry: &RuleRegistry,
2903) -> Vec<ConfigValidationWarning> {
2904    validate_config_sourced_impl(&sourced.rules, &sourced.unknown_keys, registry)
2905}
2906
2907/// Core validation implementation that doesn't depend on SourcedConfig type parameter.
2908fn validate_config_sourced_impl(
2909    rules: &BTreeMap<String, SourcedRuleConfig>,
2910    unknown_keys: &[(String, String, Option<String>)],
2911    registry: &RuleRegistry,
2912) -> Vec<ConfigValidationWarning> {
2913    let mut warnings = Vec::new();
2914    let known_rules = registry.rule_names();
2915    // 1. Unknown rules
2916    for rule in rules.keys() {
2917        if !known_rules.contains(rule) {
2918            // Include both canonical names AND aliases for fuzzy matching
2919            let all_rule_names: Vec<String> = RULE_ALIAS_MAP.keys().map(|s| s.to_string()).collect();
2920            let message = if let Some(suggestion) = suggest_similar_key(rule, &all_rule_names) {
2921                // Convert alias suggestions to lowercase for better UX (MD001 stays uppercase, ul-style becomes lowercase)
2922                let formatted_suggestion = if suggestion.starts_with("MD") {
2923                    suggestion
2924                } else {
2925                    suggestion.to_lowercase()
2926                };
2927                format!("Unknown rule in config: {rule} (did you mean: {formatted_suggestion}?)")
2928            } else {
2929                format!("Unknown rule in config: {rule}")
2930            };
2931            warnings.push(ConfigValidationWarning {
2932                message,
2933                rule: Some(rule.clone()),
2934                key: None,
2935            });
2936        }
2937    }
2938    // 2. Unknown options and type mismatches
2939    for (rule, rule_cfg) in rules {
2940        if let Some(valid_keys) = registry.config_keys_for(rule) {
2941            for key in rule_cfg.values.keys() {
2942                if !valid_keys.contains(key) {
2943                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2944                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2945                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2946                    } else {
2947                        format!("Unknown option for rule {rule}: {key}")
2948                    };
2949                    warnings.push(ConfigValidationWarning {
2950                        message,
2951                        rule: Some(rule.clone()),
2952                        key: Some(key.clone()),
2953                    });
2954                } else {
2955                    // Type check: compare type of value to type of default
2956                    if let Some(expected) = registry.expected_value_for(rule, key) {
2957                        let actual = &rule_cfg.values[key].value;
2958                        if !toml_value_type_matches(expected, actual) {
2959                            warnings.push(ConfigValidationWarning {
2960                                message: format!(
2961                                    "Type mismatch for {}.{}: expected {}, got {}",
2962                                    rule,
2963                                    key,
2964                                    toml_type_name(expected),
2965                                    toml_type_name(actual)
2966                                ),
2967                                rule: Some(rule.clone()),
2968                                key: Some(key.clone()),
2969                            });
2970                        }
2971                    }
2972                }
2973            }
2974        }
2975    }
2976    // 3. Unknown global options (from unknown_keys)
2977    let known_global_keys = vec![
2978        "enable".to_string(),
2979        "disable".to_string(),
2980        "include".to_string(),
2981        "exclude".to_string(),
2982        "respect-gitignore".to_string(),
2983        "line-length".to_string(),
2984        "fixable".to_string(),
2985        "unfixable".to_string(),
2986        "flavor".to_string(),
2987        "force-exclude".to_string(),
2988        "output-format".to_string(),
2989        "cache-dir".to_string(),
2990        "cache".to_string(),
2991    ];
2992
2993    for (section, key, file_path) in unknown_keys {
2994        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2995            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2996                if let Some(path) = file_path {
2997                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2998                } else {
2999                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
3000                }
3001            } else if let Some(path) = file_path {
3002                format!("Unknown global option in {path}: {key}")
3003            } else {
3004                format!("Unknown global option: {key}")
3005            };
3006            warnings.push(ConfigValidationWarning {
3007                message,
3008                rule: None,
3009                key: Some(key.clone()),
3010            });
3011        } else if !key.is_empty() {
3012            // This is an unknown rule section (key is empty means it's a section header)
3013            continue;
3014        } else {
3015            // Unknown rule section - suggest similar rule names
3016            let rule_name = section.trim_matches(|c| c == '[' || c == ']');
3017            let all_rule_names: Vec<String> = RULE_ALIAS_MAP.keys().map(|s| s.to_string()).collect();
3018            let message = if let Some(suggestion) = suggest_similar_key(rule_name, &all_rule_names) {
3019                // Convert alias suggestions to lowercase for better UX (MD001 stays uppercase, ul-style becomes lowercase)
3020                let formatted_suggestion = if suggestion.starts_with("MD") {
3021                    suggestion
3022                } else {
3023                    suggestion.to_lowercase()
3024                };
3025                if let Some(path) = file_path {
3026                    format!("Unknown rule in {path}: {rule_name} (did you mean: {formatted_suggestion}?)")
3027                } else {
3028                    format!("Unknown rule in config: {rule_name} (did you mean: {formatted_suggestion}?)")
3029                }
3030            } else if let Some(path) = file_path {
3031                format!("Unknown rule in {path}: {rule_name}")
3032            } else {
3033                format!("Unknown rule in config: {rule_name}")
3034            };
3035            warnings.push(ConfigValidationWarning {
3036                message,
3037                rule: None,
3038                key: None,
3039            });
3040        }
3041    }
3042    warnings
3043}
3044
/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking.
///
/// This is the legacy API that works with `SourcedConfig<ConfigLoaded>`.
/// For new code, prefer using `sourced.validate(&registry)` which returns a
/// `SourcedConfig<ConfigValidated>` that can be converted to `Config`.
///
/// This function adds no logic of its own: it forwards both arguments to
/// `validate_config_sourced_internal` and returns that function's warnings unchanged.
pub fn validate_config_sourced(
    sourced: &SourcedConfig<ConfigLoaded>,
    registry: &RuleRegistry,
) -> Vec<ConfigValidationWarning> {
    validate_config_sourced_internal(sourced, registry)
}
3056
/// Validate a config that has already been validated (no-op, returns stored warnings).
///
/// This exists for API consistency - validated configs already have their warnings stored.
///
/// No validation is re-run: the result is a clone of `sourced.validation_warnings`.
/// `_registry` is accepted only so the signature mirrors `validate_config_sourced`;
/// it is never read.
pub fn validate_config_sourced_validated(
    sourced: &SourcedConfig<ConfigValidated>,
    _registry: &RuleRegistry,
) -> Vec<ConfigValidationWarning> {
    sourced.validation_warnings.clone()
}
3066
3067fn toml_type_name(val: &toml::Value) -> &'static str {
3068    match val {
3069        toml::Value::String(_) => "string",
3070        toml::Value::Integer(_) => "integer",
3071        toml::Value::Float(_) => "float",
3072        toml::Value::Boolean(_) => "boolean",
3073        toml::Value::Array(_) => "array",
3074        toml::Value::Table(_) => "table",
3075        toml::Value::Datetime(_) => "datetime",
3076    }
3077}
3078
/// Calculate the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions, deletions
/// and substitutions needed to turn `s1` into `s2`. It is measured in Unicode
/// scalar values (`char`s), not bytes, so multi-byte characters count as one edit.
///
/// Uses the two-row dynamic-programming formulation: only the previous and the
/// current row of the edit matrix are kept in memory.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();

    // Lengths must come from the char vectors: `str::len()` counts bytes and would
    // run the loops past the end of the vectors for any non-ASCII input.
    let len1 = s1_chars.len();
    let len2 = s2_chars.len();

    if len1 == 0 {
        return len2;
    }
    if len2 == 0 {
        return len1;
    }

    // prev_row[j] = distance between the first i chars of s1 and the first j chars of s2.
    let mut prev_row: Vec<usize> = (0..=len2).collect();
    let mut curr_row = vec![0; len2 + 1];

    for (i, &c1) in s1_chars.iter().enumerate() {
        curr_row[0] = i + 1;
        for (j, &c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    // After the final swap the last computed row lives in `prev_row`.
    prev_row[len2]
}
3110
3111/// Suggest a similar key from a list of valid keys using fuzzy matching
3112fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
3113    let unknown_lower = unknown.to_lowercase();
3114    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
3115
3116    let mut best_match: Option<(String, usize)> = None;
3117
3118    for valid in valid_keys {
3119        let valid_lower = valid.to_lowercase();
3120        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
3121
3122        if distance <= max_distance {
3123            if let Some((_, best_dist)) = &best_match {
3124                if distance < *best_dist {
3125                    best_match = Some((valid.clone(), distance));
3126                }
3127            } else {
3128                best_match = Some((valid.clone(), distance));
3129            }
3130        }
3131    }
3132
3133    best_match.map(|(key, _)| key)
3134}
3135
3136fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
3137    use toml::Value::*;
3138    match (expected, actual) {
3139        (String(_), String(_)) => true,
3140        (Integer(_), Integer(_)) => true,
3141        (Float(_), Float(_)) => true,
3142        (Boolean(_), Boolean(_)) => true,
3143        (Array(_), Array(_)) => true,
3144        (Table(_), Table(_)) => true,
3145        (Datetime(_), Datetime(_)) => true,
3146        // Allow integer for float
3147        (Float(_), Integer(_)) => true,
3148        _ => false,
3149    }
3150}
3151
3152/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
3153fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
3154    let doc: toml::Value =
3155        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
3156    let mut fragment = SourcedConfigFragment::default();
3157    let source = ConfigSource::PyprojectToml;
3158    let file = Some(path.to_string());
3159
3160    // Create rule registry for alias resolution
3161    let all_rules = rules::all_rules(&Config::default());
3162    let registry = RuleRegistry::from_rules(&all_rules);
3163
3164    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
3165    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
3166        && let Some(rumdl_table) = rumdl_config.as_table()
3167    {
3168        // Helper function to extract global config from a table
3169        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
3170            // Extract global options from the given table
3171            if let Some(enable) = table.get("enable")
3172                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
3173            {
3174                // Resolve rule name aliases (e.g., "ul-style" -> "MD004")
3175                let normalized_values = values
3176                    .into_iter()
3177                    .map(|s| registry.resolve_rule_name(&s).unwrap_or_else(|| normalize_key(&s)))
3178                    .collect();
3179                fragment
3180                    .global
3181                    .enable
3182                    .push_override(normalized_values, source, file.clone(), None);
3183            }
3184
3185            if let Some(disable) = table.get("disable")
3186                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
3187            {
3188                // Resolve rule name aliases
3189                let normalized_values: Vec<String> = values
3190                    .into_iter()
3191                    .map(|s| registry.resolve_rule_name(&s).unwrap_or_else(|| normalize_key(&s)))
3192                    .collect();
3193                fragment
3194                    .global
3195                    .disable
3196                    .push_override(normalized_values, source, file.clone(), None);
3197            }
3198
3199            if let Some(include) = table.get("include")
3200                && let Ok(values) = Vec::<String>::deserialize(include.clone())
3201            {
3202                fragment
3203                    .global
3204                    .include
3205                    .push_override(values, source, file.clone(), None);
3206            }
3207
3208            if let Some(exclude) = table.get("exclude")
3209                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
3210            {
3211                fragment
3212                    .global
3213                    .exclude
3214                    .push_override(values, source, file.clone(), None);
3215            }
3216
3217            if let Some(respect_gitignore) = table
3218                .get("respect-gitignore")
3219                .or_else(|| table.get("respect_gitignore"))
3220                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
3221            {
3222                fragment
3223                    .global
3224                    .respect_gitignore
3225                    .push_override(value, source, file.clone(), None);
3226            }
3227
3228            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
3229                && let Ok(value) = bool::deserialize(force_exclude.clone())
3230            {
3231                fragment
3232                    .global
3233                    .force_exclude
3234                    .push_override(value, source, file.clone(), None);
3235            }
3236
3237            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
3238                && let Ok(value) = String::deserialize(output_format.clone())
3239            {
3240                if fragment.global.output_format.is_none() {
3241                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
3242                } else {
3243                    fragment
3244                        .global
3245                        .output_format
3246                        .as_mut()
3247                        .unwrap()
3248                        .push_override(value, source, file.clone(), None);
3249                }
3250            }
3251
3252            if let Some(fixable) = table.get("fixable")
3253                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
3254            {
3255                let normalized_values = values
3256                    .into_iter()
3257                    .map(|s| registry.resolve_rule_name(&s).unwrap_or_else(|| normalize_key(&s)))
3258                    .collect();
3259                fragment
3260                    .global
3261                    .fixable
3262                    .push_override(normalized_values, source, file.clone(), None);
3263            }
3264
3265            if let Some(unfixable) = table.get("unfixable")
3266                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
3267            {
3268                let normalized_values = values
3269                    .into_iter()
3270                    .map(|s| registry.resolve_rule_name(&s).unwrap_or_else(|| normalize_key(&s)))
3271                    .collect();
3272                fragment
3273                    .global
3274                    .unfixable
3275                    .push_override(normalized_values, source, file.clone(), None);
3276            }
3277
3278            if let Some(flavor) = table.get("flavor")
3279                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
3280            {
3281                fragment.global.flavor.push_override(value, source, file.clone(), None);
3282            }
3283
3284            // Handle line-length special case - this should set the global line_length
3285            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
3286                && let Ok(value) = u64::deserialize(line_length.clone())
3287            {
3288                fragment
3289                    .global
3290                    .line_length
3291                    .push_override(LineLength::new(value as usize), source, file.clone(), None);
3292
3293                // Also add to MD013 rule config for backward compatibility
3294                let norm_md013_key = normalize_key("MD013");
3295                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
3296                let norm_line_length_key = normalize_key("line-length");
3297                let sv = rule_entry
3298                    .values
3299                    .entry(norm_line_length_key)
3300                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
3301                sv.push_override(line_length.clone(), source, file.clone(), None);
3302            }
3303
3304            if let Some(cache_dir) = table.get("cache-dir").or_else(|| table.get("cache_dir"))
3305                && let Ok(value) = String::deserialize(cache_dir.clone())
3306            {
3307                if fragment.global.cache_dir.is_none() {
3308                    fragment.global.cache_dir = Some(SourcedValue::new(value.clone(), source));
3309                } else {
3310                    fragment
3311                        .global
3312                        .cache_dir
3313                        .as_mut()
3314                        .unwrap()
3315                        .push_override(value, source, file.clone(), None);
3316                }
3317            }
3318
3319            if let Some(cache) = table.get("cache")
3320                && let Ok(value) = bool::deserialize(cache.clone())
3321            {
3322                fragment.global.cache.push_override(value, source, file.clone(), None);
3323            }
3324        };
3325
3326        // First, check for [tool.rumdl.global] section
3327        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
3328            extract_global_config(&mut fragment, global_table);
3329        }
3330
3331        // Also extract global options from [tool.rumdl] directly (for flat structure)
3332        extract_global_config(&mut fragment, rumdl_table);
3333
3334        // --- Extract per-file-ignores configurations ---
3335        // Check both hyphenated and underscored versions for compatibility
3336        let per_file_ignores_key = rumdl_table
3337            .get("per-file-ignores")
3338            .or_else(|| rumdl_table.get("per_file_ignores"));
3339
3340        if let Some(per_file_ignores_value) = per_file_ignores_key
3341            && let Some(per_file_table) = per_file_ignores_value.as_table()
3342        {
3343            let mut per_file_map = HashMap::new();
3344            for (pattern, rules_value) in per_file_table {
3345                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
3346                    let normalized_rules = rules
3347                        .into_iter()
3348                        .map(|s| registry.resolve_rule_name(&s).unwrap_or_else(|| normalize_key(&s)))
3349                        .collect();
3350                    per_file_map.insert(pattern.clone(), normalized_rules);
3351                } else {
3352                    log::warn!(
3353                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
3354                    );
3355                }
3356            }
3357            fragment
3358                .per_file_ignores
3359                .push_override(per_file_map, source, file.clone(), None);
3360        }
3361
3362        // --- Extract rule-specific configurations ---
3363        for (key, value) in rumdl_table {
3364            let norm_rule_key = normalize_key(key);
3365
3366            // Skip keys already handled as global or special cases
3367            // Note: Only skip these if they're NOT tables (rule sections are tables)
3368            let is_global_key = [
3369                "enable",
3370                "disable",
3371                "include",
3372                "exclude",
3373                "respect_gitignore",
3374                "respect-gitignore",
3375                "force_exclude",
3376                "force-exclude",
3377                "output_format",
3378                "output-format",
3379                "fixable",
3380                "unfixable",
3381                "per-file-ignores",
3382                "per_file_ignores",
3383                "global",
3384                "flavor",
3385                "cache_dir",
3386                "cache-dir",
3387                "cache",
3388            ]
3389            .contains(&norm_rule_key.as_str());
3390
3391            // Special handling for line-length: could be global config OR rule section
3392            let is_line_length_global =
3393                (norm_rule_key == "line-length" || norm_rule_key == "line_length") && !value.is_table();
3394
3395            if is_global_key || is_line_length_global {
3396                continue;
3397            }
3398
3399            // Try to resolve as a rule name (handles both canonical names and aliases)
3400            if let Some(resolved_rule_name) = registry.resolve_rule_name(key)
3401                && value.is_table()
3402                && let Some(rule_config_table) = value.as_table()
3403            {
3404                let rule_entry = fragment.rules.entry(resolved_rule_name.clone()).or_default();
3405                for (rk, rv) in rule_config_table {
3406                    let norm_rk = normalize_key(rk);
3407
3408                    // Special handling for severity
3409                    if norm_rk == "severity" {
3410                        if let Ok(severity) = crate::rule::Severity::deserialize(rv.clone()) {
3411                            if !fragment.rule_severities.contains_key(&resolved_rule_name) {
3412                                fragment
3413                                    .rule_severities
3414                                    .insert(resolved_rule_name.clone(), SourcedValue::new(severity, source));
3415                            } else {
3416                                fragment
3417                                    .rule_severities
3418                                    .get_mut(&resolved_rule_name)
3419                                    .unwrap()
3420                                    .push_override(severity, source, file.clone(), None);
3421                            }
3422                        }
3423                        continue; // Skip regular value processing for severity
3424                    }
3425
3426                    let toml_val = rv.clone();
3427
3428                    let sv = rule_entry
3429                        .values
3430                        .entry(norm_rk.clone())
3431                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3432                    sv.push_override(toml_val, source, file.clone(), None);
3433                }
3434            } else if registry.resolve_rule_name(key).is_none() {
3435                // Key is not a global/special key and not a recognized rule name
3436                // Track unknown keys under [tool.rumdl] for validation
3437                fragment
3438                    .unknown_keys
3439                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
3440            }
3441        }
3442    }
3443
3444    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
3445    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
3446        for (key, value) in tool_table.iter() {
3447            if let Some(rule_name) = key.strip_prefix("rumdl.") {
3448                // Try to resolve as a rule name (handles both canonical names and aliases)
3449                if let Some(resolved_rule_name) = registry.resolve_rule_name(rule_name) {
3450                    if let Some(rule_table) = value.as_table() {
3451                        let rule_entry = fragment.rules.entry(resolved_rule_name.clone()).or_default();
3452                        for (rk, rv) in rule_table {
3453                            let norm_rk = normalize_key(rk);
3454
3455                            // Special handling for severity
3456                            if norm_rk == "severity" {
3457                                if let Ok(severity) = crate::rule::Severity::deserialize(rv.clone()) {
3458                                    if !fragment.rule_severities.contains_key(&resolved_rule_name) {
3459                                        fragment
3460                                            .rule_severities
3461                                            .insert(resolved_rule_name.clone(), SourcedValue::new(severity, source));
3462                                    } else {
3463                                        fragment
3464                                            .rule_severities
3465                                            .get_mut(&resolved_rule_name)
3466                                            .unwrap()
3467                                            .push_override(severity, source, file.clone(), None);
3468                                    }
3469                                }
3470                                continue; // Skip regular value processing for severity
3471                            }
3472
3473                            let toml_val = rv.clone();
3474                            let sv = rule_entry
3475                                .values
3476                                .entry(norm_rk.clone())
3477                                .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
3478                            sv.push_override(toml_val, source, file.clone(), None);
3479                        }
3480                    }
3481                } else if rule_name.to_ascii_uppercase().starts_with("MD")
3482                    || rule_name.chars().any(|c| c.is_alphabetic())
3483                {
3484                    // Track unknown rule sections like [tool.rumdl.MD999] or [tool.rumdl.unknown-rule]
3485                    fragment.unknown_keys.push((
3486                        format!("[tool.rumdl.{rule_name}]"),
3487                        String::new(),
3488                        Some(path.to_string()),
3489                    ));
3490                }
3491            }
3492        }
3493    }
3494
3495    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007] or [tool.rumdl.line-length])
3496    if let Some(doc_table) = doc.as_table() {
3497        for (key, value) in doc_table.iter() {
3498            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
3499                // Try to resolve as a rule name (handles both canonical names and aliases)
3500                if let Some(resolved_rule_name) = registry.resolve_rule_name(rule_name) {
3501                    if let Some(rule_table) = value.as_table() {
3502                        let rule_entry = fragment.rules.entry(resolved_rule_name.clone()).or_default();
3503                        for (rk, rv) in rule_table {
3504                            let norm_rk = normalize_key(rk);
3505
3506                            // Special handling for severity
3507                            if norm_rk == "severity" {
3508                                if let Ok(severity) = crate::rule::Severity::deserialize(rv.clone()) {
3509                                    if !fragment.rule_severities.contains_key(&resolved_rule_name) {
3510                                        fragment
3511                                            .rule_severities
3512                                            .insert(resolved_rule_name.clone(), SourcedValue::new(severity, source));
3513                                    } else {
3514                                        fragment
3515                                            .rule_severities
3516                                            .get_mut(&resolved_rule_name)
3517                                            .unwrap()
3518                                            .push_override(severity, source, file.clone(), None);
3519                                    }
3520                                }
3521                                continue; // Skip regular value processing for severity
3522                            }
3523
3524                            let toml_val = rv.clone();
3525                            let sv = rule_entry
3526                                .values
3527                                .entry(norm_rk.clone())
3528                                .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
3529                            sv.push_override(toml_val, source, file.clone(), None);
3530                        }
3531                    }
3532                } else if rule_name.to_ascii_uppercase().starts_with("MD")
3533                    || rule_name.chars().any(|c| c.is_alphabetic())
3534                {
3535                    // Track unknown rule sections like [tool.rumdl.MD999] or [tool.rumdl.unknown-rule]
3536                    fragment.unknown_keys.push((
3537                        format!("[tool.rumdl.{rule_name}]"),
3538                        String::new(),
3539                        Some(path.to_string()),
3540                    ));
3541                }
3542            }
3543        }
3544    }
3545
3546    // Only return Some(fragment) if any config was found
3547    let has_any = !fragment.global.enable.value.is_empty()
3548        || !fragment.global.disable.value.is_empty()
3549        || !fragment.global.include.value.is_empty()
3550        || !fragment.global.exclude.value.is_empty()
3551        || !fragment.global.fixable.value.is_empty()
3552        || !fragment.global.unfixable.value.is_empty()
3553        || fragment.global.output_format.is_some()
3554        || fragment.global.cache_dir.is_some()
3555        || !fragment.global.cache.value
3556        || !fragment.per_file_ignores.value.is_empty()
3557        || !fragment.rules.is_empty();
3558    if has_any { Ok(Some(fragment)) } else { Ok(None) }
3559}
3560
3561/// Parses rumdl.toml / .rumdl.toml content.
3562fn parse_rumdl_toml(content: &str, path: &str, source: ConfigSource) -> Result<SourcedConfigFragment, ConfigError> {
3563    let doc = content
3564        .parse::<DocumentMut>()
3565        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
3566    let mut fragment = SourcedConfigFragment::default();
3567    // source parameter provided by caller
3568    let file = Some(path.to_string());
3569
3570    // Define known rules before the loop
3571    let all_rules = rules::all_rules(&Config::default());
3572    let registry = RuleRegistry::from_rules(&all_rules);
3573
3574    // Handle [global] section
3575    if let Some(global_item) = doc.get("global")
3576        && let Some(global_table) = global_item.as_table()
3577    {
3578        for (key, value_item) in global_table.iter() {
3579            let norm_key = normalize_key(key);
3580            match norm_key.as_str() {
3581                "enable" | "disable" | "include" | "exclude" => {
3582                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3583                        // Corrected: Iterate directly over the Formatted<Array>
3584                        let values: Vec<String> = formatted_array
3585                                .iter()
3586                                .filter_map(|item| item.as_str()) // Extract strings
3587                                .map(|s| s.to_string())
3588                                .collect();
3589
3590                        // Resolve rule name aliases for enable/disable (e.g., "ul-style" -> "MD004")
3591                        let final_values = if norm_key == "enable" || norm_key == "disable" {
3592                            values
3593                                .into_iter()
3594                                .map(|s| registry.resolve_rule_name(&s).unwrap_or_else(|| normalize_key(&s)))
3595                                .collect()
3596                        } else {
3597                            values
3598                        };
3599
3600                        match norm_key.as_str() {
3601                            "enable" => fragment
3602                                .global
3603                                .enable
3604                                .push_override(final_values, source, file.clone(), None),
3605                            "disable" => {
3606                                fragment
3607                                    .global
3608                                    .disable
3609                                    .push_override(final_values, source, file.clone(), None)
3610                            }
3611                            "include" => {
3612                                fragment
3613                                    .global
3614                                    .include
3615                                    .push_override(final_values, source, file.clone(), None)
3616                            }
3617                            "exclude" => {
3618                                fragment
3619                                    .global
3620                                    .exclude
3621                                    .push_override(final_values, source, file.clone(), None)
3622                            }
3623                            _ => unreachable!("Outer match guarantees only enable/disable/include/exclude"),
3624                        }
3625                    } else {
3626                        log::warn!(
3627                            "[WARN] Expected array for global key '{}' in {}, found {}",
3628                            key,
3629                            path,
3630                            value_item.type_name()
3631                        );
3632                    }
3633                }
3634                "respect_gitignore" | "respect-gitignore" => {
3635                    // Handle both cases
3636                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3637                        let val = *formatted_bool.value();
3638                        fragment
3639                            .global
3640                            .respect_gitignore
3641                            .push_override(val, source, file.clone(), None);
3642                    } else {
3643                        log::warn!(
3644                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3645                            key,
3646                            path,
3647                            value_item.type_name()
3648                        );
3649                    }
3650                }
3651                "force_exclude" | "force-exclude" => {
3652                    // Handle both cases
3653                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3654                        let val = *formatted_bool.value();
3655                        fragment
3656                            .global
3657                            .force_exclude
3658                            .push_override(val, source, file.clone(), None);
3659                    } else {
3660                        log::warn!(
3661                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3662                            key,
3663                            path,
3664                            value_item.type_name()
3665                        );
3666                    }
3667                }
3668                "line_length" | "line-length" => {
3669                    // Handle both cases
3670                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
3671                        let val = LineLength::new(*formatted_int.value() as usize);
3672                        fragment
3673                            .global
3674                            .line_length
3675                            .push_override(val, source, file.clone(), None);
3676                    } else {
3677                        log::warn!(
3678                            "[WARN] Expected integer for global key '{}' in {}, found {}",
3679                            key,
3680                            path,
3681                            value_item.type_name()
3682                        );
3683                    }
3684                }
3685                "output_format" | "output-format" => {
3686                    // Handle both cases
3687                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3688                        let val = formatted_string.value().clone();
3689                        if fragment.global.output_format.is_none() {
3690                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
3691                        } else {
3692                            fragment.global.output_format.as_mut().unwrap().push_override(
3693                                val,
3694                                source,
3695                                file.clone(),
3696                                None,
3697                            );
3698                        }
3699                    } else {
3700                        log::warn!(
3701                            "[WARN] Expected string for global key '{}' in {}, found {}",
3702                            key,
3703                            path,
3704                            value_item.type_name()
3705                        );
3706                    }
3707                }
3708                "cache_dir" | "cache-dir" => {
3709                    // Handle both cases
3710                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3711                        let val = formatted_string.value().clone();
3712                        if fragment.global.cache_dir.is_none() {
3713                            fragment.global.cache_dir = Some(SourcedValue::new(val.clone(), source));
3714                        } else {
3715                            fragment
3716                                .global
3717                                .cache_dir
3718                                .as_mut()
3719                                .unwrap()
3720                                .push_override(val, source, file.clone(), None);
3721                        }
3722                    } else {
3723                        log::warn!(
3724                            "[WARN] Expected string for global key '{}' in {}, found {}",
3725                            key,
3726                            path,
3727                            value_item.type_name()
3728                        );
3729                    }
3730                }
3731                "cache" => {
3732                    if let Some(toml_edit::Value::Boolean(b)) = value_item.as_value() {
3733                        let val = *b.value();
3734                        fragment.global.cache.push_override(val, source, file.clone(), None);
3735                    } else {
3736                        log::warn!(
3737                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3738                            key,
3739                            path,
3740                            value_item.type_name()
3741                        );
3742                    }
3743                }
3744                "fixable" => {
3745                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3746                        let values: Vec<String> = formatted_array
3747                            .iter()
3748                            .filter_map(|item| item.as_str())
3749                            .map(normalize_key)
3750                            .collect();
3751                        fragment
3752                            .global
3753                            .fixable
3754                            .push_override(values, source, file.clone(), None);
3755                    } else {
3756                        log::warn!(
3757                            "[WARN] Expected array for global key '{}' in {}, found {}",
3758                            key,
3759                            path,
3760                            value_item.type_name()
3761                        );
3762                    }
3763                }
3764                "unfixable" => {
3765                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3766                        let values: Vec<String> = formatted_array
3767                            .iter()
3768                            .filter_map(|item| item.as_str())
3769                            .map(|s| registry.resolve_rule_name(s).unwrap_or_else(|| normalize_key(s)))
3770                            .collect();
3771                        fragment
3772                            .global
3773                            .unfixable
3774                            .push_override(values, source, file.clone(), None);
3775                    } else {
3776                        log::warn!(
3777                            "[WARN] Expected array for global key '{}' in {}, found {}",
3778                            key,
3779                            path,
3780                            value_item.type_name()
3781                        );
3782                    }
3783                }
3784                "flavor" => {
3785                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3786                        let val = formatted_string.value();
3787                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
3788                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
3789                        } else {
3790                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
3791                        }
3792                    } else {
3793                        log::warn!(
3794                            "[WARN] Expected string for global key '{}' in {}, found {}",
3795                            key,
3796                            path,
3797                            value_item.type_name()
3798                        );
3799                    }
3800                }
3801                _ => {
3802                    // Track unknown global keys for validation
3803                    fragment
3804                        .unknown_keys
3805                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
3806                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
3807                }
3808            }
3809        }
3810    }
3811
3812    // Handle [per-file-ignores] section
3813    if let Some(per_file_item) = doc.get("per-file-ignores")
3814        && let Some(per_file_table) = per_file_item.as_table()
3815    {
3816        let mut per_file_map = HashMap::new();
3817        for (pattern, value_item) in per_file_table.iter() {
3818            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3819                let rules: Vec<String> = formatted_array
3820                    .iter()
3821                    .filter_map(|item| item.as_str())
3822                    .map(|s| registry.resolve_rule_name(s).unwrap_or_else(|| normalize_key(s)))
3823                    .collect();
3824                per_file_map.insert(pattern.to_string(), rules);
3825            } else {
3826                let type_name = value_item.type_name();
3827                log::warn!(
3828                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
3829                );
3830            }
3831        }
3832        fragment
3833            .per_file_ignores
3834            .push_override(per_file_map, source, file.clone(), None);
3835    }
3836
3837    // Rule-specific: all other top-level tables
3838    for (key, item) in doc.iter() {
3839        // Skip known special sections
3840        if key == "global" || key == "per-file-ignores" {
3841            continue;
3842        }
3843
3844        // Resolve rule name (handles both canonical names like "MD004" and aliases like "ul-style")
3845        let norm_rule_name = if let Some(resolved) = registry.resolve_rule_name(key) {
3846            resolved
3847        } else {
3848            // Unknown rule - always track it for validation and suggestions
3849            fragment
3850                .unknown_keys
3851                .push((format!("[{key}]"), String::new(), Some(path.to_string())));
3852            continue;
3853        };
3854
3855        if let Some(tbl) = item.as_table() {
3856            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
3857            for (rk, rv_item) in tbl.iter() {
3858                let norm_rk = normalize_key(rk);
3859
3860                // Special handling for severity
3861                if norm_rk == "severity" {
3862                    if let Some(toml_edit::Value::String(formatted_string)) = rv_item.as_value() {
3863                        let severity_str = formatted_string.value();
3864                        match crate::rule::Severity::deserialize(toml::Value::String(severity_str.to_string())) {
3865                            Ok(severity) => {
3866                                if !fragment.rule_severities.contains_key(&norm_rule_name) {
3867                                    fragment
3868                                        .rule_severities
3869                                        .insert(norm_rule_name.clone(), SourcedValue::new(severity, source));
3870                                } else {
3871                                    fragment
3872                                        .rule_severities
3873                                        .get_mut(&norm_rule_name)
3874                                        .unwrap()
3875                                        .push_override(severity, source, file.clone(), None);
3876                                }
3877                            }
3878                            Err(_) => {
3879                                log::warn!(
3880                                    "[WARN] Invalid severity '{severity_str}' for rule {norm_rule_name} in {path}. Valid values: error, warning"
3881                                );
3882                            }
3883                        }
3884                    }
3885                    continue; // Skip regular value processing for severity
3886                }
3887
3888                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
3889                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
3890                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
3891                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
3892                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
3893                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
3894                    Some(toml_edit::Value::Array(formatted_array)) => {
3895                        // Convert toml_edit Array to toml::Value::Array
3896                        let mut values = Vec::new();
3897                        for item in formatted_array.iter() {
3898                            match item {
3899                                toml_edit::Value::String(formatted) => {
3900                                    values.push(toml::Value::String(formatted.value().clone()))
3901                                }
3902                                toml_edit::Value::Integer(formatted) => {
3903                                    values.push(toml::Value::Integer(*formatted.value()))
3904                                }
3905                                toml_edit::Value::Float(formatted) => {
3906                                    values.push(toml::Value::Float(*formatted.value()))
3907                                }
3908                                toml_edit::Value::Boolean(formatted) => {
3909                                    values.push(toml::Value::Boolean(*formatted.value()))
3910                                }
3911                                toml_edit::Value::Datetime(formatted) => {
3912                                    values.push(toml::Value::Datetime(*formatted.value()))
3913                                }
3914                                _ => {
3915                                    log::warn!(
3916                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
3917                                    );
3918                                }
3919                            }
3920                        }
3921                        Some(toml::Value::Array(values))
3922                    }
3923                    Some(toml_edit::Value::InlineTable(_)) => {
3924                        log::warn!(
3925                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
3926                        );
3927                        None
3928                    }
3929                    None => {
3930                        log::warn!(
3931                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
3932                        );
3933                        None
3934                    }
3935                };
3936                if let Some(toml_val) = maybe_toml_val {
3937                    let sv = rule_entry
3938                        .values
3939                        .entry(norm_rk.clone())
3940                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3941                    sv.push_override(toml_val, source, file.clone(), None);
3942                }
3943            }
3944        } else if item.is_value() {
3945            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3946        }
3947    }
3948
3949    Ok(fragment)
3950}
3951
3952/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3953fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3954    // Use the unified loader from markdownlint_config.rs
3955    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3956        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3957    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3958}
3959
3960#[cfg(test)]
3961#[path = "config_intelligent_merge_tests.rs"]
3962mod config_intelligent_merge_tests;