rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use log;
8use serde::{Deserialize, Serialize};
9use std::collections::BTreeMap;
10use std::collections::{BTreeSet, HashMap, HashSet};
11use std::fmt;
12use std::fs;
13use std::io;
14use std::path::Path;
15use std::str::FromStr;
16use toml_edit::DocumentMut;
17
/// Markdown flavor/dialect enumeration
// NOTE(review): the `///` text on this type and its variants is deliberately left
// as-is; schemars presumably copies doc comments into the generated JSON schema, so
// rewording them would change that output — confirm before editing.
//
// The serialized spelling of every variant is pinned by an explicit
// `#[serde(rename = ...)]`; the container-level `rename_all = "lowercase"` yields the
// same spellings and acts as the fallback for any variant added without a rename.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments
    // Also accepted on input as "none" or the empty string (serde aliases).
    // This is the `Default` variant.
    #[serde(rename = "standard", alias = "none", alias = "")]
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    #[serde(rename = "mkdocs")]
    MkDocs,
    /// MDX flavor with JSX and ESM support (.mdx files)
    #[serde(rename = "mdx")]
    MDX,
    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
    #[serde(rename = "quarto")]
    Quarto,
    // Future flavors can be added here when they have actual implementation differences
    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
    // Planned: CommonMark - for strict CommonMark compliance
}
39
40impl fmt::Display for MarkdownFlavor {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        match self {
43            MarkdownFlavor::Standard => write!(f, "standard"),
44            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
45            MarkdownFlavor::MDX => write!(f, "mdx"),
46            MarkdownFlavor::Quarto => write!(f, "quarto"),
47        }
48    }
49}
50
51impl FromStr for MarkdownFlavor {
52    type Err = String;
53
54    fn from_str(s: &str) -> Result<Self, Self::Err> {
55        match s.to_lowercase().as_str() {
56            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
57            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
58            "mdx" => Ok(MarkdownFlavor::MDX),
59            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
60            // Accept but warn about unimplemented flavors
61            "gfm" | "github" => {
62                eprintln!("Warning: GFM flavor not yet implemented, using standard");
63                Ok(MarkdownFlavor::Standard)
64            }
65            "commonmark" => {
66                eprintln!("Warning: CommonMark flavor not yet implemented, using standard");
67                Ok(MarkdownFlavor::Standard)
68            }
69            _ => Err(format!("Unknown markdown flavor: {s}")),
70        }
71    }
72}
73
74impl MarkdownFlavor {
75    /// Detect flavor from file extension
76    pub fn from_extension(ext: &str) -> Self {
77        match ext.to_lowercase().as_str() {
78            "mdx" => Self::MDX,
79            "qmd" => Self::Quarto,
80            "rmd" => Self::Quarto,
81            _ => Self::Standard,
82        }
83    }
84
85    /// Detect flavor from file path
86    pub fn from_path(path: &std::path::Path) -> Self {
87        path.extension()
88            .and_then(|e| e.to_str())
89            .map(Self::from_extension)
90            .unwrap_or(Self::Standard)
91    }
92
93    /// Check if this flavor supports ESM imports/exports (MDX-specific)
94    pub fn supports_esm_blocks(self) -> bool {
95        matches!(self, Self::MDX)
96    }
97
98    /// Check if this flavor supports JSX components (MDX-specific)
99    pub fn supports_jsx(self) -> bool {
100        matches!(self, Self::MDX)
101    }
102
103    /// Check if this flavor supports auto-references (MkDocs-specific)
104    pub fn supports_auto_references(self) -> bool {
105        matches!(self, Self::MkDocs)
106    }
107
108    /// Get a human-readable name for this flavor
109    pub fn name(self) -> &'static str {
110        match self {
111            Self::Standard => "Standard",
112            Self::MkDocs => "MkDocs",
113            Self::MDX => "MDX",
114            Self::Quarto => "Quarto",
115        }
116    }
117}
118
/// Normalizes a configuration key.
///
/// * Rule identifiers — exactly five bytes, `md` in any letter case followed by
///   three ASCII digits (e.g. `md013`) — are returned in uppercase (`MD013`).
/// * Every other key is converted to lowercase kebab-case: underscores become
///   hyphens and ASCII letters are lowercased (`Line_Length` -> `line-length`).
pub fn normalize_key(key: &str) -> String {
    let looks_like_rule_id = matches!(
        key.as_bytes(),
        [b'm' | b'M', b'd' | b'D', digits @ ..]
            if digits.len() == 3 && digits.iter().all(u8::is_ascii_digit)
    );

    if looks_like_rule_id {
        return key.to_ascii_uppercase();
    }

    key.replace('_', "-").to_ascii_lowercase()
}
128
/// Represents a rule-specific configuration
// One `RuleConfig` holds the option table of a single rule section (e.g. `[MD013]`).
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    // `flatten` makes the option keys direct members of the rule table rather than
    // nesting them under a `values` key. Values are stored untyped as `toml::Value`;
    // typed access goes through `get_rule_config_value`.
    // For schema generation the map is described by `arbitrary_value_schema`.
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
137
/// Generate a JSON schema for arbitrary configuration values
///
/// Used through `#[schemars(schema_with = ...)]` on [`RuleConfig::values`]. The
/// returned schema is an object that permits any additional properties; the
/// generator argument is not used.
fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
    schemars::json_schema!({
        "type": "object",
        "additionalProperties": true
    })
}
145
/// Represents the complete configuration loaded from rumdl.toml
// NOTE(review): existing `///` text is left untouched because schemars presumably
// turns field doc comments into schema descriptions — confirm before rewording.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    // Falls back to `GlobalConfig::default()` when the `global` table is absent.
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    // Keys are glob patterns; they are evaluated by `get_ignored_rules_for_file`.
    // Serialized under the kebab-case key `per-file-ignores`.
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    // `flatten` puts rule sections at the top level of the document, next to
    // `global` and `per-file-ignores`, keyed by section name.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,
}
174
175impl Config {
176    /// Check if the Markdown flavor is set to MkDocs
177    pub fn is_mkdocs_flavor(&self) -> bool {
178        self.global.flavor == MarkdownFlavor::MkDocs
179    }
180
181    // Future methods for when GFM and CommonMark are implemented:
182    // pub fn is_gfm_flavor(&self) -> bool
183    // pub fn is_commonmark_flavor(&self) -> bool
184
185    /// Get the configured Markdown flavor
186    pub fn markdown_flavor(&self) -> MarkdownFlavor {
187        self.global.flavor
188    }
189
190    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
191    pub fn is_mkdocs_project(&self) -> bool {
192        self.is_mkdocs_flavor()
193    }
194
195    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
196    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
197    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
198        use globset::{Glob, GlobSetBuilder};
199
200        let mut ignored_rules = HashSet::new();
201
202        if self.per_file_ignores.is_empty() {
203            return ignored_rules;
204        }
205
206        // Build a globset for efficient matching
207        let mut builder = GlobSetBuilder::new();
208        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
209
210        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
211            if let Ok(glob) = Glob::new(pattern) {
212                builder.add(glob);
213                pattern_to_rules.push((idx, rules));
214            } else {
215                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
216            }
217        }
218
219        let globset = match builder.build() {
220            Ok(gs) => gs,
221            Err(e) => {
222                log::error!("Failed to build globset for per-file-ignores: {e}");
223                return ignored_rules;
224            }
225        };
226
227        // Match the file path against all patterns
228        for match_idx in globset.matches(file_path) {
229            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
230                for rule in rules.iter() {
231                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
232                    ignored_rules.insert(normalize_key(rule));
233                }
234            }
235        }
236
237        ignored_rules
238    }
239}
240
/// Global configuration options
// The struct-level `#[serde(default)]` fills any field missing from the input from
// `GlobalConfig::default()`, unless the field names its own default function.
// NOTE(review): existing `///` text is left untouched because schemars presumably
// turns field doc comments into schema descriptions — confirm before rewording.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default)]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    // Defaults to `true` (see `default_respect_gitignore`).
    #[serde(default = "default_respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    // Defaults to 80 (see `default_line_length`).
    #[serde(default = "default_line_length")]
    pub line_length: u64,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Omitted from serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    // The variants that exist in `MarkdownFlavor` are standard, mkdocs, mdx and quarto.
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    // Code that reads or writes this field needs `#[allow(deprecated)]`, as the
    // `Default` impl does.
    #[serde(default)]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,
}
296
/// Serde default for [`GlobalConfig::respect_gitignore`]: `.gitignore` files are
/// respected unless the configuration says otherwise.
fn default_respect_gitignore() -> bool {
    true
}
300
/// Serde default for [`GlobalConfig::line_length`]: 80.
fn default_line_length() -> u64 {
    80
}
304
305// Add the Default impl
306impl Default for GlobalConfig {
307    #[allow(deprecated)]
308    fn default() -> Self {
309        Self {
310            enable: Vec::new(),
311            disable: Vec::new(),
312            exclude: Vec::new(),
313            include: Vec::new(),
314            respect_gitignore: true,
315            line_length: 80,
316            output_format: None,
317            fixable: Vec::new(),
318            unfixable: Vec::new(),
319            flavor: MarkdownFlavor::default(),
320            force_exclude: false,
321        }
322    }
323}
324
/// File names recognized as markdownlint configuration files: dot-prefixed and
/// plain variants, each in JSON, JSONC and YAML (`.yaml`/`.yml`) form.
// NOTE(review): the list order presumably defines lookup priority — verify against
// the code that consumes this constant.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
335
336/// Create a default configuration file at the specified path
337pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
338    // Check if file already exists
339    if Path::new(path).exists() {
340        return Err(ConfigError::FileExists { path: path.to_string() });
341    }
342
343    // Default configuration content
344    let default_config = r#"# rumdl configuration file
345
346# Global configuration options
347[global]
348# List of rules to disable (uncomment and modify as needed)
349# disable = ["MD013", "MD033"]
350
351# List of rules to enable exclusively (if provided, only these rules will run)
352# enable = ["MD001", "MD003", "MD004"]
353
354# List of file/directory patterns to include for linting (if provided, only these will be linted)
355# include = [
356#    "docs/*.md",
357#    "src/**/*.md",
358#    "README.md"
359# ]
360
361# List of file/directory patterns to exclude from linting
362exclude = [
363    # Common directories to exclude
364    ".git",
365    ".github",
366    "node_modules",
367    "vendor",
368    "dist",
369    "build",
370
371    # Specific files or patterns
372    "CHANGELOG.md",
373    "LICENSE.md",
374]
375
376# Respect .gitignore files when scanning directories (default: true)
377respect-gitignore = true
378
379# Markdown flavor/dialect (uncomment to enable)
380# Options: mkdocs, gfm, commonmark
381# flavor = "mkdocs"
382
383# Rule-specific configurations (uncomment and modify as needed)
384
385# [MD003]
386# style = "atx"  # Heading style (atx, atx_closed, setext)
387
388# [MD004]
389# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
390
391# [MD007]
392# indent = 4  # Unordered list indentation
393
394# [MD013]
395# line-length = 100  # Line length
396# code-blocks = false  # Exclude code blocks from line length check
397# tables = false  # Exclude tables from line length check
398# headings = true  # Include headings in line length check
399
400# [MD044]
401# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
402# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
403"#;
404
405    // Write the default configuration to the file
406    match fs::write(path, default_config) {
407        Ok(_) => Ok(()),
408        Err(err) => Err(ConfigError::IoError {
409            source: err,
410            path: path.to_string(),
411        }),
412    }
413}
414
/// Errors that can occur when loading configuration
///
/// `create_default_config` reports its failures through this type as well.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// Failed to read the configuration file
    ///
    /// `path` is the file the operation targeted and `source` the underlying I/O
    /// error. `create_default_config` also returns this variant when writing fails.
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON)
    ///
    /// The payload is the parser's message text.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists
    ///
    /// Returned by `create_default_config` instead of overwriting `path`.
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
430
431/// Get a rule-specific configuration value
432/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
433/// for better markdownlint compatibility
434pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
435    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
436
437    let rule_config = config.rules.get(&norm_rule_name)?;
438
439    // Try multiple key variants to support both underscore and kebab-case formats
440    let key_variants = [
441        key.to_string(),       // Original key as provided
442        normalize_key(key),    // Normalized key (lowercase, kebab-case)
443        key.replace('-', "_"), // Convert kebab-case to snake_case
444        key.replace('_', "-"), // Convert snake_case to kebab-case
445    ];
446
447    // Try each variant until we find a match
448    for variant in &key_variants {
449        if let Some(value) = rule_config.values.get(variant)
450            && let Ok(result) = T::deserialize(value.clone())
451        {
452            return Some(result);
453        }
454    }
455
456    None
457}
458
/// Returns the starter `[tool.rumdl]` section for a `pyproject.toml`.
///
/// The text opens with a blank line, sets a few global options and lists
/// commented-out examples for several rule sections.
pub fn generate_pyproject_config() -> String {
    const TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(TEMPLATE)
}
501
502#[cfg(test)]
503mod tests {
504    use super::*;
505    use std::fs;
506    use tempfile::tempdir;
507
508    #[test]
509    fn test_flavor_loading() {
510        let temp_dir = tempdir().unwrap();
511        let config_path = temp_dir.path().join(".rumdl.toml");
512        let config_content = r#"
513[global]
514flavor = "mkdocs"
515disable = ["MD001"]
516"#;
517        fs::write(&config_path, config_content).unwrap();
518
519        // Load the config
520        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
521        let config: Config = sourced.into();
522
523        // Check that flavor was loaded
524        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
525        assert!(config.is_mkdocs_flavor());
526        assert!(config.is_mkdocs_project()); // Test backwards compatibility
527        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
528    }
529
530    #[test]
531    fn test_pyproject_toml_root_level_config() {
532        let temp_dir = tempdir().unwrap();
533        let config_path = temp_dir.path().join("pyproject.toml");
534
535        // Create a test pyproject.toml with root-level configuration
536        let content = r#"
537[tool.rumdl]
538line-length = 120
539disable = ["MD033"]
540enable = ["MD001", "MD004"]
541include = ["docs/*.md"]
542exclude = ["node_modules"]
543respect-gitignore = true
544        "#;
545
546        fs::write(&config_path, content).unwrap();
547
548        // Load the config with skip_auto_discovery to avoid environment config files
549        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
550        let config: Config = sourced.into(); // Convert to plain config for assertions
551
552        // Check global settings
553        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
554        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
555        // Should now contain only the configured pattern since auto-discovery is disabled
556        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
557        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
558        assert!(config.global.respect_gitignore);
559
560        // Check line-length was correctly added to MD013
561        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
562        assert_eq!(line_length, Some(120));
563    }
564
565    #[test]
566    fn test_pyproject_toml_snake_case_and_kebab_case() {
567        let temp_dir = tempdir().unwrap();
568        let config_path = temp_dir.path().join("pyproject.toml");
569
570        // Test with both kebab-case and snake_case variants
571        let content = r#"
572[tool.rumdl]
573line-length = 150
574respect_gitignore = true
575        "#;
576
577        fs::write(&config_path, content).unwrap();
578
579        // Load the config with skip_auto_discovery to avoid environment config files
580        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
581        let config: Config = sourced.into(); // Convert to plain config for assertions
582
583        // Check settings were correctly loaded
584        assert!(config.global.respect_gitignore);
585        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
586        assert_eq!(line_length, Some(150));
587    }
588
589    #[test]
590    fn test_md013_key_normalization_in_rumdl_toml() {
591        let temp_dir = tempdir().unwrap();
592        let config_path = temp_dir.path().join(".rumdl.toml");
593        let config_content = r#"
594[MD013]
595line_length = 111
596line-length = 222
597"#;
598        fs::write(&config_path, config_content).unwrap();
599        // Load the config with skip_auto_discovery to avoid environment config files
600        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
601        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
602        // Now we should only get the explicitly configured key
603        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
604        assert_eq!(keys, vec!["line-length"]);
605        let val = &rule_cfg.values["line-length"].value;
606        assert_eq!(val.as_integer(), Some(222));
607        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
608        let config: Config = sourced.clone().into();
609        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
610        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
611        assert_eq!(v1, Some(222));
612        assert_eq!(v2, Some(222));
613    }
614
615    #[test]
616    fn test_md013_section_case_insensitivity() {
617        let temp_dir = tempdir().unwrap();
618        let config_path = temp_dir.path().join(".rumdl.toml");
619        let config_content = r#"
620[md013]
621line-length = 101
622
623[Md013]
624line-length = 102
625
626[MD013]
627line-length = 103
628"#;
629        fs::write(&config_path, config_content).unwrap();
630        // Load the config with skip_auto_discovery to avoid environment config files
631        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
632        let config: Config = sourced.clone().into();
633        // Only the last section should win, and be present
634        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
635        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
636        assert_eq!(keys, vec!["line-length"]);
637        let val = &rule_cfg.values["line-length"].value;
638        assert_eq!(val.as_integer(), Some(103));
639        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
640        assert_eq!(v, Some(103));
641    }
642
643    #[test]
644    fn test_md013_key_snake_and_kebab_case() {
645        let temp_dir = tempdir().unwrap();
646        let config_path = temp_dir.path().join(".rumdl.toml");
647        let config_content = r#"
648[MD013]
649line_length = 201
650line-length = 202
651"#;
652        fs::write(&config_path, config_content).unwrap();
653        // Load the config with skip_auto_discovery to avoid environment config files
654        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
655        let config: Config = sourced.clone().into();
656        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
657        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
658        assert_eq!(keys, vec!["line-length"]);
659        let val = &rule_cfg.values["line-length"].value;
660        assert_eq!(val.as_integer(), Some(202));
661        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
662        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
663        assert_eq!(v1, Some(202));
664        assert_eq!(v2, Some(202));
665    }
666
667    #[test]
668    fn test_unknown_rule_section_is_ignored() {
669        let temp_dir = tempdir().unwrap();
670        let config_path = temp_dir.path().join(".rumdl.toml");
671        let config_content = r#"
672[MD999]
673foo = 1
674bar = 2
675[MD013]
676line-length = 303
677"#;
678        fs::write(&config_path, config_content).unwrap();
679        // Load the config with skip_auto_discovery to avoid environment config files
680        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
681        let config: Config = sourced.clone().into();
682        // MD999 should not be present
683        assert!(!sourced.rules.contains_key("MD999"));
684        // MD013 should be present and correct
685        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
686        assert_eq!(v, Some(303));
687    }
688
689    #[test]
690    fn test_invalid_toml_syntax() {
691        let temp_dir = tempdir().unwrap();
692        let config_path = temp_dir.path().join(".rumdl.toml");
693
694        // Invalid TOML with unclosed string
695        let config_content = r#"
696[MD013]
697line-length = "unclosed string
698"#;
699        fs::write(&config_path, config_content).unwrap();
700
701        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
702        assert!(result.is_err());
703        match result.unwrap_err() {
704            ConfigError::ParseError(msg) => {
705                // The actual error message from toml parser might vary
706                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
707            }
708            _ => panic!("Expected ParseError"),
709        }
710    }
711
712    #[test]
713    fn test_wrong_type_for_config_value() {
714        let temp_dir = tempdir().unwrap();
715        let config_path = temp_dir.path().join(".rumdl.toml");
716
717        // line-length should be a number, not a string
718        let config_content = r#"
719[MD013]
720line-length = "not a number"
721"#;
722        fs::write(&config_path, config_content).unwrap();
723
724        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
725        let config: Config = sourced.into();
726
727        // The value should be loaded as a string, not converted
728        let rule_config = config.rules.get("MD013").unwrap();
729        let value = rule_config.values.get("line-length").unwrap();
730        assert!(matches!(value, toml::Value::String(_)));
731    }
732
733    #[test]
734    fn test_empty_config_file() {
735        let temp_dir = tempdir().unwrap();
736        let config_path = temp_dir.path().join(".rumdl.toml");
737
738        // Empty file
739        fs::write(&config_path, "").unwrap();
740
741        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
742        let config: Config = sourced.into();
743
744        // Should have default values
745        assert_eq!(config.global.line_length, 80);
746        assert!(config.global.respect_gitignore);
747        assert!(config.rules.is_empty());
748    }
749
750    #[test]
751    fn test_malformed_pyproject_toml() {
752        let temp_dir = tempdir().unwrap();
753        let config_path = temp_dir.path().join("pyproject.toml");
754
755        // Missing closing bracket
756        let content = r#"
757[tool.rumdl
758line-length = 120
759"#;
760        fs::write(&config_path, content).unwrap();
761
762        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
763        assert!(result.is_err());
764    }
765
766    #[test]
767    fn test_conflicting_config_values() {
768        let temp_dir = tempdir().unwrap();
769        let config_path = temp_dir.path().join(".rumdl.toml");
770
771        // Both enable and disable the same rule - these need to be in a global section
772        let config_content = r#"
773[global]
774enable = ["MD013"]
775disable = ["MD013"]
776"#;
777        fs::write(&config_path, config_content).unwrap();
778
779        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
780        let config: Config = sourced.into();
781
782        // Conflict resolution: enable wins over disable
783        assert!(config.global.enable.contains(&"MD013".to_string()));
784        assert!(!config.global.disable.contains(&"MD013".to_string()));
785    }
786
787    #[test]
788    fn test_invalid_rule_names() {
789        let temp_dir = tempdir().unwrap();
790        let config_path = temp_dir.path().join(".rumdl.toml");
791
792        let config_content = r#"
793[global]
794enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
795disable = ["MD-001", "MD_002"]
796"#;
797        fs::write(&config_path, config_content).unwrap();
798
799        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
800        let config: Config = sourced.into();
801
802        // All values should be preserved as-is
803        assert_eq!(config.global.enable.len(), 4);
804        assert_eq!(config.global.disable.len(), 2);
805    }
806
807    #[test]
808    fn test_deeply_nested_config() {
809        let temp_dir = tempdir().unwrap();
810        let config_path = temp_dir.path().join(".rumdl.toml");
811
812        // This should be ignored as we don't support nested tables within rule configs
813        let config_content = r#"
814[MD013]
815line-length = 100
816[MD013.nested]
817value = 42
818"#;
819        fs::write(&config_path, config_content).unwrap();
820
821        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
822        let config: Config = sourced.into();
823
824        let rule_config = config.rules.get("MD013").unwrap();
825        assert_eq!(
826            rule_config.values.get("line-length").unwrap(),
827            &toml::Value::Integer(100)
828        );
829        // Nested table should not be present
830        assert!(!rule_config.values.contains_key("nested"));
831    }
832
833    #[test]
834    fn test_unicode_in_config() {
835        let temp_dir = tempdir().unwrap();
836        let config_path = temp_dir.path().join(".rumdl.toml");
837
838        let config_content = r#"
839[global]
840include = ["文档/*.md", "ドキュメント/*.md"]
841exclude = ["测试/*", "🚀/*"]
842
843[MD013]
844line-length = 80
845message = "行太长了 🚨"
846"#;
847        fs::write(&config_path, config_content).unwrap();
848
849        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
850        let config: Config = sourced.into();
851
852        assert_eq!(config.global.include.len(), 2);
853        assert_eq!(config.global.exclude.len(), 2);
854        assert!(config.global.include[0].contains("文档"));
855        assert!(config.global.exclude[1].contains("🚀"));
856
857        let rule_config = config.rules.get("MD013").unwrap();
858        let message = rule_config.values.get("message").unwrap();
859        if let toml::Value::String(s) = message {
860            assert!(s.contains("行太长了"));
861            assert!(s.contains("🚨"));
862        }
863    }
864
865    #[test]
866    fn test_extremely_long_values() {
867        let temp_dir = tempdir().unwrap();
868        let config_path = temp_dir.path().join(".rumdl.toml");
869
870        let long_string = "a".repeat(10000);
871        let config_content = format!(
872            r#"
873[global]
874exclude = ["{long_string}"]
875
876[MD013]
877line-length = 999999999
878"#
879        );
880
881        fs::write(&config_path, config_content).unwrap();
882
883        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
884        let config: Config = sourced.into();
885
886        assert_eq!(config.global.exclude[0].len(), 10000);
887        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
888        assert_eq!(line_length, Some(999999999));
889    }
890
891    #[test]
892    fn test_config_with_comments() {
893        let temp_dir = tempdir().unwrap();
894        let config_path = temp_dir.path().join(".rumdl.toml");
895
896        let config_content = r#"
897[global]
898# This is a comment
899enable = ["MD001"] # Enable MD001
900# disable = ["MD002"] # This is commented out
901
902[MD013] # Line length rule
903line-length = 100 # Set to 100 characters
904# ignored = true # This setting is commented out
905"#;
906        fs::write(&config_path, config_content).unwrap();
907
908        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
909        let config: Config = sourced.into();
910
911        assert_eq!(config.global.enable, vec!["MD001"]);
912        assert!(config.global.disable.is_empty()); // Commented out
913
914        let rule_config = config.rules.get("MD013").unwrap();
915        assert_eq!(rule_config.values.len(), 1); // Only line-length
916        assert!(!rule_config.values.contains_key("ignored"));
917    }
918
919    #[test]
920    fn test_arrays_in_rule_config() {
921        let temp_dir = tempdir().unwrap();
922        let config_path = temp_dir.path().join(".rumdl.toml");
923
924        let config_content = r#"
925[MD003]
926levels = [1, 2, 3]
927tags = ["important", "critical"]
928mixed = [1, "two", true]
929"#;
930        fs::write(&config_path, config_content).unwrap();
931
932        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
933        let config: Config = sourced.into();
934
935        // Arrays should now be properly parsed
936        let rule_config = config.rules.get("MD003").expect("MD003 config should exist");
937
938        // Check that arrays are present and correctly parsed
939        assert!(rule_config.values.contains_key("levels"));
940        assert!(rule_config.values.contains_key("tags"));
941        assert!(rule_config.values.contains_key("mixed"));
942
943        // Verify array contents
944        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
945            assert_eq!(levels.len(), 3);
946            assert_eq!(levels[0], toml::Value::Integer(1));
947            assert_eq!(levels[1], toml::Value::Integer(2));
948            assert_eq!(levels[2], toml::Value::Integer(3));
949        } else {
950            panic!("levels should be an array");
951        }
952
953        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
954            assert_eq!(tags.len(), 2);
955            assert_eq!(tags[0], toml::Value::String("important".to_string()));
956            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
957        } else {
958            panic!("tags should be an array");
959        }
960
961        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
962            assert_eq!(mixed.len(), 3);
963            assert_eq!(mixed[0], toml::Value::Integer(1));
964            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
965            assert_eq!(mixed[2], toml::Value::Boolean(true));
966        } else {
967            panic!("mixed should be an array");
968        }
969    }
970
971    #[test]
972    fn test_normalize_key_edge_cases() {
973        // Rule names
974        assert_eq!(normalize_key("MD001"), "MD001");
975        assert_eq!(normalize_key("md001"), "MD001");
976        assert_eq!(normalize_key("Md001"), "MD001");
977        assert_eq!(normalize_key("mD001"), "MD001");
978
979        // Non-rule names
980        assert_eq!(normalize_key("line_length"), "line-length");
981        assert_eq!(normalize_key("line-length"), "line-length");
982        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
983        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
984
985        // Edge cases
986        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
987        assert_eq!(normalize_key("MD00"), "md00"); // Too short
988        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
989        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
990        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
991        assert_eq!(normalize_key(""), "");
992        assert_eq!(normalize_key("_"), "-");
993        assert_eq!(normalize_key("___"), "---");
994    }
995
996    #[test]
997    fn test_missing_config_file() {
998        let temp_dir = tempdir().unwrap();
999        let config_path = temp_dir.path().join("nonexistent.toml");
1000
1001        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1002        assert!(result.is_err());
1003        match result.unwrap_err() {
1004            ConfigError::IoError { .. } => {}
1005            _ => panic!("Expected IoError for missing file"),
1006        }
1007    }
1008
1009    #[test]
1010    #[cfg(unix)]
1011    fn test_permission_denied_config() {
1012        use std::os::unix::fs::PermissionsExt;
1013
1014        let temp_dir = tempdir().unwrap();
1015        let config_path = temp_dir.path().join(".rumdl.toml");
1016
1017        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1018
1019        // Remove read permissions
1020        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1021        perms.set_mode(0o000);
1022        fs::set_permissions(&config_path, perms).unwrap();
1023
1024        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1025
1026        // Restore permissions for cleanup
1027        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1028        perms.set_mode(0o644);
1029        fs::set_permissions(&config_path, perms).unwrap();
1030
1031        assert!(result.is_err());
1032        match result.unwrap_err() {
1033            ConfigError::IoError { .. } => {}
1034            _ => panic!("Expected IoError for permission denied"),
1035        }
1036    }
1037
1038    #[test]
1039    fn test_circular_reference_detection() {
1040        // This test is more conceptual since TOML doesn't support circular references
1041        // But we test that deeply nested structures don't cause stack overflow
1042        let temp_dir = tempdir().unwrap();
1043        let config_path = temp_dir.path().join(".rumdl.toml");
1044
1045        let mut config_content = String::from("[MD001]\n");
1046        for i in 0..100 {
1047            config_content.push_str(&format!("key{i} = {i}\n"));
1048        }
1049
1050        fs::write(&config_path, config_content).unwrap();
1051
1052        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1053        let config: Config = sourced.into();
1054
1055        let rule_config = config.rules.get("MD001").unwrap();
1056        assert_eq!(rule_config.values.len(), 100);
1057    }
1058
1059    #[test]
1060    fn test_special_toml_values() {
1061        let temp_dir = tempdir().unwrap();
1062        let config_path = temp_dir.path().join(".rumdl.toml");
1063
1064        let config_content = r#"
1065[MD001]
1066infinity = inf
1067neg_infinity = -inf
1068not_a_number = nan
1069datetime = 1979-05-27T07:32:00Z
1070local_date = 1979-05-27
1071local_time = 07:32:00
1072"#;
1073        fs::write(&config_path, config_content).unwrap();
1074
1075        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1076        let config: Config = sourced.into();
1077
1078        // Some values might not be parsed due to parser limitations
1079        if let Some(rule_config) = config.rules.get("MD001") {
1080            // Check special float values if present
1081            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1082                assert!(f.is_infinite() && f.is_sign_positive());
1083            }
1084            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1085                assert!(f.is_infinite() && f.is_sign_negative());
1086            }
1087            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1088                assert!(f.is_nan());
1089            }
1090
1091            // Check datetime values if present
1092            if let Some(val) = rule_config.values.get("datetime") {
1093                assert!(matches!(val, toml::Value::Datetime(_)));
1094            }
1095            // Note: local_date and local_time might not be parsed by the current implementation
1096        }
1097    }
1098
1099    #[test]
1100    fn test_default_config_passes_validation() {
1101        use crate::rules;
1102
1103        let temp_dir = tempdir().unwrap();
1104        let config_path = temp_dir.path().join(".rumdl.toml");
1105        let config_path_str = config_path.to_str().unwrap();
1106
1107        // Create the default config using the same function that `rumdl init` uses
1108        create_default_config(config_path_str).unwrap();
1109
1110        // Load it back as a SourcedConfig
1111        let sourced =
1112            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1113
1114        // Create the rule registry
1115        let all_rules = rules::all_rules(&Config::default());
1116        let registry = RuleRegistry::from_rules(&all_rules);
1117
1118        // Validate the config
1119        let warnings = validate_config_sourced(&sourced, &registry);
1120
1121        // The default config should have no warnings
1122        if !warnings.is_empty() {
1123            for warning in &warnings {
1124                eprintln!("Config validation warning: {}", warning.message);
1125                if let Some(rule) = &warning.rule {
1126                    eprintln!("  Rule: {rule}");
1127                }
1128                if let Some(key) = &warning.key {
1129                    eprintln!("  Key: {key}");
1130                }
1131            }
1132        }
1133        assert!(
1134            warnings.is_empty(),
1135            "Default config from rumdl init should pass validation without warnings"
1136        );
1137    }
1138
1139    #[test]
1140    fn test_per_file_ignores_config_parsing() {
1141        let temp_dir = tempdir().unwrap();
1142        let config_path = temp_dir.path().join(".rumdl.toml");
1143        let config_content = r#"
1144[per-file-ignores]
1145"README.md" = ["MD033"]
1146"docs/**/*.md" = ["MD013", "MD033"]
1147"test/*.md" = ["MD041"]
1148"#;
1149        fs::write(&config_path, config_content).unwrap();
1150
1151        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1152        let config: Config = sourced.into();
1153
1154        // Verify per-file-ignores was loaded
1155        assert_eq!(config.per_file_ignores.len(), 3);
1156        assert_eq!(
1157            config.per_file_ignores.get("README.md"),
1158            Some(&vec!["MD033".to_string()])
1159        );
1160        assert_eq!(
1161            config.per_file_ignores.get("docs/**/*.md"),
1162            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1163        );
1164        assert_eq!(
1165            config.per_file_ignores.get("test/*.md"),
1166            Some(&vec!["MD041".to_string()])
1167        );
1168    }
1169
1170    #[test]
1171    fn test_per_file_ignores_glob_matching() {
1172        use std::path::PathBuf;
1173
1174        let temp_dir = tempdir().unwrap();
1175        let config_path = temp_dir.path().join(".rumdl.toml");
1176        let config_content = r#"
1177[per-file-ignores]
1178"README.md" = ["MD033"]
1179"docs/**/*.md" = ["MD013"]
1180"**/test_*.md" = ["MD041"]
1181"#;
1182        fs::write(&config_path, config_content).unwrap();
1183
1184        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1185        let config: Config = sourced.into();
1186
1187        // Test exact match
1188        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1189        assert!(ignored.contains("MD033"));
1190        assert_eq!(ignored.len(), 1);
1191
1192        // Test glob pattern matching
1193        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1194        assert!(ignored.contains("MD013"));
1195        assert_eq!(ignored.len(), 1);
1196
1197        // Test recursive glob pattern
1198        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1199        assert!(ignored.contains("MD041"));
1200        assert_eq!(ignored.len(), 1);
1201
1202        // Test non-matching path
1203        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1204        assert!(ignored.is_empty());
1205    }
1206
1207    #[test]
1208    fn test_per_file_ignores_pyproject_toml() {
1209        let temp_dir = tempdir().unwrap();
1210        let config_path = temp_dir.path().join("pyproject.toml");
1211        let config_content = r#"
1212[tool.rumdl]
1213[tool.rumdl.per-file-ignores]
1214"README.md" = ["MD033", "MD013"]
1215"generated/*.md" = ["MD041"]
1216"#;
1217        fs::write(&config_path, config_content).unwrap();
1218
1219        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1220        let config: Config = sourced.into();
1221
1222        // Verify per-file-ignores was loaded from pyproject.toml
1223        assert_eq!(config.per_file_ignores.len(), 2);
1224        assert_eq!(
1225            config.per_file_ignores.get("README.md"),
1226            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1227        );
1228        assert_eq!(
1229            config.per_file_ignores.get("generated/*.md"),
1230            Some(&vec!["MD041".to_string()])
1231        );
1232    }
1233
1234    #[test]
1235    fn test_per_file_ignores_multiple_patterns_match() {
1236        use std::path::PathBuf;
1237
1238        let temp_dir = tempdir().unwrap();
1239        let config_path = temp_dir.path().join(".rumdl.toml");
1240        let config_content = r#"
1241[per-file-ignores]
1242"docs/**/*.md" = ["MD013"]
1243"**/api/*.md" = ["MD033"]
1244"docs/api/overview.md" = ["MD041"]
1245"#;
1246        fs::write(&config_path, config_content).unwrap();
1247
1248        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1249        let config: Config = sourced.into();
1250
1251        // File matches multiple patterns - should get union of all rules
1252        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1253        assert_eq!(ignored.len(), 3);
1254        assert!(ignored.contains("MD013"));
1255        assert!(ignored.contains("MD033"));
1256        assert!(ignored.contains("MD041"));
1257    }
1258
1259    #[test]
1260    fn test_per_file_ignores_rule_name_normalization() {
1261        use std::path::PathBuf;
1262
1263        let temp_dir = tempdir().unwrap();
1264        let config_path = temp_dir.path().join(".rumdl.toml");
1265        let config_content = r#"
1266[per-file-ignores]
1267"README.md" = ["md033", "MD013", "Md041"]
1268"#;
1269        fs::write(&config_path, config_content).unwrap();
1270
1271        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1272        let config: Config = sourced.into();
1273
1274        // All rule names should be normalized to uppercase
1275        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1276        assert_eq!(ignored.len(), 3);
1277        assert!(ignored.contains("MD033"));
1278        assert!(ignored.contains("MD013"));
1279        assert!(ignored.contains("MD041"));
1280    }
1281
1282    #[test]
1283    fn test_per_file_ignores_invalid_glob_pattern() {
1284        use std::path::PathBuf;
1285
1286        let temp_dir = tempdir().unwrap();
1287        let config_path = temp_dir.path().join(".rumdl.toml");
1288        let config_content = r#"
1289[per-file-ignores]
1290"[invalid" = ["MD033"]
1291"valid/*.md" = ["MD013"]
1292"#;
1293        fs::write(&config_path, config_content).unwrap();
1294
1295        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1296        let config: Config = sourced.into();
1297
1298        // Invalid pattern should be skipped, valid pattern should work
1299        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1300        assert!(ignored.contains("MD013"));
1301
1302        // Invalid pattern should not cause issues
1303        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1304        assert!(ignored2.is_empty());
1305    }
1306
1307    #[test]
1308    fn test_per_file_ignores_empty_section() {
1309        use std::path::PathBuf;
1310
1311        let temp_dir = tempdir().unwrap();
1312        let config_path = temp_dir.path().join(".rumdl.toml");
1313        let config_content = r#"
1314[global]
1315disable = ["MD001"]
1316
1317[per-file-ignores]
1318"#;
1319        fs::write(&config_path, config_content).unwrap();
1320
1321        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1322        let config: Config = sourced.into();
1323
1324        // Empty per-file-ignores should work fine
1325        assert_eq!(config.per_file_ignores.len(), 0);
1326        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1327        assert!(ignored.is_empty());
1328    }
1329
1330    #[test]
1331    fn test_per_file_ignores_with_underscores_in_pyproject() {
1332        let temp_dir = tempdir().unwrap();
1333        let config_path = temp_dir.path().join("pyproject.toml");
1334        let config_content = r#"
1335[tool.rumdl]
1336[tool.rumdl.per_file_ignores]
1337"README.md" = ["MD033"]
1338"#;
1339        fs::write(&config_path, config_content).unwrap();
1340
1341        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1342        let config: Config = sourced.into();
1343
1344        // Should support both per-file-ignores and per_file_ignores
1345        assert_eq!(config.per_file_ignores.len(), 1);
1346        assert_eq!(
1347            config.per_file_ignores.get("README.md"),
1348            Some(&vec!["MD033".to_string()])
1349        );
1350    }
1351
1352    #[test]
1353    fn test_generate_json_schema() {
1354        use schemars::schema_for;
1355        use std::env;
1356
1357        let schema = schema_for!(Config);
1358        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1359
1360        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1361        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1362            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1363            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1364            println!("Schema written to: {}", schema_path.display());
1365        }
1366
1367        // Basic validation that schema was generated
1368        assert!(schema_json.contains("\"title\": \"Config\""));
1369        assert!(schema_json.contains("\"global\""));
1370        assert!(schema_json.contains("\"per-file-ignores\""));
1371    }
1372
1373    #[test]
1374    fn test_user_config_loaded_with_explicit_project_config() {
1375        // Regression test for issue #131: User config should always be loaded as base layer,
1376        // even when an explicit project config path is provided
1377        let temp_dir = tempdir().unwrap();
1378
1379        // Create a fake user config directory
1380        // Note: user_configuration_path_impl adds /rumdl to the config dir
1381        let user_config_dir = temp_dir.path().join("user_config");
1382        let rumdl_config_dir = user_config_dir.join("rumdl");
1383        fs::create_dir_all(&rumdl_config_dir).unwrap();
1384        let user_config_path = rumdl_config_dir.join("rumdl.toml");
1385
1386        // User config disables MD013 and MD041
1387        let user_config_content = r#"
1388[global]
1389disable = ["MD013", "MD041"]
1390line-length = 100
1391"#;
1392        fs::write(&user_config_path, user_config_content).unwrap();
1393
1394        // Create a project config that enables MD001
1395        let project_config_path = temp_dir.path().join("project").join("pyproject.toml");
1396        fs::create_dir_all(project_config_path.parent().unwrap()).unwrap();
1397        let project_config_content = r#"
1398[tool.rumdl]
1399enable = ["MD001"]
1400"#;
1401        fs::write(&project_config_path, project_config_content).unwrap();
1402
1403        // Load config with explicit project path, passing user_config_dir
1404        let sourced = SourcedConfig::load_with_discovery_impl(
1405            Some(project_config_path.to_str().unwrap()),
1406            None,
1407            false,
1408            Some(&user_config_dir),
1409        )
1410        .unwrap();
1411
1412        let config: Config = sourced.into();
1413
1414        // User config settings should be preserved
1415        assert!(
1416            config.global.disable.contains(&"MD013".to_string()),
1417            "User config disabled rules should be preserved"
1418        );
1419        assert!(
1420            config.global.disable.contains(&"MD041".to_string()),
1421            "User config disabled rules should be preserved"
1422        );
1423
1424        // Project config settings should also be applied (merged on top)
1425        assert!(
1426            config.global.enable.contains(&"MD001".to_string()),
1427            "Project config enabled rules should be applied"
1428        );
1429    }
1430}
1431
/// Configuration source with clear precedence hierarchy.
///
/// Precedence order (higher values override lower values; a source of equal
/// precedence also replaces the current value):
/// - Default (0): Built-in defaults
/// - UserConfig (1): User-level ~/.config/rumdl/rumdl.toml
/// - PyprojectToml (2): Project-level pyproject.toml
/// - ProjectConfig (3): Project-level .rumdl.toml (most specific)
/// - Cli (4): Command-line flags (highest priority)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default configuration
    Default,
    /// User-level configuration from ~/.config/rumdl/rumdl.toml
    UserConfig,
    /// Project-level configuration from pyproject.toml
    PyprojectToml,
    /// Project-level configuration from .rumdl.toml or rumdl.toml
    ProjectConfig,
    /// Command-line flags (highest precedence)
    Cli,
}
1453
/// One entry in the override history of a [`SourcedValue`]: a value together
/// with the source that supplied it.
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    /// The value supplied by this source.
    pub value: T,
    /// Which configuration layer supplied the value.
    pub source: ConfigSource,
    /// Optional file associated with the value (presumably the path of the
    /// config file it came from; `None` when not recorded).
    pub file: Option<String>,
    /// Optional line associated with the value (presumably the line within
    /// `file`; `None` when not recorded).
    pub line: Option<usize>,
}
1461
/// A configuration value annotated with its provenance.
///
/// Holds the currently effective value, the source it came from, and the
/// history of overrides that were applied to it.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    /// The currently effective value.
    pub value: T,
    /// The source of the currently effective value.
    pub source: ConfigSource,
    /// Accepted overrides in the order they were applied; `SourcedValue::new`
    /// seeds this with the initial value.
    pub overrides: Vec<ConfigOverride<T>>,
}
1468
1469impl<T: Clone> SourcedValue<T> {
1470    pub fn new(value: T, source: ConfigSource) -> Self {
1471        Self {
1472            value: value.clone(),
1473            source,
1474            overrides: vec![ConfigOverride {
1475                value,
1476                source,
1477                file: None,
1478                line: None,
1479            }],
1480        }
1481    }
1482
1483    /// Merges a new override into this SourcedValue based on source precedence.
1484    /// If the new source has higher or equal precedence, the value and source are updated,
1485    /// and the new override is added to the history.
1486    pub fn merge_override(
1487        &mut self,
1488        new_value: T,
1489        new_source: ConfigSource,
1490        new_file: Option<String>,
1491        new_line: Option<usize>,
1492    ) {
1493        // Helper function to get precedence, defined locally or globally
1494        fn source_precedence(src: ConfigSource) -> u8 {
1495            match src {
1496                ConfigSource::Default => 0,
1497                ConfigSource::UserConfig => 1,
1498                ConfigSource::PyprojectToml => 2,
1499                ConfigSource::ProjectConfig => 3,
1500                ConfigSource::Cli => 4,
1501            }
1502        }
1503
1504        if source_precedence(new_source) >= source_precedence(self.source) {
1505            self.value = new_value.clone();
1506            self.source = new_source;
1507            self.overrides.push(ConfigOverride {
1508                value: new_value,
1509                source: new_source,
1510                file: new_file,
1511                line: new_line,
1512            });
1513        }
1514    }
1515
1516    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1517        // This is essentially merge_override without the precedence check
1518        // We might consolidate these later, but keep separate for now during refactor
1519        self.value = value.clone();
1520        self.source = source;
1521        self.overrides.push(ConfigOverride {
1522            value,
1523            source,
1524            file,
1525            line,
1526        });
1527    }
1528}
1529
1530impl<T: Clone + Eq + std::hash::Hash> SourcedValue<Vec<T>> {
1531    /// Merges a new value using union semantics (for arrays like `disable`)
1532    /// Values from both sources are combined, with deduplication
1533    pub fn merge_union(
1534        &mut self,
1535        new_value: Vec<T>,
1536        new_source: ConfigSource,
1537        new_file: Option<String>,
1538        new_line: Option<usize>,
1539    ) {
1540        fn source_precedence(src: ConfigSource) -> u8 {
1541            match src {
1542                ConfigSource::Default => 0,
1543                ConfigSource::UserConfig => 1,
1544                ConfigSource::PyprojectToml => 2,
1545                ConfigSource::ProjectConfig => 3,
1546                ConfigSource::Cli => 4,
1547            }
1548        }
1549
1550        if source_precedence(new_source) >= source_precedence(self.source) {
1551            // Union: combine values from both sources with deduplication
1552            let mut combined = self.value.clone();
1553            for item in new_value.iter() {
1554                if !combined.contains(item) {
1555                    combined.push(item.clone());
1556                }
1557            }
1558
1559            self.value = combined;
1560            self.source = new_source;
1561            self.overrides.push(ConfigOverride {
1562                value: new_value,
1563                source: new_source,
1564                file: new_file,
1565                line: new_line,
1566            });
1567        }
1568    }
1569}
1570
/// Global (non-rule-specific) settings, each wrapped in a [`SourcedValue`] so
/// that its origin and override history are tracked.
///
/// Initial values are supplied by the `Default` implementation.
#[derive(Debug, Clone)]
pub struct SourcedGlobalConfig {
    /// The `enable` list (rule names such as `"MD001"`).
    pub enable: SourcedValue<Vec<String>>,
    /// The `disable` list (rule names such as `"MD013"`).
    pub disable: SourcedValue<Vec<String>>,
    /// The `exclude` list of path patterns.
    pub exclude: SourcedValue<Vec<String>>,
    /// The `include` list of path patterns.
    pub include: SourcedValue<Vec<String>>,
    /// The `respect-gitignore` flag; defaults to `true`.
    pub respect_gitignore: SourcedValue<bool>,
    /// The global `line-length` setting; defaults to `80`.
    pub line_length: SourcedValue<u64>,
    /// Output format, if one was configured; `None` by default.
    pub output_format: Option<SourcedValue<String>>,
    /// The `fixable` list (presumably rule names; confirm against the loader).
    pub fixable: SourcedValue<Vec<String>>,
    /// The `unfixable` list (presumably rule names; confirm against the loader).
    pub unfixable: SourcedValue<Vec<String>>,
    /// Markdown flavor; defaults to `MarkdownFlavor::default()`.
    pub flavor: SourcedValue<MarkdownFlavor>,
    /// The `force-exclude` flag; defaults to `false`.
    pub force_exclude: SourcedValue<bool>,
}
1585
1586impl Default for SourcedGlobalConfig {
1587    fn default() -> Self {
1588        SourcedGlobalConfig {
1589            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1590            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1591            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1592            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1593            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1594            line_length: SourcedValue::new(80, ConfigSource::Default),
1595            output_format: None,
1596            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1597            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1598            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1599            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1600        }
1601    }
1602}
1603
/// Provenance-tracked configuration of a single rule.
#[derive(Debug, Default, Clone)]
pub struct SourcedRuleConfig {
    /// Option name mapped to its TOML value together with source information.
    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
}
1608
/// Represents configuration loaded from a single source file, with provenance.
/// Used as an intermediate step before merging into the final SourcedConfig.
///
/// Note: unlike `SourcedConfig`, a fragment has no `loaded_files` field;
/// loaded files are tracked globally in `SourcedConfig`.
#[derive(Debug, Clone)]
pub struct SourcedConfigFragment {
    /// Global settings found in this source.
    pub global: SourcedGlobalConfig,
    /// Per-file ignores found in this source: pattern mapped to a list of rule names.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Rule-specific settings, keyed by rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Unrecognized keys, each stored as a `(section, key, file_path)` tuple.
    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
                                                             // Note: loaded_files is tracked globally in SourcedConfig.
}
1619
1620impl Default for SourcedConfigFragment {
1621    fn default() -> Self {
1622        Self {
1623            global: SourcedGlobalConfig::default(),
1624            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1625            rules: BTreeMap::new(),
1626            unknown_keys: Vec::new(),
1627        }
1628    }
1629}
1630
/// The merged configuration with provenance for every tracked value.
///
/// Individual `SourcedConfigFragment`s are folded into this structure by
/// `merge`.
#[derive(Debug, Clone)]
pub struct SourcedConfig {
    /// Global settings with source tracking.
    pub global: SourcedGlobalConfig,
    /// Per-file ignores: pattern mapped to a list of rule names.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Rule-specific settings, keyed by rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Config files recorded as loaded (presumably their paths; confirm against the loader).
    pub loaded_files: Vec<String>,
    /// Unrecognized keys, each stored as a `(section, key, file_path)` tuple.
    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
}
1639
1640impl Default for SourcedConfig {
1641    fn default() -> Self {
1642        Self {
1643            global: SourcedGlobalConfig::default(),
1644            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1645            rules: BTreeMap::new(),
1646            loaded_files: Vec::new(),
1647            unknown_keys: Vec::new(),
1648        }
1649    }
1650}
1651
1652impl SourcedConfig {
1653    /// Merges another SourcedConfigFragment into this SourcedConfig.
1654    /// Uses source precedence to determine which values take effect.
1655    fn merge(&mut self, fragment: SourcedConfigFragment) {
1656        // Merge global config
1657        // Enable uses replace semantics (project can enforce rules)
1658        self.global.enable.merge_override(
1659            fragment.global.enable.value,
1660            fragment.global.enable.source,
1661            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1662            fragment.global.enable.overrides.first().and_then(|o| o.line),
1663        );
1664
1665        // Disable uses union semantics (user can add to project disables)
1666        self.global.disable.merge_union(
1667            fragment.global.disable.value,
1668            fragment.global.disable.source,
1669            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1670            fragment.global.disable.overrides.first().and_then(|o| o.line),
1671        );
1672
1673        // Conflict resolution: Enable overrides disable
1674        // Remove any rules from disable that appear in enable
1675        self.global
1676            .disable
1677            .value
1678            .retain(|rule| !self.global.enable.value.contains(rule));
1679        self.global.include.merge_override(
1680            fragment.global.include.value,
1681            fragment.global.include.source,
1682            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1683            fragment.global.include.overrides.first().and_then(|o| o.line),
1684        );
1685        self.global.exclude.merge_override(
1686            fragment.global.exclude.value,
1687            fragment.global.exclude.source,
1688            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1689            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1690        );
1691        self.global.respect_gitignore.merge_override(
1692            fragment.global.respect_gitignore.value,
1693            fragment.global.respect_gitignore.source,
1694            fragment
1695                .global
1696                .respect_gitignore
1697                .overrides
1698                .first()
1699                .and_then(|o| o.file.clone()),
1700            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1701        );
1702        self.global.line_length.merge_override(
1703            fragment.global.line_length.value,
1704            fragment.global.line_length.source,
1705            fragment
1706                .global
1707                .line_length
1708                .overrides
1709                .first()
1710                .and_then(|o| o.file.clone()),
1711            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1712        );
1713        self.global.fixable.merge_override(
1714            fragment.global.fixable.value,
1715            fragment.global.fixable.source,
1716            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1717            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1718        );
1719        self.global.unfixable.merge_override(
1720            fragment.global.unfixable.value,
1721            fragment.global.unfixable.source,
1722            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1723            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1724        );
1725
1726        // Merge flavor
1727        self.global.flavor.merge_override(
1728            fragment.global.flavor.value,
1729            fragment.global.flavor.source,
1730            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1731            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1732        );
1733
1734        // Merge force_exclude
1735        self.global.force_exclude.merge_override(
1736            fragment.global.force_exclude.value,
1737            fragment.global.force_exclude.source,
1738            fragment
1739                .global
1740                .force_exclude
1741                .overrides
1742                .first()
1743                .and_then(|o| o.file.clone()),
1744            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1745        );
1746
1747        // Merge output_format if present
1748        if let Some(output_format_fragment) = fragment.global.output_format {
1749            if let Some(ref mut output_format) = self.global.output_format {
1750                output_format.merge_override(
1751                    output_format_fragment.value,
1752                    output_format_fragment.source,
1753                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1754                    output_format_fragment.overrides.first().and_then(|o| o.line),
1755                );
1756            } else {
1757                self.global.output_format = Some(output_format_fragment);
1758            }
1759        }
1760
1761        // Merge per_file_ignores
1762        self.per_file_ignores.merge_override(
1763            fragment.per_file_ignores.value,
1764            fragment.per_file_ignores.source,
1765            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1766            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1767        );
1768
1769        // Merge rule configs
1770        for (rule_name, rule_fragment) in fragment.rules {
1771            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1772            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1773            for (key, sourced_value_fragment) in rule_fragment.values {
1774                let sv_entry = rule_entry
1775                    .values
1776                    .entry(key.clone())
1777                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1778                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1779                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1780                sv_entry.merge_override(
1781                    sourced_value_fragment.value,  // Use the value from the fragment
1782                    sourced_value_fragment.source, // Use the source from the fragment
1783                    file_from_fragment,            // Pass the file path from the fragment override
1784                    line_from_fragment,            // Pass the line number from the fragment override
1785                );
1786            }
1787        }
1788
1789        // Merge unknown_keys from fragment
1790        for (section, key, file_path) in fragment.unknown_keys {
1791            // Deduplicate: only add if not already present
1792            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
1793                self.unknown_keys.push((section, key, file_path));
1794            }
1795        }
1796    }
1797
1798    /// Load and merge configurations from files and CLI overrides.
1799    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
1800        Self::load_with_discovery(config_path, cli_overrides, false)
1801    }
1802
1803    /// Discover configuration file by traversing up the directory tree.
1804    /// Returns the first configuration file found.
1805    fn discover_config_upward() -> Option<std::path::PathBuf> {
1806        use std::env;
1807
1808        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1809        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
1810
1811        let start_dir = match env::current_dir() {
1812            Ok(dir) => dir,
1813            Err(e) => {
1814                log::debug!("[rumdl-config] Failed to get current directory: {e}");
1815                return None;
1816            }
1817        };
1818
1819        let mut current_dir = start_dir.clone();
1820        let mut depth = 0;
1821
1822        loop {
1823            if depth >= MAX_DEPTH {
1824                log::debug!("[rumdl-config] Maximum traversal depth reached");
1825                break;
1826            }
1827
1828            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
1829
1830            // Check for config files in order of precedence
1831            for config_name in CONFIG_FILES {
1832                let config_path = current_dir.join(config_name);
1833
1834                if config_path.exists() {
1835                    // For pyproject.toml, verify it contains [tool.rumdl] section
1836                    if *config_name == "pyproject.toml" {
1837                        if let Ok(content) = std::fs::read_to_string(&config_path) {
1838                            if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1839                                log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1840                                return Some(config_path);
1841                            }
1842                            log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
1843                            continue;
1844                        }
1845                    } else {
1846                        log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1847                        return Some(config_path);
1848                    }
1849                }
1850            }
1851
1852            // Check for .git directory (stop boundary)
1853            if current_dir.join(".git").exists() {
1854                log::debug!("[rumdl-config] Stopping at .git directory");
1855                break;
1856            }
1857
1858            // Move to parent directory
1859            match current_dir.parent() {
1860                Some(parent) => {
1861                    current_dir = parent.to_owned();
1862                    depth += 1;
1863                }
1864                None => {
1865                    log::debug!("[rumdl-config] Reached filesystem root");
1866                    break;
1867                }
1868            }
1869        }
1870
1871        None
1872    }
1873
1874    /// Internal implementation that accepts config directory for testing
1875    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
1876        let config_dir = config_dir.join("rumdl");
1877
1878        // Check for config files in precedence order (same as project discovery)
1879        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1880
1881        log::debug!(
1882            "[rumdl-config] Checking for user configuration in: {}",
1883            config_dir.display()
1884        );
1885
1886        for filename in USER_CONFIG_FILES {
1887            let config_path = config_dir.join(filename);
1888
1889            if config_path.exists() {
1890                // For pyproject.toml, verify it contains [tool.rumdl] section
1891                if *filename == "pyproject.toml" {
1892                    if let Ok(content) = std::fs::read_to_string(&config_path) {
1893                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1894                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1895                            return Some(config_path);
1896                        }
1897                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
1898                        continue;
1899                    }
1900                } else {
1901                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1902                    return Some(config_path);
1903                }
1904            }
1905        }
1906
1907        log::debug!(
1908            "[rumdl-config] No user configuration found in: {}",
1909            config_dir.display()
1910        );
1911        None
1912    }
1913
1914    /// Discover user-level configuration file from platform-specific config directory.
1915    /// Returns the first configuration file found in the user config directory.
1916    fn user_configuration_path() -> Option<std::path::PathBuf> {
1917        use etcetera::{BaseStrategy, choose_base_strategy};
1918
1919        match choose_base_strategy() {
1920            Ok(strategy) => {
1921                let config_dir = strategy.config_dir();
1922                Self::user_configuration_path_impl(&config_dir)
1923            }
1924            Err(e) => {
1925                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
1926                None
1927            }
1928        }
1929    }
1930
1931    /// Internal implementation that accepts user config directory for testing
1932    #[doc(hidden)]
1933    pub fn load_with_discovery_impl(
1934        config_path: Option<&str>,
1935        cli_overrides: Option<&SourcedGlobalConfig>,
1936        skip_auto_discovery: bool,
1937        user_config_dir: Option<&Path>,
1938    ) -> Result<Self, ConfigError> {
1939        use std::env;
1940        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
1941        if config_path.is_none() {
1942            if skip_auto_discovery {
1943                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
1944            } else {
1945                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
1946            }
1947        } else {
1948            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
1949        }
1950        let mut sourced_config = SourcedConfig::default();
1951
1952        // 1. Always load user configuration first (unless auto-discovery is disabled)
1953        // User config serves as the base layer that project configs build upon
1954        if !skip_auto_discovery {
1955            let user_config_path = if let Some(dir) = user_config_dir {
1956                Self::user_configuration_path_impl(dir)
1957            } else {
1958                Self::user_configuration_path()
1959            };
1960
1961            if let Some(user_config_path) = user_config_path {
1962                let path_str = user_config_path.display().to_string();
1963                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1964
1965                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
1966
1967                if filename == "pyproject.toml" {
1968                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1969                        source: e,
1970                        path: path_str.clone(),
1971                    })?;
1972                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1973                        sourced_config.merge(fragment);
1974                        sourced_config.loaded_files.push(path_str);
1975                    }
1976                } else {
1977                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1978                        source: e,
1979                        path: path_str.clone(),
1980                    })?;
1981                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::UserConfig)?;
1982                    sourced_config.merge(fragment);
1983                    sourced_config.loaded_files.push(path_str);
1984                }
1985            } else {
1986                log::debug!("[rumdl-config] No user configuration file found");
1987            }
1988        }
1989
1990        // 2. Load explicit config path if provided (overrides user config)
1991        if let Some(path) = config_path {
1992            let path_obj = Path::new(path);
1993            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
1994            log::debug!("[rumdl-config] Trying to load config file: {filename}");
1995            let path_str = path.to_string();
1996
1997            // Known markdownlint config files
1998            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
1999
2000            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
2001                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2002                    source: e,
2003                    path: path_str.clone(),
2004                })?;
2005                if filename == "pyproject.toml" {
2006                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2007                        sourced_config.merge(fragment);
2008                        sourced_config.loaded_files.push(path_str.clone());
2009                    }
2010                } else {
2011                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2012                    sourced_config.merge(fragment);
2013                    sourced_config.loaded_files.push(path_str.clone());
2014                }
2015            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
2016                || path_str.ends_with(".json")
2017                || path_str.ends_with(".jsonc")
2018                || path_str.ends_with(".yaml")
2019                || path_str.ends_with(".yml")
2020            {
2021                // Parse as markdownlint config (JSON/YAML)
2022                let fragment = load_from_markdownlint(&path_str)?;
2023                sourced_config.merge(fragment);
2024                sourced_config.loaded_files.push(path_str.clone());
2025                // markdownlint is fallback only
2026            } else {
2027                // Try TOML only
2028                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2029                    source: e,
2030                    path: path_str.clone(),
2031                })?;
2032                let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2033                sourced_config.merge(fragment);
2034                sourced_config.loaded_files.push(path_str.clone());
2035            }
2036        }
2037
2038        // 3. Perform auto-discovery for project config if not skipped AND no explicit config path
2039        if !skip_auto_discovery && config_path.is_none() {
2040            // Look for project configuration files (override user config)
2041            if let Some(config_file) = Self::discover_config_upward() {
2042                let path_str = config_file.display().to_string();
2043                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
2044
2045                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
2046
2047                if filename == "pyproject.toml" {
2048                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2049                        source: e,
2050                        path: path_str.clone(),
2051                    })?;
2052                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2053                        sourced_config.merge(fragment);
2054                        sourced_config.loaded_files.push(path_str);
2055                    }
2056                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
2057                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2058                        source: e,
2059                        path: path_str.clone(),
2060                    })?;
2061                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2062                    sourced_config.merge(fragment);
2063                    sourced_config.loaded_files.push(path_str);
2064                }
2065            } else {
2066                log::debug!("[rumdl-config] No configuration file found via upward traversal");
2067
2068                // If no project config found, fallback to markdownlint config in current directory
2069                let mut found_markdownlint = false;
2070                for filename in MARKDOWNLINT_CONFIG_FILES {
2071                    if std::path::Path::new(filename).exists() {
2072                        match load_from_markdownlint(filename) {
2073                            Ok(fragment) => {
2074                                sourced_config.merge(fragment);
2075                                sourced_config.loaded_files.push(filename.to_string());
2076                                found_markdownlint = true;
2077                                break; // Load only the first one found
2078                            }
2079                            Err(_e) => {
2080                                // Log error but continue (it's just a fallback)
2081                            }
2082                        }
2083                    }
2084                }
2085
2086                if !found_markdownlint {
2087                    log::debug!("[rumdl-config] No markdownlint configuration file found");
2088                }
2089            }
2090        }
2091
2092        // 4. Apply CLI overrides (highest precedence)
2093        if let Some(cli) = cli_overrides {
2094            sourced_config
2095                .global
2096                .enable
2097                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
2098            sourced_config
2099                .global
2100                .disable
2101                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
2102            sourced_config
2103                .global
2104                .exclude
2105                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
2106            sourced_config
2107                .global
2108                .include
2109                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
2110            sourced_config.global.respect_gitignore.merge_override(
2111                cli.respect_gitignore.value,
2112                ConfigSource::Cli,
2113                None,
2114                None,
2115            );
2116            sourced_config
2117                .global
2118                .fixable
2119                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
2120            sourced_config
2121                .global
2122                .unfixable
2123                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2124            // No rule-specific CLI overrides implemented yet
2125        }
2126
2127        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2128
2129        Ok(sourced_config)
2130    }
2131
2132    /// Load and merge configurations from files and CLI overrides.
2133    /// If skip_auto_discovery is true, only explicit config paths are loaded.
2134    pub fn load_with_discovery(
2135        config_path: Option<&str>,
2136        cli_overrides: Option<&SourcedGlobalConfig>,
2137        skip_auto_discovery: bool,
2138    ) -> Result<Self, ConfigError> {
2139        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
2140    }
2141}
2142
2143impl From<SourcedConfig> for Config {
2144    fn from(sourced: SourcedConfig) -> Self {
2145        let mut rules = BTreeMap::new();
2146        for (rule_name, sourced_rule_cfg) in sourced.rules {
2147            // Normalize rule name to uppercase for case-insensitive lookup
2148            let normalized_rule_name = rule_name.to_ascii_uppercase();
2149            let mut values = BTreeMap::new();
2150            for (key, sourced_val) in sourced_rule_cfg.values {
2151                values.insert(key, sourced_val.value);
2152            }
2153            rules.insert(normalized_rule_name, RuleConfig { values });
2154        }
2155        #[allow(deprecated)]
2156        let global = GlobalConfig {
2157            enable: sourced.global.enable.value,
2158            disable: sourced.global.disable.value,
2159            exclude: sourced.global.exclude.value,
2160            include: sourced.global.include.value,
2161            respect_gitignore: sourced.global.respect_gitignore.value,
2162            line_length: sourced.global.line_length.value,
2163            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2164            fixable: sourced.global.fixable.value,
2165            unfixable: sourced.global.unfixable.value,
2166            flavor: sourced.global.flavor.value,
2167            force_exclude: sourced.global.force_exclude.value,
2168        };
2169        Config {
2170            global,
2171            per_file_ignores: sourced.per_file_ignores.value,
2172            rules,
2173        }
2174    }
2175}
2176
2177/// Registry of all known rules and their config schemas.
/// Built by `RuleRegistry::from_rules`; both maps are keyed by the rule name after `normalize_key`.
2178pub struct RuleRegistry {
2179    /// Map of rule name (e.g. "MD013") to the rule's default config table: valid config keys and their TOML default values (an empty table when the rule exposes none)
2180    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
2181    /// Map of rule name to config key aliases (alias key -> canonical schema key); only present for rules that provide aliases
2182    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
2183}
2184
2185impl RuleRegistry {
2186    /// Build a registry from a list of rules
2187    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2188        let mut rule_schemas = std::collections::BTreeMap::new();
2189        let mut rule_aliases = std::collections::BTreeMap::new();
2190
2191        for rule in rules {
2192            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2193                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2194                rule_schemas.insert(norm_name.clone(), table);
2195                norm_name
2196            } else {
2197                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2198                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2199                norm_name
2200            };
2201
2202            // Store aliases if the rule provides them
2203            if let Some(aliases) = rule.config_aliases() {
2204                rule_aliases.insert(norm_name, aliases);
2205            }
2206        }
2207
2208        RuleRegistry {
2209            rule_schemas,
2210            rule_aliases,
2211        }
2212    }
2213
2214    /// Get all known rule names
2215    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2216        self.rule_schemas.keys().cloned().collect()
2217    }
2218
2219    /// Get the valid configuration keys for a rule, including both original and normalized variants
2220    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2221        self.rule_schemas.get(rule).map(|schema| {
2222            let mut all_keys = std::collections::BTreeSet::new();
2223
2224            // Add original keys from schema
2225            for key in schema.keys() {
2226                all_keys.insert(key.clone());
2227            }
2228
2229            // Add normalized variants for markdownlint compatibility
2230            for key in schema.keys() {
2231                // Add kebab-case variant
2232                all_keys.insert(key.replace('_', "-"));
2233                // Add snake_case variant
2234                all_keys.insert(key.replace('-', "_"));
2235                // Add normalized variant
2236                all_keys.insert(normalize_key(key));
2237            }
2238
2239            // Add any aliases defined by the rule
2240            if let Some(aliases) = self.rule_aliases.get(rule) {
2241                for alias_key in aliases.keys() {
2242                    all_keys.insert(alias_key.clone());
2243                    // Also add normalized variants of the alias
2244                    all_keys.insert(alias_key.replace('_', "-"));
2245                    all_keys.insert(alias_key.replace('-', "_"));
2246                    all_keys.insert(normalize_key(alias_key));
2247                }
2248            }
2249
2250            all_keys
2251        })
2252    }
2253
2254    /// Get the expected value type for a rule's configuration key, trying variants
2255    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2256        if let Some(schema) = self.rule_schemas.get(rule) {
2257            // Check if this key is an alias
2258            if let Some(aliases) = self.rule_aliases.get(rule)
2259                && let Some(canonical_key) = aliases.get(key)
2260            {
2261                // Use the canonical key for schema lookup
2262                if let Some(value) = schema.get(canonical_key) {
2263                    return Some(value);
2264                }
2265            }
2266
2267            // Try the original key
2268            if let Some(value) = schema.get(key) {
2269                return Some(value);
2270            }
2271
2272            // Try key variants
2273            let key_variants = [
2274                key.replace('-', "_"), // Convert kebab-case to snake_case
2275                key.replace('_', "-"), // Convert snake_case to kebab-case
2276                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2277            ];
2278
2279            for variant in &key_variants {
2280                if let Some(value) = schema.get(variant) {
2281                    return Some(value);
2282                }
2283            }
2284        }
2285        None
2286    }
2287}
2288
2289/// Represents a single finding (warning or error) produced while validating a loaded config.
2290#[derive(Debug, Clone)]
2291pub struct ConfigValidationWarning {
2292    pub message: String, // human-readable description of the problem
2293    pub rule: Option<String>, // name of the rule the finding refers to, when it concerns a rule section
2294    pub key: Option<String>, // offending option key; `None` when the whole rule is unknown
2295}
2296
2297/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking
2298pub fn validate_config_sourced(sourced: &SourcedConfig, registry: &RuleRegistry) -> Vec<ConfigValidationWarning> {
2299    let mut warnings = Vec::new();
2300    let known_rules = registry.rule_names();
2301    // 1. Unknown rules
2302    for rule in sourced.rules.keys() {
2303        if !known_rules.contains(rule) {
2304            warnings.push(ConfigValidationWarning {
2305                message: format!("Unknown rule in config: {rule}"),
2306                rule: Some(rule.clone()),
2307                key: None,
2308            });
2309        }
2310    }
2311    // 2. Unknown options and type mismatches
2312    for (rule, rule_cfg) in &sourced.rules {
2313        if let Some(valid_keys) = registry.config_keys_for(rule) {
2314            for key in rule_cfg.values.keys() {
2315                if !valid_keys.contains(key) {
2316                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2317                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2318                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2319                    } else {
2320                        format!("Unknown option for rule {rule}: {key}")
2321                    };
2322                    warnings.push(ConfigValidationWarning {
2323                        message,
2324                        rule: Some(rule.clone()),
2325                        key: Some(key.clone()),
2326                    });
2327                } else {
2328                    // Type check: compare type of value to type of default
2329                    if let Some(expected) = registry.expected_value_for(rule, key) {
2330                        let actual = &rule_cfg.values[key].value;
2331                        if !toml_value_type_matches(expected, actual) {
2332                            warnings.push(ConfigValidationWarning {
2333                                message: format!(
2334                                    "Type mismatch for {}.{}: expected {}, got {}",
2335                                    rule,
2336                                    key,
2337                                    toml_type_name(expected),
2338                                    toml_type_name(actual)
2339                                ),
2340                                rule: Some(rule.clone()),
2341                                key: Some(key.clone()),
2342                            });
2343                        }
2344                    }
2345                }
2346            }
2347        }
2348    }
2349    // 3. Unknown global options (from unknown_keys)
2350    let known_global_keys = vec![
2351        "enable".to_string(),
2352        "disable".to_string(),
2353        "include".to_string(),
2354        "exclude".to_string(),
2355        "respect-gitignore".to_string(),
2356        "line-length".to_string(),
2357        "fixable".to_string(),
2358        "unfixable".to_string(),
2359        "flavor".to_string(),
2360        "force-exclude".to_string(),
2361        "output-format".to_string(),
2362    ];
2363
2364    for (section, key, file_path) in &sourced.unknown_keys {
2365        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2366            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2367                if let Some(path) = file_path {
2368                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2369                } else {
2370                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2371                }
2372            } else if let Some(path) = file_path {
2373                format!("Unknown global option in {path}: {key}")
2374            } else {
2375                format!("Unknown global option: {key}")
2376            };
2377            warnings.push(ConfigValidationWarning {
2378                message,
2379                rule: None,
2380                key: Some(key.clone()),
2381            });
2382        } else if !key.is_empty() {
2383            // This is an unknown rule section (key is empty means it's a section header)
2384            // No suggestions for rule names - just warn
2385            continue;
2386        } else {
2387            // Unknown rule section
2388            let message = if let Some(path) = file_path {
2389                format!(
2390                    "Unknown rule in {path}: {}",
2391                    section.trim_matches(|c| c == '[' || c == ']')
2392                )
2393            } else {
2394                format!(
2395                    "Unknown rule in config: {}",
2396                    section.trim_matches(|c| c == '[' || c == ']')
2397                )
2398            };
2399            warnings.push(ConfigValidationWarning {
2400                message,
2401                rule: None,
2402                key: None,
2403            });
2404        }
2405    }
2406    warnings
2407}
2408
2409fn toml_type_name(val: &toml::Value) -> &'static str {
2410    match val {
2411        toml::Value::String(_) => "string",
2412        toml::Value::Integer(_) => "integer",
2413        toml::Value::Float(_) => "float",
2414        toml::Value::Boolean(_) => "boolean",
2415        toml::Value::Array(_) => "array",
2416        toml::Value::Table(_) => "table",
2417        toml::Value::Datetime(_) => "datetime",
2418    }
2419}
2420
/// Calculate the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `s1` into `s2`. It is measured
/// in Unicode scalar values (`char`s), not bytes, so non-ASCII input is
/// handled without indexing past the end of the character buffers.
///
/// Uses the two-row dynamic-programming formulation: only the previous and
/// the current row of the distance matrix are kept in memory.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s2_chars: Vec<char> = s2.chars().collect();
    let width = s2_chars.len();

    // Distance to/from the empty string is the character count of the other side.
    if width == 0 {
        return s1.chars().count();
    }

    // prev_row[j] = distance between the first i chars of s1 and the first j chars of s2.
    let mut prev_row: Vec<usize> = (0..=width).collect();
    let mut curr_row = vec![0; width + 1];

    for (i, c1) in s1.chars().enumerate() {
        curr_row[0] = i + 1;
        for (j, c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != *c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    // After the final swap `prev_row` holds the last computed row; when `s1`
    // is empty it still holds the initial row, i.e. `width`.
    prev_row[width]
}
2452
2453/// Suggest a similar key from a list of valid keys using fuzzy matching
2454fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2455    let unknown_lower = unknown.to_lowercase();
2456    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2457
2458    let mut best_match: Option<(String, usize)> = None;
2459
2460    for valid in valid_keys {
2461        let valid_lower = valid.to_lowercase();
2462        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2463
2464        if distance <= max_distance {
2465            if let Some((_, best_dist)) = &best_match {
2466                if distance < *best_dist {
2467                    best_match = Some((valid.clone(), distance));
2468                }
2469            } else {
2470                best_match = Some((valid.clone(), distance));
2471            }
2472        }
2473    }
2474
2475    best_match.map(|(key, _)| key)
2476}
2477
2478fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2479    use toml::Value::*;
2480    match (expected, actual) {
2481        (String(_), String(_)) => true,
2482        (Integer(_), Integer(_)) => true,
2483        (Float(_), Float(_)) => true,
2484        (Boolean(_), Boolean(_)) => true,
2485        (Array(_), Array(_)) => true,
2486        (Table(_), Table(_)) => true,
2487        (Datetime(_), Datetime(_)) => true,
2488        // Allow integer for float
2489        (Float(_), Integer(_)) => true,
2490        _ => false,
2491    }
2492}
2493
2494/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2495fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2496    let doc: toml::Value =
2497        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2498    let mut fragment = SourcedConfigFragment::default();
2499    let source = ConfigSource::PyprojectToml;
2500    let file = Some(path.to_string());
2501
2502    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2503    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2504        && let Some(rumdl_table) = rumdl_config.as_table()
2505    {
2506        // Helper function to extract global config from a table
2507        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2508            // Extract global options from the given table
2509            if let Some(enable) = table.get("enable")
2510                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2511            {
2512                // Normalize rule names in the list
2513                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2514                fragment
2515                    .global
2516                    .enable
2517                    .push_override(normalized_values, source, file.clone(), None);
2518            }
2519
2520            if let Some(disable) = table.get("disable")
2521                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2522            {
2523                // Re-enable normalization
2524                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2525                fragment
2526                    .global
2527                    .disable
2528                    .push_override(normalized_values, source, file.clone(), None);
2529            }
2530
2531            if let Some(include) = table.get("include")
2532                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2533            {
2534                fragment
2535                    .global
2536                    .include
2537                    .push_override(values, source, file.clone(), None);
2538            }
2539
2540            if let Some(exclude) = table.get("exclude")
2541                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2542            {
2543                fragment
2544                    .global
2545                    .exclude
2546                    .push_override(values, source, file.clone(), None);
2547            }
2548
2549            if let Some(respect_gitignore) = table
2550                .get("respect-gitignore")
2551                .or_else(|| table.get("respect_gitignore"))
2552                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2553            {
2554                fragment
2555                    .global
2556                    .respect_gitignore
2557                    .push_override(value, source, file.clone(), None);
2558            }
2559
2560            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2561                && let Ok(value) = bool::deserialize(force_exclude.clone())
2562            {
2563                fragment
2564                    .global
2565                    .force_exclude
2566                    .push_override(value, source, file.clone(), None);
2567            }
2568
2569            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2570                && let Ok(value) = String::deserialize(output_format.clone())
2571            {
2572                if fragment.global.output_format.is_none() {
2573                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2574                } else {
2575                    fragment
2576                        .global
2577                        .output_format
2578                        .as_mut()
2579                        .unwrap()
2580                        .push_override(value, source, file.clone(), None);
2581                }
2582            }
2583
2584            if let Some(fixable) = table.get("fixable")
2585                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2586            {
2587                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2588                fragment
2589                    .global
2590                    .fixable
2591                    .push_override(normalized_values, source, file.clone(), None);
2592            }
2593
2594            if let Some(unfixable) = table.get("unfixable")
2595                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2596            {
2597                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2598                fragment
2599                    .global
2600                    .unfixable
2601                    .push_override(normalized_values, source, file.clone(), None);
2602            }
2603
2604            if let Some(flavor) = table.get("flavor")
2605                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2606            {
2607                fragment.global.flavor.push_override(value, source, file.clone(), None);
2608            }
2609
2610            // Handle line-length special case - this should set the global line_length
2611            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
2612                && let Ok(value) = u64::deserialize(line_length.clone())
2613            {
2614                fragment
2615                    .global
2616                    .line_length
2617                    .push_override(value, source, file.clone(), None);
2618
2619                // Also add to MD013 rule config for backward compatibility
2620                let norm_md013_key = normalize_key("MD013");
2621                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
2622                let norm_line_length_key = normalize_key("line-length");
2623                let sv = rule_entry
2624                    .values
2625                    .entry(norm_line_length_key)
2626                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
2627                sv.push_override(line_length.clone(), source, file.clone(), None);
2628            }
2629        };
2630
2631        // First, check for [tool.rumdl.global] section
2632        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
2633            extract_global_config(&mut fragment, global_table);
2634        }
2635
2636        // Also extract global options from [tool.rumdl] directly (for flat structure)
2637        extract_global_config(&mut fragment, rumdl_table);
2638
2639        // --- Extract per-file-ignores configurations ---
2640        // Check both hyphenated and underscored versions for compatibility
2641        let per_file_ignores_key = rumdl_table
2642            .get("per-file-ignores")
2643            .or_else(|| rumdl_table.get("per_file_ignores"));
2644
2645        if let Some(per_file_ignores_value) = per_file_ignores_key
2646            && let Some(per_file_table) = per_file_ignores_value.as_table()
2647        {
2648            let mut per_file_map = HashMap::new();
2649            for (pattern, rules_value) in per_file_table {
2650                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
2651                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
2652                    per_file_map.insert(pattern.clone(), normalized_rules);
2653                } else {
2654                    log::warn!(
2655                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
2656                    );
2657                }
2658            }
2659            fragment
2660                .per_file_ignores
2661                .push_override(per_file_map, source, file.clone(), None);
2662        }
2663
2664        // --- Extract rule-specific configurations ---
2665        for (key, value) in rumdl_table {
2666            let norm_rule_key = normalize_key(key);
2667
2668            // Skip keys already handled as global or special cases
2669            if [
2670                "enable",
2671                "disable",
2672                "include",
2673                "exclude",
2674                "respect_gitignore",
2675                "respect-gitignore", // Added kebab-case here too
2676                "force_exclude",
2677                "force-exclude",
2678                "line_length",
2679                "line-length",
2680                "output_format",
2681                "output-format",
2682                "fixable",
2683                "unfixable",
2684                "per-file-ignores",
2685                "per_file_ignores",
2686                "global",
2687            ]
2688            .contains(&norm_rule_key.as_str())
2689            {
2690                continue;
2691            }
2692
2693            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
2694            // AND if the value is actually a TOML table before processing as rule config.
2695            // This prevents misinterpreting other top-level keys under [tool.rumdl]
2696            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
2697            if norm_rule_key_upper.len() == 5
2698                && norm_rule_key_upper.starts_with("MD")
2699                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
2700                && value.is_table()
2701            {
2702                if let Some(rule_config_table) = value.as_table() {
2703                    // Get the entry for this rule (e.g., "md013")
2704                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
2705                    for (rk, rv) in rule_config_table {
2706                        let norm_rk = normalize_key(rk); // Normalize the config key itself
2707
2708                        let toml_val = rv.clone();
2709
2710                        let sv = rule_entry
2711                            .values
2712                            .entry(norm_rk.clone())
2713                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2714                        sv.push_override(toml_val, source, file.clone(), None);
2715                    }
2716                }
2717            } else {
2718                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
2719                // Track unknown keys under [tool.rumdl] for validation
2720                fragment
2721                    .unknown_keys
2722                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
2723            }
2724        }
2725    }
2726
2727    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
2728    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
2729        for (key, value) in tool_table.iter() {
2730            if let Some(rule_name) = key.strip_prefix("rumdl.") {
2731                let norm_rule_name = normalize_key(rule_name);
2732                if norm_rule_name.len() == 5
2733                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2734                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2735                    && let Some(rule_table) = value.as_table()
2736                {
2737                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2738                    for (rk, rv) in rule_table {
2739                        let norm_rk = normalize_key(rk);
2740                        let toml_val = rv.clone();
2741                        let sv = rule_entry
2742                            .values
2743                            .entry(norm_rk.clone())
2744                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2745                        sv.push_override(toml_val, source, file.clone(), None);
2746                    }
2747                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2748                    // Track unknown rule sections like [tool.rumdl.MD999]
2749                    fragment.unknown_keys.push((
2750                        format!("[tool.rumdl.{rule_name}]"),
2751                        String::new(),
2752                        Some(path.to_string()),
2753                    ));
2754                }
2755            }
2756        }
2757    }
2758
2759    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
2760    if let Some(doc_table) = doc.as_table() {
2761        for (key, value) in doc_table.iter() {
2762            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
2763                let norm_rule_name = normalize_key(rule_name);
2764                if norm_rule_name.len() == 5
2765                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2766                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2767                    && let Some(rule_table) = value.as_table()
2768                {
2769                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2770                    for (rk, rv) in rule_table {
2771                        let norm_rk = normalize_key(rk);
2772                        let toml_val = rv.clone();
2773                        let sv = rule_entry
2774                            .values
2775                            .entry(norm_rk.clone())
2776                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2777                        sv.push_override(toml_val, source, file.clone(), None);
2778                    }
2779                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2780                    // Track unknown rule sections like [tool.rumdl.MD999]
2781                    fragment.unknown_keys.push((
2782                        format!("[tool.rumdl.{rule_name}]"),
2783                        String::new(),
2784                        Some(path.to_string()),
2785                    ));
2786                }
2787            }
2788        }
2789    }
2790
2791    // Only return Some(fragment) if any config was found
2792    let has_any = !fragment.global.enable.value.is_empty()
2793        || !fragment.global.disable.value.is_empty()
2794        || !fragment.global.include.value.is_empty()
2795        || !fragment.global.exclude.value.is_empty()
2796        || !fragment.global.fixable.value.is_empty()
2797        || !fragment.global.unfixable.value.is_empty()
2798        || fragment.global.output_format.is_some()
2799        || !fragment.per_file_ignores.value.is_empty()
2800        || !fragment.rules.is_empty();
2801    if has_any { Ok(Some(fragment)) } else { Ok(None) }
2802}
2803
2804/// Parses rumdl.toml / .rumdl.toml content.
2805fn parse_rumdl_toml(content: &str, path: &str, source: ConfigSource) -> Result<SourcedConfigFragment, ConfigError> {
2806    let doc = content
2807        .parse::<DocumentMut>()
2808        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2809    let mut fragment = SourcedConfigFragment::default();
2810    // source parameter provided by caller
2811    let file = Some(path.to_string());
2812
2813    // Define known rules before the loop
2814    let all_rules = rules::all_rules(&Config::default());
2815    let registry = RuleRegistry::from_rules(&all_rules);
2816    let known_rule_names: BTreeSet<String> = registry
2817        .rule_names()
2818        .into_iter()
2819        .map(|s| s.to_ascii_uppercase())
2820        .collect();
2821
2822    // Handle [global] section
2823    if let Some(global_item) = doc.get("global")
2824        && let Some(global_table) = global_item.as_table()
2825    {
2826        for (key, value_item) in global_table.iter() {
2827            let norm_key = normalize_key(key);
2828            match norm_key.as_str() {
2829                "enable" | "disable" | "include" | "exclude" => {
2830                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2831                        // Corrected: Iterate directly over the Formatted<Array>
2832                        let values: Vec<String> = formatted_array
2833                                .iter()
2834                                .filter_map(|item| item.as_str()) // Extract strings
2835                                .map(|s| s.to_string())
2836                                .collect();
2837
2838                        // Normalize rule names for enable/disable
2839                        let final_values = if norm_key == "enable" || norm_key == "disable" {
2840                            // Corrected: Pass &str to normalize_key
2841                            values.into_iter().map(|s| normalize_key(&s)).collect()
2842                        } else {
2843                            values
2844                        };
2845
2846                        match norm_key.as_str() {
2847                            "enable" => fragment
2848                                .global
2849                                .enable
2850                                .push_override(final_values, source, file.clone(), None),
2851                            "disable" => {
2852                                fragment
2853                                    .global
2854                                    .disable
2855                                    .push_override(final_values, source, file.clone(), None)
2856                            }
2857                            "include" => {
2858                                fragment
2859                                    .global
2860                                    .include
2861                                    .push_override(final_values, source, file.clone(), None)
2862                            }
2863                            "exclude" => {
2864                                fragment
2865                                    .global
2866                                    .exclude
2867                                    .push_override(final_values, source, file.clone(), None)
2868                            }
2869                            _ => unreachable!("Outer match guarantees only enable/disable/include/exclude"),
2870                        }
2871                    } else {
2872                        log::warn!(
2873                            "[WARN] Expected array for global key '{}' in {}, found {}",
2874                            key,
2875                            path,
2876                            value_item.type_name()
2877                        );
2878                    }
2879                }
2880                "respect_gitignore" | "respect-gitignore" => {
2881                    // Handle both cases
2882                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2883                        let val = *formatted_bool.value();
2884                        fragment
2885                            .global
2886                            .respect_gitignore
2887                            .push_override(val, source, file.clone(), None);
2888                    } else {
2889                        log::warn!(
2890                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2891                            key,
2892                            path,
2893                            value_item.type_name()
2894                        );
2895                    }
2896                }
2897                "force_exclude" | "force-exclude" => {
2898                    // Handle both cases
2899                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2900                        let val = *formatted_bool.value();
2901                        fragment
2902                            .global
2903                            .force_exclude
2904                            .push_override(val, source, file.clone(), None);
2905                    } else {
2906                        log::warn!(
2907                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2908                            key,
2909                            path,
2910                            value_item.type_name()
2911                        );
2912                    }
2913                }
2914                "line_length" | "line-length" => {
2915                    // Handle both cases
2916                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
2917                        let val = *formatted_int.value() as u64;
2918                        fragment
2919                            .global
2920                            .line_length
2921                            .push_override(val, source, file.clone(), None);
2922                    } else {
2923                        log::warn!(
2924                            "[WARN] Expected integer for global key '{}' in {}, found {}",
2925                            key,
2926                            path,
2927                            value_item.type_name()
2928                        );
2929                    }
2930                }
2931                "output_format" | "output-format" => {
2932                    // Handle both cases
2933                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2934                        let val = formatted_string.value().clone();
2935                        if fragment.global.output_format.is_none() {
2936                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
2937                        } else {
2938                            fragment.global.output_format.as_mut().unwrap().push_override(
2939                                val,
2940                                source,
2941                                file.clone(),
2942                                None,
2943                            );
2944                        }
2945                    } else {
2946                        log::warn!(
2947                            "[WARN] Expected string for global key '{}' in {}, found {}",
2948                            key,
2949                            path,
2950                            value_item.type_name()
2951                        );
2952                    }
2953                }
2954                "fixable" => {
2955                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2956                        let values: Vec<String> = formatted_array
2957                            .iter()
2958                            .filter_map(|item| item.as_str())
2959                            .map(normalize_key)
2960                            .collect();
2961                        fragment
2962                            .global
2963                            .fixable
2964                            .push_override(values, source, file.clone(), None);
2965                    } else {
2966                        log::warn!(
2967                            "[WARN] Expected array for global key '{}' in {}, found {}",
2968                            key,
2969                            path,
2970                            value_item.type_name()
2971                        );
2972                    }
2973                }
2974                "unfixable" => {
2975                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2976                        let values: Vec<String> = formatted_array
2977                            .iter()
2978                            .filter_map(|item| item.as_str())
2979                            .map(normalize_key)
2980                            .collect();
2981                        fragment
2982                            .global
2983                            .unfixable
2984                            .push_override(values, source, file.clone(), None);
2985                    } else {
2986                        log::warn!(
2987                            "[WARN] Expected array for global key '{}' in {}, found {}",
2988                            key,
2989                            path,
2990                            value_item.type_name()
2991                        );
2992                    }
2993                }
2994                "flavor" => {
2995                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2996                        let val = formatted_string.value();
2997                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
2998                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
2999                        } else {
3000                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
3001                        }
3002                    } else {
3003                        log::warn!(
3004                            "[WARN] Expected string for global key '{}' in {}, found {}",
3005                            key,
3006                            path,
3007                            value_item.type_name()
3008                        );
3009                    }
3010                }
3011                _ => {
3012                    // Track unknown global keys for validation
3013                    fragment
3014                        .unknown_keys
3015                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
3016                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
3017                }
3018            }
3019        }
3020    }
3021
3022    // Handle [per-file-ignores] section
3023    if let Some(per_file_item) = doc.get("per-file-ignores")
3024        && let Some(per_file_table) = per_file_item.as_table()
3025    {
3026        let mut per_file_map = HashMap::new();
3027        for (pattern, value_item) in per_file_table.iter() {
3028            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3029                let rules: Vec<String> = formatted_array
3030                    .iter()
3031                    .filter_map(|item| item.as_str())
3032                    .map(normalize_key)
3033                    .collect();
3034                per_file_map.insert(pattern.to_string(), rules);
3035            } else {
3036                let type_name = value_item.type_name();
3037                log::warn!(
3038                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
3039                );
3040            }
3041        }
3042        fragment
3043            .per_file_ignores
3044            .push_override(per_file_map, source, file.clone(), None);
3045    }
3046
3047    // Rule-specific: all other top-level tables
3048    for (key, item) in doc.iter() {
3049        let norm_rule_name = key.to_ascii_uppercase();
3050
3051        // Skip known special sections
3052        if key == "global" || key == "per-file-ignores" {
3053            continue;
3054        }
3055
3056        // Track unknown rule sections (like [MD999])
3057        if !known_rule_names.contains(&norm_rule_name) {
3058            // Only track if it looks like a rule section (starts with MD or is uppercase)
3059            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
3060                fragment
3061                    .unknown_keys
3062                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
3063            }
3064            continue;
3065        }
3066
3067        if let Some(tbl) = item.as_table() {
3068            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
3069            for (rk, rv_item) in tbl.iter() {
3070                let norm_rk = normalize_key(rk);
3071                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
3072                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
3073                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
3074                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
3075                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
3076                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
3077                    Some(toml_edit::Value::Array(formatted_array)) => {
3078                        // Convert toml_edit Array to toml::Value::Array
3079                        let mut values = Vec::new();
3080                        for item in formatted_array.iter() {
3081                            match item {
3082                                toml_edit::Value::String(formatted) => {
3083                                    values.push(toml::Value::String(formatted.value().clone()))
3084                                }
3085                                toml_edit::Value::Integer(formatted) => {
3086                                    values.push(toml::Value::Integer(*formatted.value()))
3087                                }
3088                                toml_edit::Value::Float(formatted) => {
3089                                    values.push(toml::Value::Float(*formatted.value()))
3090                                }
3091                                toml_edit::Value::Boolean(formatted) => {
3092                                    values.push(toml::Value::Boolean(*formatted.value()))
3093                                }
3094                                toml_edit::Value::Datetime(formatted) => {
3095                                    values.push(toml::Value::Datetime(*formatted.value()))
3096                                }
3097                                _ => {
3098                                    log::warn!(
3099                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
3100                                    );
3101                                }
3102                            }
3103                        }
3104                        Some(toml::Value::Array(values))
3105                    }
3106                    Some(toml_edit::Value::InlineTable(_)) => {
3107                        log::warn!(
3108                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
3109                        );
3110                        None
3111                    }
3112                    None => {
3113                        log::warn!(
3114                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
3115                        );
3116                        None
3117                    }
3118                };
3119                if let Some(toml_val) = maybe_toml_val {
3120                    let sv = rule_entry
3121                        .values
3122                        .entry(norm_rk.clone())
3123                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3124                    sv.push_override(toml_val, source, file.clone(), None);
3125                }
3126            }
3127        } else if item.is_value() {
3128            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3129        }
3130    }
3131
3132    Ok(fragment)
3133}
3134
3135/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3136fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3137    // Use the unified loader from markdownlint_config.rs
3138    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3139        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3140    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3141}
3142
3143#[cfg(test)]
3144#[path = "config_intelligent_merge_tests.rs"]
3145mod config_intelligent_merge_tests;