rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use log;
8use serde::{Deserialize, Serialize};
9use std::collections::BTreeMap;
10use std::collections::{BTreeSet, HashMap, HashSet};
11use std::fmt;
12use std::fs;
13use std::io;
14use std::path::Path;
15use std::str::FromStr;
16use toml_edit::DocumentMut;
17
/// Markdown flavor/dialect enumeration
// The serialized name of every variant is the lowercase string given by its `rename`
// attribute; the `Display` and `FromStr` impls in this file use the same spellings.
// NOTE(review): the `///` texts are presumably picked up by the `JsonSchema` derive as
// schema descriptions, so explanatory notes are kept in plain `//` comments - confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments
    // Also deserializes from "none" and from the empty string; this is the `Default` variant.
    #[serde(rename = "standard", alias = "none", alias = "")]
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    #[serde(rename = "mkdocs")]
    MkDocs,
    /// MDX flavor with JSX and ESM support (.mdx files)
    #[serde(rename = "mdx")]
    MDX,
    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
    #[serde(rename = "quarto")]
    Quarto,
    // Future flavors can be added here when they have actual implementation differences
    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
    // Planned: CommonMark - for strict CommonMark compliance
}
39
40impl fmt::Display for MarkdownFlavor {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        match self {
43            MarkdownFlavor::Standard => write!(f, "standard"),
44            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
45            MarkdownFlavor::MDX => write!(f, "mdx"),
46            MarkdownFlavor::Quarto => write!(f, "quarto"),
47        }
48    }
49}
50
51impl FromStr for MarkdownFlavor {
52    type Err = String;
53
54    fn from_str(s: &str) -> Result<Self, Self::Err> {
55        match s.to_lowercase().as_str() {
56            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
57            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
58            "mdx" => Ok(MarkdownFlavor::MDX),
59            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
60            // Accept but warn about unimplemented flavors
61            "gfm" | "github" => {
62                eprintln!("Warning: GFM flavor not yet implemented, using standard");
63                Ok(MarkdownFlavor::Standard)
64            }
65            "commonmark" => {
66                eprintln!("Warning: CommonMark flavor not yet implemented, using standard");
67                Ok(MarkdownFlavor::Standard)
68            }
69            _ => Err(format!("Unknown markdown flavor: {s}")),
70        }
71    }
72}
73
74impl MarkdownFlavor {
75    /// Detect flavor from file extension
76    pub fn from_extension(ext: &str) -> Self {
77        match ext.to_lowercase().as_str() {
78            "mdx" => Self::MDX,
79            "qmd" => Self::Quarto,
80            "rmd" => Self::Quarto,
81            _ => Self::Standard,
82        }
83    }
84
85    /// Detect flavor from file path
86    pub fn from_path(path: &std::path::Path) -> Self {
87        path.extension()
88            .and_then(|e| e.to_str())
89            .map(Self::from_extension)
90            .unwrap_or(Self::Standard)
91    }
92
93    /// Check if this flavor supports ESM imports/exports (MDX-specific)
94    pub fn supports_esm_blocks(self) -> bool {
95        matches!(self, Self::MDX)
96    }
97
98    /// Check if this flavor supports JSX components (MDX-specific)
99    pub fn supports_jsx(self) -> bool {
100        matches!(self, Self::MDX)
101    }
102
103    /// Check if this flavor supports auto-references (MkDocs-specific)
104    pub fn supports_auto_references(self) -> bool {
105        matches!(self, Self::MkDocs)
106    }
107
108    /// Get a human-readable name for this flavor
109    pub fn name(self) -> &'static str {
110        match self {
111            Self::Standard => "Standard",
112            Self::MkDocs => "MkDocs",
113            Self::MDX => "MDX",
114            Self::Quarto => "Quarto",
115        }
116    }
117}
118
/// Normalizes a configuration key.
///
/// - Rule identifiers (exactly `md` in any letter case followed by three ASCII digits,
///   e.g. `md013`) are returned in uppercase: `MD013`.
/// - Every other key has `_` replaced by `-` and its ASCII letters lowercased
///   (`Line_Length` becomes `line-length`).
pub fn normalize_key(key: &str) -> String {
    // Inspect the raw bytes so that no temporary lowercase copy of the key is needed.
    let is_rule_id = match key.as_bytes() {
        [m, d, digits @ ..] => {
            digits.len() == 3
                && m.eq_ignore_ascii_case(&b'm')
                && d.eq_ignore_ascii_case(&b'd')
                && digits.iter().all(u8::is_ascii_digit)
        }
        _ => false,
    };

    if is_rule_id {
        return key.to_ascii_uppercase();
    }
    key.replace('_', "-").to_ascii_lowercase()
}
128
/// Represents a rule-specific configuration
// A thin wrapper around an ordered map from option name to raw TOML value; typed access
// goes through `get_rule_config_value` in this file.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    // `flatten` makes the map entries direct keys of the rule's table instead of nesting
    // them under a `values` key. The JSON schema for this field comes from
    // `arbitrary_value_schema` rather than from the field type.
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
137
138/// Generate a JSON schema for arbitrary configuration values
139fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
140    schemars::json_schema!({
141        "type": "object",
142        "additionalProperties": true
143    })
144}
145
/// Represents the complete configuration loaded from rumdl.toml
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    // Read from the `global` key; when the key is absent the field takes its `Default` value.
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    // Serialized under the kebab-case key `per-file-ignores`. The map keys are compiled as
    // glob patterns by `Config::get_ignored_rules_for_file`.
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    // Flattened: the rule sections sit next to `global` at the top level of the document
    // instead of under a `rules` key.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,
}
174
175impl Config {
176    /// Check if the Markdown flavor is set to MkDocs
177    pub fn is_mkdocs_flavor(&self) -> bool {
178        self.global.flavor == MarkdownFlavor::MkDocs
179    }
180
181    // Future methods for when GFM and CommonMark are implemented:
182    // pub fn is_gfm_flavor(&self) -> bool
183    // pub fn is_commonmark_flavor(&self) -> bool
184
185    /// Get the configured Markdown flavor
186    pub fn markdown_flavor(&self) -> MarkdownFlavor {
187        self.global.flavor
188    }
189
190    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
191    pub fn is_mkdocs_project(&self) -> bool {
192        self.is_mkdocs_flavor()
193    }
194
195    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
196    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
197    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
198        use globset::{Glob, GlobSetBuilder};
199
200        let mut ignored_rules = HashSet::new();
201
202        if self.per_file_ignores.is_empty() {
203            return ignored_rules;
204        }
205
206        // Build a globset for efficient matching
207        let mut builder = GlobSetBuilder::new();
208        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
209
210        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
211            if let Ok(glob) = Glob::new(pattern) {
212                builder.add(glob);
213                pattern_to_rules.push((idx, rules));
214            } else {
215                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
216            }
217        }
218
219        let globset = match builder.build() {
220            Ok(gs) => gs,
221            Err(e) => {
222                log::error!("Failed to build globset for per-file-ignores: {e}");
223                return ignored_rules;
224            }
225        };
226
227        // Match the file path against all patterns
228        for match_idx in globset.matches(file_path) {
229            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
230                for rule in rules.iter() {
231                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
232                    ignored_rules.insert(normalize_key(rule));
233                }
234            }
235        }
236
237        ignored_rules
238    }
239}
240
/// Global configuration options
// The container-level `#[serde(default)]` fills fields that are missing from the input
// from `GlobalConfig::default()`; fields carrying their own `default` attribute use that
// one instead.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default)]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    // Defaults to `true` through `default_respect_gitignore`.
    #[serde(default = "default_respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    // Defaults to 80 through `default_line_length`.
    #[serde(default = "default_line_length")]
    pub line_length: u64,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Left out of serialized output entirely while it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    // NOTE(review): `MarkdownFlavor` only has the serde names standard/mkdocs/mdx/quarto
    // (plus the aliases "none" and ""). "gfm" and "commonmark" are only understood by its
    // `FromStr` impl, which maps them to Standard with a warning - the text above is stale.
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    #[serde(default)]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,
}
296
/// Serde default for `GlobalConfig::respect_gitignore`: `.gitignore` files are honored
/// unless the configuration says otherwise.
fn default_respect_gitignore() -> bool {
    const RESPECT_GITIGNORE_BY_DEFAULT: bool = true;
    RESPECT_GITIGNORE_BY_DEFAULT
}
300
/// Serde default for `GlobalConfig::line_length`: 80 columns.
fn default_line_length() -> u64 {
    const DEFAULT_LINE_LENGTH: u64 = 80;
    DEFAULT_LINE_LENGTH
}
304
305// Add the Default impl
306impl Default for GlobalConfig {
307    #[allow(deprecated)]
308    fn default() -> Self {
309        Self {
310            enable: Vec::new(),
311            disable: Vec::new(),
312            exclude: Vec::new(),
313            include: Vec::new(),
314            respect_gitignore: true,
315            line_length: 80,
316            output_format: None,
317            fixable: Vec::new(),
318            unfixable: Vec::new(),
319            flavor: MarkdownFlavor::default(),
320            force_exclude: false,
321        }
322    }
323}
324
/// File names recognized as markdownlint configuration files: the dotted and undotted
/// `markdownlint` base name with a `.json`, `.jsonc`, `.yaml` or `.yml` extension.
// NOTE(review): presumably consulted by the config discovery code elsewhere in this file;
// the consumer is not visible in this part - confirm.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
335
336/// Create a default configuration file at the specified path
337pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
338    // Check if file already exists
339    if Path::new(path).exists() {
340        return Err(ConfigError::FileExists { path: path.to_string() });
341    }
342
343    // Default configuration content
344    let default_config = r#"# rumdl configuration file
345
346# Global configuration options
347[global]
348# List of rules to disable (uncomment and modify as needed)
349# disable = ["MD013", "MD033"]
350
351# List of rules to enable exclusively (if provided, only these rules will run)
352# enable = ["MD001", "MD003", "MD004"]
353
354# List of file/directory patterns to include for linting (if provided, only these will be linted)
355# include = [
356#    "docs/*.md",
357#    "src/**/*.md",
358#    "README.md"
359# ]
360
361# List of file/directory patterns to exclude from linting
362exclude = [
363    # Common directories to exclude
364    ".git",
365    ".github",
366    "node_modules",
367    "vendor",
368    "dist",
369    "build",
370
371    # Specific files or patterns
372    "CHANGELOG.md",
373    "LICENSE.md",
374]
375
376# Respect .gitignore files when scanning directories (default: true)
377respect-gitignore = true
378
379# Markdown flavor/dialect (uncomment to enable)
380# Options: mkdocs, gfm, commonmark
381# flavor = "mkdocs"
382
383# Rule-specific configurations (uncomment and modify as needed)
384
385# [MD003]
386# style = "atx"  # Heading style (atx, atx_closed, setext)
387
388# [MD004]
389# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
390
391# [MD007]
392# indent = 4  # Unordered list indentation
393
394# [MD013]
395# line-length = 100  # Line length
396# code-blocks = false  # Exclude code blocks from line length check
397# tables = false  # Exclude tables from line length check
398# headings = true  # Include headings in line length check
399
400# [MD044]
401# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
402# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
403"#;
404
405    // Write the default configuration to the file
406    match fs::write(path, default_config) {
407        Ok(_) => Ok(()),
408        Err(err) => Err(ConfigError::IoError {
409            source: err,
410            path: path.to_string(),
411        }),
412    }
413}
414
/// Errors that can occur when loading or creating configuration
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on the configuration file at `path` failed; `source` is the
    /// underlying error.
    ///
    /// `create_default_config` also reports write failures through this variant, although
    /// the rendered message says "read".
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON); the payload is the
    /// message shown to the user.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists at `path` (returned by `create_default_config`,
    /// which refuses to overwrite).
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
430
431/// Get a rule-specific configuration value
432/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
433/// for better markdownlint compatibility
434pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
435    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
436
437    let rule_config = config.rules.get(&norm_rule_name)?;
438
439    // Try multiple key variants to support both underscore and kebab-case formats
440    let key_variants = [
441        key.to_string(),       // Original key as provided
442        normalize_key(key),    // Normalized key (lowercase, kebab-case)
443        key.replace('-', "_"), // Convert kebab-case to snake_case
444        key.replace('_', "-"), // Convert snake_case to kebab-case
445    ];
446
447    // Try each variant until we find a match
448    for variant in &key_variants {
449        if let Some(value) = rule_config.values.get(variant)
450            && let Ok(result) = T::deserialize(value.clone())
451        {
452            return Some(result);
453        }
454    }
455
456    None
457}
458
/// Returns the default `[tool.rumdl]` section for a `pyproject.toml` file.
///
/// The text starts with an empty line, sets the global options (`line-length`, `disable`,
/// `exclude`, `respect-gitignore`) and ends with commented-out examples of rule sections.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}
501
502#[cfg(test)]
503mod tests {
504    use super::*;
505    use std::fs;
506    use tempfile::tempdir;
507
508    #[test]
509    fn test_flavor_loading() {
510        let temp_dir = tempdir().unwrap();
511        let config_path = temp_dir.path().join(".rumdl.toml");
512        let config_content = r#"
513[global]
514flavor = "mkdocs"
515disable = ["MD001"]
516"#;
517        fs::write(&config_path, config_content).unwrap();
518
519        // Load the config
520        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
521        let config: Config = sourced.into();
522
523        // Check that flavor was loaded
524        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
525        assert!(config.is_mkdocs_flavor());
526        assert!(config.is_mkdocs_project()); // Test backwards compatibility
527        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
528    }
529
530    #[test]
531    fn test_pyproject_toml_root_level_config() {
532        let temp_dir = tempdir().unwrap();
533        let config_path = temp_dir.path().join("pyproject.toml");
534
535        // Create a test pyproject.toml with root-level configuration
536        let content = r#"
537[tool.rumdl]
538line-length = 120
539disable = ["MD033"]
540enable = ["MD001", "MD004"]
541include = ["docs/*.md"]
542exclude = ["node_modules"]
543respect-gitignore = true
544        "#;
545
546        fs::write(&config_path, content).unwrap();
547
548        // Load the config with skip_auto_discovery to avoid environment config files
549        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
550        let config: Config = sourced.into(); // Convert to plain config for assertions
551
552        // Check global settings
553        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
554        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
555        // Should now contain only the configured pattern since auto-discovery is disabled
556        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
557        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
558        assert!(config.global.respect_gitignore);
559
560        // Check line-length was correctly added to MD013
561        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
562        assert_eq!(line_length, Some(120));
563    }
564
565    #[test]
566    fn test_pyproject_toml_snake_case_and_kebab_case() {
567        let temp_dir = tempdir().unwrap();
568        let config_path = temp_dir.path().join("pyproject.toml");
569
570        // Test with both kebab-case and snake_case variants
571        let content = r#"
572[tool.rumdl]
573line-length = 150
574respect_gitignore = true
575        "#;
576
577        fs::write(&config_path, content).unwrap();
578
579        // Load the config with skip_auto_discovery to avoid environment config files
580        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
581        let config: Config = sourced.into(); // Convert to plain config for assertions
582
583        // Check settings were correctly loaded
584        assert!(config.global.respect_gitignore);
585        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
586        assert_eq!(line_length, Some(150));
587    }
588
589    #[test]
590    fn test_md013_key_normalization_in_rumdl_toml() {
591        let temp_dir = tempdir().unwrap();
592        let config_path = temp_dir.path().join(".rumdl.toml");
593        let config_content = r#"
594[MD013]
595line_length = 111
596line-length = 222
597"#;
598        fs::write(&config_path, config_content).unwrap();
599        // Load the config with skip_auto_discovery to avoid environment config files
600        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
601        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
602        // Now we should only get the explicitly configured key
603        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
604        assert_eq!(keys, vec!["line-length"]);
605        let val = &rule_cfg.values["line-length"].value;
606        assert_eq!(val.as_integer(), Some(222));
607        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
608        let config: Config = sourced.clone().into();
609        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
610        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
611        assert_eq!(v1, Some(222));
612        assert_eq!(v2, Some(222));
613    }
614
615    #[test]
616    fn test_md013_section_case_insensitivity() {
617        let temp_dir = tempdir().unwrap();
618        let config_path = temp_dir.path().join(".rumdl.toml");
619        let config_content = r#"
620[md013]
621line-length = 101
622
623[Md013]
624line-length = 102
625
626[MD013]
627line-length = 103
628"#;
629        fs::write(&config_path, config_content).unwrap();
630        // Load the config with skip_auto_discovery to avoid environment config files
631        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
632        let config: Config = sourced.clone().into();
633        // Only the last section should win, and be present
634        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
635        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
636        assert_eq!(keys, vec!["line-length"]);
637        let val = &rule_cfg.values["line-length"].value;
638        assert_eq!(val.as_integer(), Some(103));
639        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
640        assert_eq!(v, Some(103));
641    }
642
643    #[test]
644    fn test_md013_key_snake_and_kebab_case() {
645        let temp_dir = tempdir().unwrap();
646        let config_path = temp_dir.path().join(".rumdl.toml");
647        let config_content = r#"
648[MD013]
649line_length = 201
650line-length = 202
651"#;
652        fs::write(&config_path, config_content).unwrap();
653        // Load the config with skip_auto_discovery to avoid environment config files
654        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
655        let config: Config = sourced.clone().into();
656        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
657        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
658        assert_eq!(keys, vec!["line-length"]);
659        let val = &rule_cfg.values["line-length"].value;
660        assert_eq!(val.as_integer(), Some(202));
661        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
662        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
663        assert_eq!(v1, Some(202));
664        assert_eq!(v2, Some(202));
665    }
666
667    #[test]
668    fn test_unknown_rule_section_is_ignored() {
669        let temp_dir = tempdir().unwrap();
670        let config_path = temp_dir.path().join(".rumdl.toml");
671        let config_content = r#"
672[MD999]
673foo = 1
674bar = 2
675[MD013]
676line-length = 303
677"#;
678        fs::write(&config_path, config_content).unwrap();
679        // Load the config with skip_auto_discovery to avoid environment config files
680        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
681        let config: Config = sourced.clone().into();
682        // MD999 should not be present
683        assert!(!sourced.rules.contains_key("MD999"));
684        // MD013 should be present and correct
685        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
686        assert_eq!(v, Some(303));
687    }
688
689    #[test]
690    fn test_invalid_toml_syntax() {
691        let temp_dir = tempdir().unwrap();
692        let config_path = temp_dir.path().join(".rumdl.toml");
693
694        // Invalid TOML with unclosed string
695        let config_content = r#"
696[MD013]
697line-length = "unclosed string
698"#;
699        fs::write(&config_path, config_content).unwrap();
700
701        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
702        assert!(result.is_err());
703        match result.unwrap_err() {
704            ConfigError::ParseError(msg) => {
705                // The actual error message from toml parser might vary
706                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
707            }
708            _ => panic!("Expected ParseError"),
709        }
710    }
711
712    #[test]
713    fn test_wrong_type_for_config_value() {
714        let temp_dir = tempdir().unwrap();
715        let config_path = temp_dir.path().join(".rumdl.toml");
716
717        // line-length should be a number, not a string
718        let config_content = r#"
719[MD013]
720line-length = "not a number"
721"#;
722        fs::write(&config_path, config_content).unwrap();
723
724        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
725        let config: Config = sourced.into();
726
727        // The value should be loaded as a string, not converted
728        let rule_config = config.rules.get("MD013").unwrap();
729        let value = rule_config.values.get("line-length").unwrap();
730        assert!(matches!(value, toml::Value::String(_)));
731    }
732
733    #[test]
734    fn test_empty_config_file() {
735        let temp_dir = tempdir().unwrap();
736        let config_path = temp_dir.path().join(".rumdl.toml");
737
738        // Empty file
739        fs::write(&config_path, "").unwrap();
740
741        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
742        let config: Config = sourced.into();
743
744        // Should have default values
745        assert_eq!(config.global.line_length, 80);
746        assert!(config.global.respect_gitignore);
747        assert!(config.rules.is_empty());
748    }
749
750    #[test]
751    fn test_malformed_pyproject_toml() {
752        let temp_dir = tempdir().unwrap();
753        let config_path = temp_dir.path().join("pyproject.toml");
754
755        // Missing closing bracket
756        let content = r#"
757[tool.rumdl
758line-length = 120
759"#;
760        fs::write(&config_path, content).unwrap();
761
762        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
763        assert!(result.is_err());
764    }
765
766    #[test]
767    fn test_conflicting_config_values() {
768        let temp_dir = tempdir().unwrap();
769        let config_path = temp_dir.path().join(".rumdl.toml");
770
771        // Both enable and disable the same rule - these need to be in a global section
772        let config_content = r#"
773[global]
774enable = ["MD013"]
775disable = ["MD013"]
776"#;
777        fs::write(&config_path, config_content).unwrap();
778
779        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
780        let config: Config = sourced.into();
781
782        // Both should be present - resolution happens at runtime
783        assert!(config.global.enable.contains(&"MD013".to_string()));
784        assert!(config.global.disable.contains(&"MD013".to_string()));
785    }
786
787    #[test]
788    fn test_invalid_rule_names() {
789        let temp_dir = tempdir().unwrap();
790        let config_path = temp_dir.path().join(".rumdl.toml");
791
792        let config_content = r#"
793[global]
794enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
795disable = ["MD-001", "MD_002"]
796"#;
797        fs::write(&config_path, config_content).unwrap();
798
799        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
800        let config: Config = sourced.into();
801
802        // All values should be preserved as-is
803        assert_eq!(config.global.enable.len(), 4);
804        assert_eq!(config.global.disable.len(), 2);
805    }
806
807    #[test]
808    fn test_deeply_nested_config() {
809        let temp_dir = tempdir().unwrap();
810        let config_path = temp_dir.path().join(".rumdl.toml");
811
812        // This should be ignored as we don't support nested tables within rule configs
813        let config_content = r#"
814[MD013]
815line-length = 100
816[MD013.nested]
817value = 42
818"#;
819        fs::write(&config_path, config_content).unwrap();
820
821        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
822        let config: Config = sourced.into();
823
824        let rule_config = config.rules.get("MD013").unwrap();
825        assert_eq!(
826            rule_config.values.get("line-length").unwrap(),
827            &toml::Value::Integer(100)
828        );
829        // Nested table should not be present
830        assert!(!rule_config.values.contains_key("nested"));
831    }
832
833    #[test]
834    fn test_unicode_in_config() {
835        let temp_dir = tempdir().unwrap();
836        let config_path = temp_dir.path().join(".rumdl.toml");
837
838        let config_content = r#"
839[global]
840include = ["文档/*.md", "ドキュメント/*.md"]
841exclude = ["测试/*", "🚀/*"]
842
843[MD013]
844line-length = 80
845message = "行太长了 🚨"
846"#;
847        fs::write(&config_path, config_content).unwrap();
848
849        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
850        let config: Config = sourced.into();
851
852        assert_eq!(config.global.include.len(), 2);
853        assert_eq!(config.global.exclude.len(), 2);
854        assert!(config.global.include[0].contains("文档"));
855        assert!(config.global.exclude[1].contains("🚀"));
856
857        let rule_config = config.rules.get("MD013").unwrap();
858        let message = rule_config.values.get("message").unwrap();
859        if let toml::Value::String(s) = message {
860            assert!(s.contains("行太长了"));
861            assert!(s.contains("🚨"));
862        }
863    }
864
865    #[test]
866    fn test_extremely_long_values() {
867        let temp_dir = tempdir().unwrap();
868        let config_path = temp_dir.path().join(".rumdl.toml");
869
870        let long_string = "a".repeat(10000);
871        let config_content = format!(
872            r#"
873[global]
874exclude = ["{long_string}"]
875
876[MD013]
877line-length = 999999999
878"#
879        );
880
881        fs::write(&config_path, config_content).unwrap();
882
883        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
884        let config: Config = sourced.into();
885
886        assert_eq!(config.global.exclude[0].len(), 10000);
887        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
888        assert_eq!(line_length, Some(999999999));
889    }
890
891    #[test]
892    fn test_config_with_comments() {
893        let temp_dir = tempdir().unwrap();
894        let config_path = temp_dir.path().join(".rumdl.toml");
895
896        let config_content = r#"
897[global]
898# This is a comment
899enable = ["MD001"] # Enable MD001
900# disable = ["MD002"] # This is commented out
901
902[MD013] # Line length rule
903line-length = 100 # Set to 100 characters
904# ignored = true # This setting is commented out
905"#;
906        fs::write(&config_path, config_content).unwrap();
907
908        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
909        let config: Config = sourced.into();
910
911        assert_eq!(config.global.enable, vec!["MD001"]);
912        assert!(config.global.disable.is_empty()); // Commented out
913
914        let rule_config = config.rules.get("MD013").unwrap();
915        assert_eq!(rule_config.values.len(), 1); // Only line-length
916        assert!(!rule_config.values.contains_key("ignored"));
917    }
918
919    #[test]
920    fn test_arrays_in_rule_config() {
921        let temp_dir = tempdir().unwrap();
922        let config_path = temp_dir.path().join(".rumdl.toml");
923
924        let config_content = r#"
925[MD003]
926levels = [1, 2, 3]
927tags = ["important", "critical"]
928mixed = [1, "two", true]
929"#;
930        fs::write(&config_path, config_content).unwrap();
931
932        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
933        let config: Config = sourced.into();
934
935        // Arrays should now be properly parsed
936        let rule_config = config.rules.get("MD003").expect("MD003 config should exist");
937
938        // Check that arrays are present and correctly parsed
939        assert!(rule_config.values.contains_key("levels"));
940        assert!(rule_config.values.contains_key("tags"));
941        assert!(rule_config.values.contains_key("mixed"));
942
943        // Verify array contents
944        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
945            assert_eq!(levels.len(), 3);
946            assert_eq!(levels[0], toml::Value::Integer(1));
947            assert_eq!(levels[1], toml::Value::Integer(2));
948            assert_eq!(levels[2], toml::Value::Integer(3));
949        } else {
950            panic!("levels should be an array");
951        }
952
953        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
954            assert_eq!(tags.len(), 2);
955            assert_eq!(tags[0], toml::Value::String("important".to_string()));
956            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
957        } else {
958            panic!("tags should be an array");
959        }
960
961        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
962            assert_eq!(mixed.len(), 3);
963            assert_eq!(mixed[0], toml::Value::Integer(1));
964            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
965            assert_eq!(mixed[2], toml::Value::Boolean(true));
966        } else {
967            panic!("mixed should be an array");
968        }
969    }
970
971    #[test]
972    fn test_normalize_key_edge_cases() {
973        // Rule names
974        assert_eq!(normalize_key("MD001"), "MD001");
975        assert_eq!(normalize_key("md001"), "MD001");
976        assert_eq!(normalize_key("Md001"), "MD001");
977        assert_eq!(normalize_key("mD001"), "MD001");
978
979        // Non-rule names
980        assert_eq!(normalize_key("line_length"), "line-length");
981        assert_eq!(normalize_key("line-length"), "line-length");
982        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
983        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
984
985        // Edge cases
986        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
987        assert_eq!(normalize_key("MD00"), "md00"); // Too short
988        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
989        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
990        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
991        assert_eq!(normalize_key(""), "");
992        assert_eq!(normalize_key("_"), "-");
993        assert_eq!(normalize_key("___"), "---");
994    }
995
996    #[test]
997    fn test_missing_config_file() {
998        let temp_dir = tempdir().unwrap();
999        let config_path = temp_dir.path().join("nonexistent.toml");
1000
1001        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1002        assert!(result.is_err());
1003        match result.unwrap_err() {
1004            ConfigError::IoError { .. } => {}
1005            _ => panic!("Expected IoError for missing file"),
1006        }
1007    }
1008
1009    #[test]
1010    #[cfg(unix)]
1011    fn test_permission_denied_config() {
1012        use std::os::unix::fs::PermissionsExt;
1013
1014        let temp_dir = tempdir().unwrap();
1015        let config_path = temp_dir.path().join(".rumdl.toml");
1016
1017        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1018
1019        // Remove read permissions
1020        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1021        perms.set_mode(0o000);
1022        fs::set_permissions(&config_path, perms).unwrap();
1023
1024        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1025
1026        // Restore permissions for cleanup
1027        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1028        perms.set_mode(0o644);
1029        fs::set_permissions(&config_path, perms).unwrap();
1030
1031        assert!(result.is_err());
1032        match result.unwrap_err() {
1033            ConfigError::IoError { .. } => {}
1034            _ => panic!("Expected IoError for permission denied"),
1035        }
1036    }
1037
1038    #[test]
1039    fn test_circular_reference_detection() {
1040        // This test is more conceptual since TOML doesn't support circular references
1041        // But we test that deeply nested structures don't cause stack overflow
1042        let temp_dir = tempdir().unwrap();
1043        let config_path = temp_dir.path().join(".rumdl.toml");
1044
1045        let mut config_content = String::from("[MD001]\n");
1046        for i in 0..100 {
1047            config_content.push_str(&format!("key{i} = {i}\n"));
1048        }
1049
1050        fs::write(&config_path, config_content).unwrap();
1051
1052        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1053        let config: Config = sourced.into();
1054
1055        let rule_config = config.rules.get("MD001").unwrap();
1056        assert_eq!(rule_config.values.len(), 100);
1057    }
1058
1059    #[test]
1060    fn test_special_toml_values() {
1061        let temp_dir = tempdir().unwrap();
1062        let config_path = temp_dir.path().join(".rumdl.toml");
1063
1064        let config_content = r#"
1065[MD001]
1066infinity = inf
1067neg_infinity = -inf
1068not_a_number = nan
1069datetime = 1979-05-27T07:32:00Z
1070local_date = 1979-05-27
1071local_time = 07:32:00
1072"#;
1073        fs::write(&config_path, config_content).unwrap();
1074
1075        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1076        let config: Config = sourced.into();
1077
1078        // Some values might not be parsed due to parser limitations
1079        if let Some(rule_config) = config.rules.get("MD001") {
1080            // Check special float values if present
1081            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1082                assert!(f.is_infinite() && f.is_sign_positive());
1083            }
1084            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1085                assert!(f.is_infinite() && f.is_sign_negative());
1086            }
1087            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1088                assert!(f.is_nan());
1089            }
1090
1091            // Check datetime values if present
1092            if let Some(val) = rule_config.values.get("datetime") {
1093                assert!(matches!(val, toml::Value::Datetime(_)));
1094            }
1095            // Note: local_date and local_time might not be parsed by the current implementation
1096        }
1097    }
1098
1099    #[test]
1100    fn test_default_config_passes_validation() {
1101        use crate::rules;
1102
1103        let temp_dir = tempdir().unwrap();
1104        let config_path = temp_dir.path().join(".rumdl.toml");
1105        let config_path_str = config_path.to_str().unwrap();
1106
1107        // Create the default config using the same function that `rumdl init` uses
1108        create_default_config(config_path_str).unwrap();
1109
1110        // Load it back as a SourcedConfig
1111        let sourced =
1112            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1113
1114        // Create the rule registry
1115        let all_rules = rules::all_rules(&Config::default());
1116        let registry = RuleRegistry::from_rules(&all_rules);
1117
1118        // Validate the config
1119        let warnings = validate_config_sourced(&sourced, &registry);
1120
1121        // The default config should have no warnings
1122        if !warnings.is_empty() {
1123            for warning in &warnings {
1124                eprintln!("Config validation warning: {}", warning.message);
1125                if let Some(rule) = &warning.rule {
1126                    eprintln!("  Rule: {rule}");
1127                }
1128                if let Some(key) = &warning.key {
1129                    eprintln!("  Key: {key}");
1130                }
1131            }
1132        }
1133        assert!(
1134            warnings.is_empty(),
1135            "Default config from rumdl init should pass validation without warnings"
1136        );
1137    }
1138
1139    #[test]
1140    fn test_per_file_ignores_config_parsing() {
1141        let temp_dir = tempdir().unwrap();
1142        let config_path = temp_dir.path().join(".rumdl.toml");
1143        let config_content = r#"
1144[per-file-ignores]
1145"README.md" = ["MD033"]
1146"docs/**/*.md" = ["MD013", "MD033"]
1147"test/*.md" = ["MD041"]
1148"#;
1149        fs::write(&config_path, config_content).unwrap();
1150
1151        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1152        let config: Config = sourced.into();
1153
1154        // Verify per-file-ignores was loaded
1155        assert_eq!(config.per_file_ignores.len(), 3);
1156        assert_eq!(
1157            config.per_file_ignores.get("README.md"),
1158            Some(&vec!["MD033".to_string()])
1159        );
1160        assert_eq!(
1161            config.per_file_ignores.get("docs/**/*.md"),
1162            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1163        );
1164        assert_eq!(
1165            config.per_file_ignores.get("test/*.md"),
1166            Some(&vec!["MD041".to_string()])
1167        );
1168    }
1169
1170    #[test]
1171    fn test_per_file_ignores_glob_matching() {
1172        use std::path::PathBuf;
1173
1174        let temp_dir = tempdir().unwrap();
1175        let config_path = temp_dir.path().join(".rumdl.toml");
1176        let config_content = r#"
1177[per-file-ignores]
1178"README.md" = ["MD033"]
1179"docs/**/*.md" = ["MD013"]
1180"**/test_*.md" = ["MD041"]
1181"#;
1182        fs::write(&config_path, config_content).unwrap();
1183
1184        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1185        let config: Config = sourced.into();
1186
1187        // Test exact match
1188        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1189        assert!(ignored.contains("MD033"));
1190        assert_eq!(ignored.len(), 1);
1191
1192        // Test glob pattern matching
1193        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1194        assert!(ignored.contains("MD013"));
1195        assert_eq!(ignored.len(), 1);
1196
1197        // Test recursive glob pattern
1198        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1199        assert!(ignored.contains("MD041"));
1200        assert_eq!(ignored.len(), 1);
1201
1202        // Test non-matching path
1203        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1204        assert!(ignored.is_empty());
1205    }
1206
1207    #[test]
1208    fn test_per_file_ignores_pyproject_toml() {
1209        let temp_dir = tempdir().unwrap();
1210        let config_path = temp_dir.path().join("pyproject.toml");
1211        let config_content = r#"
1212[tool.rumdl]
1213[tool.rumdl.per-file-ignores]
1214"README.md" = ["MD033", "MD013"]
1215"generated/*.md" = ["MD041"]
1216"#;
1217        fs::write(&config_path, config_content).unwrap();
1218
1219        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1220        let config: Config = sourced.into();
1221
1222        // Verify per-file-ignores was loaded from pyproject.toml
1223        assert_eq!(config.per_file_ignores.len(), 2);
1224        assert_eq!(
1225            config.per_file_ignores.get("README.md"),
1226            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1227        );
1228        assert_eq!(
1229            config.per_file_ignores.get("generated/*.md"),
1230            Some(&vec!["MD041".to_string()])
1231        );
1232    }
1233
1234    #[test]
1235    fn test_per_file_ignores_multiple_patterns_match() {
1236        use std::path::PathBuf;
1237
1238        let temp_dir = tempdir().unwrap();
1239        let config_path = temp_dir.path().join(".rumdl.toml");
1240        let config_content = r#"
1241[per-file-ignores]
1242"docs/**/*.md" = ["MD013"]
1243"**/api/*.md" = ["MD033"]
1244"docs/api/overview.md" = ["MD041"]
1245"#;
1246        fs::write(&config_path, config_content).unwrap();
1247
1248        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1249        let config: Config = sourced.into();
1250
1251        // File matches multiple patterns - should get union of all rules
1252        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1253        assert_eq!(ignored.len(), 3);
1254        assert!(ignored.contains("MD013"));
1255        assert!(ignored.contains("MD033"));
1256        assert!(ignored.contains("MD041"));
1257    }
1258
1259    #[test]
1260    fn test_per_file_ignores_rule_name_normalization() {
1261        use std::path::PathBuf;
1262
1263        let temp_dir = tempdir().unwrap();
1264        let config_path = temp_dir.path().join(".rumdl.toml");
1265        let config_content = r#"
1266[per-file-ignores]
1267"README.md" = ["md033", "MD013", "Md041"]
1268"#;
1269        fs::write(&config_path, config_content).unwrap();
1270
1271        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1272        let config: Config = sourced.into();
1273
1274        // All rule names should be normalized to uppercase
1275        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1276        assert_eq!(ignored.len(), 3);
1277        assert!(ignored.contains("MD033"));
1278        assert!(ignored.contains("MD013"));
1279        assert!(ignored.contains("MD041"));
1280    }
1281
1282    #[test]
1283    fn test_per_file_ignores_invalid_glob_pattern() {
1284        use std::path::PathBuf;
1285
1286        let temp_dir = tempdir().unwrap();
1287        let config_path = temp_dir.path().join(".rumdl.toml");
1288        let config_content = r#"
1289[per-file-ignores]
1290"[invalid" = ["MD033"]
1291"valid/*.md" = ["MD013"]
1292"#;
1293        fs::write(&config_path, config_content).unwrap();
1294
1295        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1296        let config: Config = sourced.into();
1297
1298        // Invalid pattern should be skipped, valid pattern should work
1299        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1300        assert!(ignored.contains("MD013"));
1301
1302        // Invalid pattern should not cause issues
1303        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1304        assert!(ignored2.is_empty());
1305    }
1306
1307    #[test]
1308    fn test_per_file_ignores_empty_section() {
1309        use std::path::PathBuf;
1310
1311        let temp_dir = tempdir().unwrap();
1312        let config_path = temp_dir.path().join(".rumdl.toml");
1313        let config_content = r#"
1314[global]
1315disable = ["MD001"]
1316
1317[per-file-ignores]
1318"#;
1319        fs::write(&config_path, config_content).unwrap();
1320
1321        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1322        let config: Config = sourced.into();
1323
1324        // Empty per-file-ignores should work fine
1325        assert_eq!(config.per_file_ignores.len(), 0);
1326        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1327        assert!(ignored.is_empty());
1328    }
1329
1330    #[test]
1331    fn test_per_file_ignores_with_underscores_in_pyproject() {
1332        let temp_dir = tempdir().unwrap();
1333        let config_path = temp_dir.path().join("pyproject.toml");
1334        let config_content = r#"
1335[tool.rumdl]
1336[tool.rumdl.per_file_ignores]
1337"README.md" = ["MD033"]
1338"#;
1339        fs::write(&config_path, config_content).unwrap();
1340
1341        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1342        let config: Config = sourced.into();
1343
1344        // Should support both per-file-ignores and per_file_ignores
1345        assert_eq!(config.per_file_ignores.len(), 1);
1346        assert_eq!(
1347            config.per_file_ignores.get("README.md"),
1348            Some(&vec!["MD033".to_string()])
1349        );
1350    }
1351
1352    #[test]
1353    fn test_generate_json_schema() {
1354        use schemars::schema_for;
1355        use std::env;
1356
1357        let schema = schema_for!(Config);
1358        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1359
1360        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1361        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1362            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1363            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1364            println!("Schema written to: {}", schema_path.display());
1365        }
1366
1367        // Basic validation that schema was generated
1368        assert!(schema_json.contains("\"title\": \"Config\""));
1369        assert!(schema_json.contains("\"global\""));
1370        assert!(schema_json.contains("\"per-file-ignores\""));
1371    }
1372}
1373
/// Identifies where a configuration value came from.
///
/// `SourcedValue::merge_override` ranks the sources from lowest to highest precedence as
/// `Default`, `PyprojectToml`, `Markdownlint`, `RumdlToml`, `Cli`. Note that this differs
/// from the declaration order below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default; the `Default` impls of the sourced config types tag every value with it.
    Default,
    /// Presumably a value read from a rumdl TOML config file such as `.rumdl.toml` (TODO confirm against the loader).
    RumdlToml,
    /// Presumably a value read from `pyproject.toml` (TODO confirm against the loader).
    PyprojectToml,
    /// Presumably a value supplied on the command line (TODO confirm against the caller).
    Cli,
    /// Value was loaded from a markdownlint config file (e.g. .markdownlint.json, .markdownlint.yaml)
    Markdownlint,
}
1383
/// One entry in the override history of a `SourcedValue`: a value together with the
/// source, and optionally the file and line, it was recorded with.
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    /// The value that was recorded by this entry.
    pub value: T,
    /// The source the value was attributed to.
    pub source: ConfigSource,
    /// File path associated with the value, if one was provided.
    pub file: Option<String>,
    /// Line number associated with the value, if one was provided.
    pub line: Option<usize>,
}
1391
/// A configuration value with provenance: the value currently in effect, the source it
/// came from, and the history of entries that were recorded for it.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    /// The value currently in effect.
    pub value: T,
    /// The source that supplied `value`.
    pub source: ConfigSource,
    /// Recorded history; `SourcedValue::new` seeds it with the initial value, and each
    /// accepted `merge_override` / `push_override` call appends one entry.
    pub overrides: Vec<ConfigOverride<T>>,
}
1398
1399impl<T: Clone> SourcedValue<T> {
1400    pub fn new(value: T, source: ConfigSource) -> Self {
1401        Self {
1402            value: value.clone(),
1403            source,
1404            overrides: vec![ConfigOverride {
1405                value,
1406                source,
1407                file: None,
1408                line: None,
1409            }],
1410        }
1411    }
1412
1413    /// Merges a new override into this SourcedValue based on source precedence.
1414    /// If the new source has higher or equal precedence, the value and source are updated,
1415    /// and the new override is added to the history.
1416    pub fn merge_override(
1417        &mut self,
1418        new_value: T,
1419        new_source: ConfigSource,
1420        new_file: Option<String>,
1421        new_line: Option<usize>,
1422    ) {
1423        // Helper function to get precedence, defined locally or globally
1424        fn source_precedence(src: ConfigSource) -> u8 {
1425            match src {
1426                ConfigSource::Default => 0,
1427                ConfigSource::PyprojectToml => 1,
1428                ConfigSource::Markdownlint => 2,
1429                ConfigSource::RumdlToml => 3,
1430                ConfigSource::Cli => 4,
1431            }
1432        }
1433
1434        if source_precedence(new_source) >= source_precedence(self.source) {
1435            self.value = new_value.clone();
1436            self.source = new_source;
1437            self.overrides.push(ConfigOverride {
1438                value: new_value,
1439                source: new_source,
1440                file: new_file,
1441                line: new_line,
1442            });
1443        }
1444    }
1445
1446    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1447        // This is essentially merge_override without the precedence check
1448        // We might consolidate these later, but keep separate for now during refactor
1449        self.value = value.clone();
1450        self.source = source;
1451        self.overrides.push(ConfigOverride {
1452            value,
1453            source,
1454            file,
1455            line,
1456        });
1457    }
1458}
1459
/// Global configuration settings, each wrapped in a `SourcedValue` so the source of the
/// effective value is tracked per setting.
///
/// The built-in starting values are listed on the `Default` impl of this type.
#[derive(Debug, Clone)]
pub struct SourcedGlobalConfig {
    /// `enable` list (rule names); empty by default.
    pub enable: SourcedValue<Vec<String>>,
    /// `disable` list (rule names); empty by default.
    pub disable: SourcedValue<Vec<String>>,
    /// `exclude` list (path patterns); empty by default.
    pub exclude: SourcedValue<Vec<String>>,
    /// `include` list (path patterns); empty by default.
    pub include: SourcedValue<Vec<String>>,
    /// `respect-gitignore` flag; `true` by default.
    pub respect_gitignore: SourcedValue<bool>,
    /// Global line length; `80` by default.
    pub line_length: SourcedValue<u64>,
    /// Output format; `None` until some source provides one.
    pub output_format: Option<SourcedValue<String>>,
    /// `fixable` list; empty by default.
    pub fixable: SourcedValue<Vec<String>>,
    /// `unfixable` list; empty by default.
    pub unfixable: SourcedValue<Vec<String>>,
    /// Markdown flavor; `MarkdownFlavor::default()` by default.
    pub flavor: SourcedValue<MarkdownFlavor>,
    /// `force_exclude` flag; `false` by default.
    pub force_exclude: SourcedValue<bool>,
}
1474
1475impl Default for SourcedGlobalConfig {
1476    fn default() -> Self {
1477        SourcedGlobalConfig {
1478            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1479            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1480            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1481            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1482            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1483            line_length: SourcedValue::new(80, ConfigSource::Default),
1484            output_format: None,
1485            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1486            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1487            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1488            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1489        }
1490    }
1491}
1492
/// Configuration for a single rule, with provenance tracked per key.
#[derive(Debug, Default, Clone)]
pub struct SourcedRuleConfig {
    /// Rule settings keyed by setting name; each TOML value carries its own source history.
    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
}
1497
/// Represents configuration loaded from a single source file, with provenance.
/// Used as an intermediate step before merging into the final SourcedConfig.
///
/// Note: loaded_files is tracked globally in SourcedConfig, not per fragment.
#[derive(Debug, Clone)]
pub struct SourcedConfigFragment {
    /// Global settings found in this fragment.
    pub global: SourcedGlobalConfig,
    /// Map from file pattern to the rule names listed for it.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Per-rule settings keyed by rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Unknown keys, recorded as `(section, key, file_path)`.
    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
}
1508
1509impl Default for SourcedConfigFragment {
1510    fn default() -> Self {
1511        Self {
1512            global: SourcedGlobalConfig::default(),
1513            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1514            rules: BTreeMap::new(),
1515            unknown_keys: Vec::new(),
1516        }
1517    }
1518}
1519
/// The merged configuration with provenance for every value.
///
/// `SourcedConfigFragment`s are folded into it via `merge`, which uses source
/// precedence to decide which values take effect.
#[derive(Debug, Clone)]
pub struct SourcedConfig {
    /// Merged global settings.
    pub global: SourcedGlobalConfig,
    /// Map from file pattern to the rule names listed for it.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Per-rule settings; `merge` stores rule names in ASCII uppercase.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Presumably the paths of the config files that were loaded (populated outside this view; TODO confirm).
    pub loaded_files: Vec<String>,
    /// Unknown keys, recorded as `(section, key, file_path)`.
    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
}
1528
1529impl Default for SourcedConfig {
1530    fn default() -> Self {
1531        Self {
1532            global: SourcedGlobalConfig::default(),
1533            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1534            rules: BTreeMap::new(),
1535            loaded_files: Vec::new(),
1536            unknown_keys: Vec::new(),
1537        }
1538    }
1539}
1540
1541impl SourcedConfig {
1542    /// Merges another SourcedConfigFragment into this SourcedConfig.
1543    /// Uses source precedence to determine which values take effect.
1544    fn merge(&mut self, fragment: SourcedConfigFragment) {
1545        // Merge global config
1546        self.global.enable.merge_override(
1547            fragment.global.enable.value,
1548            fragment.global.enable.source,
1549            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1550            fragment.global.enable.overrides.first().and_then(|o| o.line),
1551        );
1552        self.global.disable.merge_override(
1553            fragment.global.disable.value,
1554            fragment.global.disable.source,
1555            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1556            fragment.global.disable.overrides.first().and_then(|o| o.line),
1557        );
1558        self.global.include.merge_override(
1559            fragment.global.include.value,
1560            fragment.global.include.source,
1561            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1562            fragment.global.include.overrides.first().and_then(|o| o.line),
1563        );
1564        self.global.exclude.merge_override(
1565            fragment.global.exclude.value,
1566            fragment.global.exclude.source,
1567            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1568            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1569        );
1570        self.global.respect_gitignore.merge_override(
1571            fragment.global.respect_gitignore.value,
1572            fragment.global.respect_gitignore.source,
1573            fragment
1574                .global
1575                .respect_gitignore
1576                .overrides
1577                .first()
1578                .and_then(|o| o.file.clone()),
1579            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1580        );
1581        self.global.line_length.merge_override(
1582            fragment.global.line_length.value,
1583            fragment.global.line_length.source,
1584            fragment
1585                .global
1586                .line_length
1587                .overrides
1588                .first()
1589                .and_then(|o| o.file.clone()),
1590            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1591        );
1592        self.global.fixable.merge_override(
1593            fragment.global.fixable.value,
1594            fragment.global.fixable.source,
1595            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1596            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1597        );
1598        self.global.unfixable.merge_override(
1599            fragment.global.unfixable.value,
1600            fragment.global.unfixable.source,
1601            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1602            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1603        );
1604
1605        // Merge flavor
1606        self.global.flavor.merge_override(
1607            fragment.global.flavor.value,
1608            fragment.global.flavor.source,
1609            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1610            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1611        );
1612
1613        // Merge force_exclude
1614        self.global.force_exclude.merge_override(
1615            fragment.global.force_exclude.value,
1616            fragment.global.force_exclude.source,
1617            fragment
1618                .global
1619                .force_exclude
1620                .overrides
1621                .first()
1622                .and_then(|o| o.file.clone()),
1623            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1624        );
1625
1626        // Merge output_format if present
1627        if let Some(output_format_fragment) = fragment.global.output_format {
1628            if let Some(ref mut output_format) = self.global.output_format {
1629                output_format.merge_override(
1630                    output_format_fragment.value,
1631                    output_format_fragment.source,
1632                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1633                    output_format_fragment.overrides.first().and_then(|o| o.line),
1634                );
1635            } else {
1636                self.global.output_format = Some(output_format_fragment);
1637            }
1638        }
1639
1640        // Merge per_file_ignores
1641        self.per_file_ignores.merge_override(
1642            fragment.per_file_ignores.value,
1643            fragment.per_file_ignores.source,
1644            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1645            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1646        );
1647
1648        // Merge rule configs
1649        for (rule_name, rule_fragment) in fragment.rules {
1650            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1651            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1652            for (key, sourced_value_fragment) in rule_fragment.values {
1653                let sv_entry = rule_entry
1654                    .values
1655                    .entry(key.clone())
1656                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1657                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1658                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1659                sv_entry.merge_override(
1660                    sourced_value_fragment.value,  // Use the value from the fragment
1661                    sourced_value_fragment.source, // Use the source from the fragment
1662                    file_from_fragment,            // Pass the file path from the fragment override
1663                    line_from_fragment,            // Pass the line number from the fragment override
1664                );
1665            }
1666        }
1667
1668        // Merge unknown_keys from fragment
1669        for (section, key, file_path) in fragment.unknown_keys {
1670            // Deduplicate: only add if not already present
1671            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
1672                self.unknown_keys.push((section, key, file_path));
1673            }
1674        }
1675    }
1676
1677    /// Load and merge configurations from files and CLI overrides.
1678    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
1679        Self::load_with_discovery(config_path, cli_overrides, false)
1680    }
1681
1682    /// Discover configuration file by traversing up the directory tree.
1683    /// Returns the first configuration file found.
1684    fn discover_config_upward() -> Option<std::path::PathBuf> {
1685        use std::env;
1686
1687        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1688        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
1689
1690        let start_dir = match env::current_dir() {
1691            Ok(dir) => dir,
1692            Err(e) => {
1693                log::debug!("[rumdl-config] Failed to get current directory: {e}");
1694                return None;
1695            }
1696        };
1697
1698        let mut current_dir = start_dir.clone();
1699        let mut depth = 0;
1700
1701        loop {
1702            if depth >= MAX_DEPTH {
1703                log::debug!("[rumdl-config] Maximum traversal depth reached");
1704                break;
1705            }
1706
1707            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
1708
1709            // Check for config files in order of precedence
1710            for config_name in CONFIG_FILES {
1711                let config_path = current_dir.join(config_name);
1712
1713                if config_path.exists() {
1714                    // For pyproject.toml, verify it contains [tool.rumdl] section
1715                    if *config_name == "pyproject.toml" {
1716                        if let Ok(content) = std::fs::read_to_string(&config_path) {
1717                            if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1718                                log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1719                                return Some(config_path);
1720                            }
1721                            log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
1722                            continue;
1723                        }
1724                    } else {
1725                        log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1726                        return Some(config_path);
1727                    }
1728                }
1729            }
1730
1731            // Check for .git directory (stop boundary)
1732            if current_dir.join(".git").exists() {
1733                log::debug!("[rumdl-config] Stopping at .git directory");
1734                break;
1735            }
1736
1737            // Move to parent directory
1738            match current_dir.parent() {
1739                Some(parent) => {
1740                    current_dir = parent.to_owned();
1741                    depth += 1;
1742                }
1743                None => {
1744                    log::debug!("[rumdl-config] Reached filesystem root");
1745                    break;
1746                }
1747            }
1748        }
1749
1750        None
1751    }
1752
1753    /// Internal implementation that accepts config directory for testing
1754    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
1755        let config_dir = config_dir.join("rumdl");
1756
1757        // Check for config files in precedence order (same as project discovery)
1758        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1759
1760        log::debug!(
1761            "[rumdl-config] Checking for user configuration in: {}",
1762            config_dir.display()
1763        );
1764
1765        for filename in USER_CONFIG_FILES {
1766            let config_path = config_dir.join(filename);
1767
1768            if config_path.exists() {
1769                // For pyproject.toml, verify it contains [tool.rumdl] section
1770                if *filename == "pyproject.toml" {
1771                    if let Ok(content) = std::fs::read_to_string(&config_path) {
1772                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1773                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1774                            return Some(config_path);
1775                        }
1776                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
1777                        continue;
1778                    }
1779                } else {
1780                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1781                    return Some(config_path);
1782                }
1783            }
1784        }
1785
1786        log::debug!(
1787            "[rumdl-config] No user configuration found in: {}",
1788            config_dir.display()
1789        );
1790        None
1791    }
1792
1793    /// Discover user-level configuration file from platform-specific config directory.
1794    /// Returns the first configuration file found in the user config directory.
1795    fn user_configuration_path() -> Option<std::path::PathBuf> {
1796        use etcetera::{BaseStrategy, choose_base_strategy};
1797
1798        match choose_base_strategy() {
1799            Ok(strategy) => {
1800                let config_dir = strategy.config_dir();
1801                Self::user_configuration_path_impl(&config_dir)
1802            }
1803            Err(e) => {
1804                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
1805                None
1806            }
1807        }
1808    }
1809
1810    /// Internal implementation that accepts user config directory for testing
1811    #[doc(hidden)]
1812    pub fn load_with_discovery_impl(
1813        config_path: Option<&str>,
1814        cli_overrides: Option<&SourcedGlobalConfig>,
1815        skip_auto_discovery: bool,
1816        user_config_dir: Option<&Path>,
1817    ) -> Result<Self, ConfigError> {
1818        use std::env;
1819        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
1820        if config_path.is_none() {
1821            if skip_auto_discovery {
1822                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
1823            } else {
1824                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
1825            }
1826        } else {
1827            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
1828        }
1829        let mut sourced_config = SourcedConfig::default();
1830
1831        // 1. Load explicit config path if provided
1832        if let Some(path) = config_path {
1833            let path_obj = Path::new(path);
1834            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
1835            log::debug!("[rumdl-config] Trying to load config file: {filename}");
1836            let path_str = path.to_string();
1837
1838            // Known markdownlint config files
1839            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
1840
1841            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
1842                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1843                    source: e,
1844                    path: path_str.clone(),
1845                })?;
1846                if filename == "pyproject.toml" {
1847                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1848                        sourced_config.merge(fragment);
1849                        sourced_config.loaded_files.push(path_str.clone());
1850                    }
1851                } else {
1852                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1853                    sourced_config.merge(fragment);
1854                    sourced_config.loaded_files.push(path_str.clone());
1855                }
1856            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
1857                || path_str.ends_with(".json")
1858                || path_str.ends_with(".jsonc")
1859                || path_str.ends_with(".yaml")
1860                || path_str.ends_with(".yml")
1861            {
1862                // Parse as markdownlint config (JSON/YAML)
1863                let fragment = load_from_markdownlint(&path_str)?;
1864                sourced_config.merge(fragment);
1865                sourced_config.loaded_files.push(path_str.clone());
1866                // markdownlint is fallback only
1867            } else {
1868                // Try TOML only
1869                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1870                    source: e,
1871                    path: path_str.clone(),
1872                })?;
1873                let fragment = parse_rumdl_toml(&content, &path_str)?;
1874                sourced_config.merge(fragment);
1875                sourced_config.loaded_files.push(path_str.clone());
1876            }
1877        }
1878
1879        // Only perform auto-discovery if not skipped AND no explicit config path provided
1880        if !skip_auto_discovery && config_path.is_none() {
1881            // Step 1: Load user configuration first (as a base)
1882            let user_config_path = if let Some(dir) = user_config_dir {
1883                Self::user_configuration_path_impl(dir)
1884            } else {
1885                Self::user_configuration_path()
1886            };
1887
1888            if let Some(user_config_path) = user_config_path {
1889                let path_str = user_config_path.display().to_string();
1890                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1891
1892                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
1893
1894                if filename == "pyproject.toml" {
1895                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1896                        source: e,
1897                        path: path_str.clone(),
1898                    })?;
1899                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1900                        sourced_config.merge(fragment);
1901                        sourced_config.loaded_files.push(path_str);
1902                    }
1903                } else {
1904                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1905                        source: e,
1906                        path: path_str.clone(),
1907                    })?;
1908                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1909                    sourced_config.merge(fragment);
1910                    sourced_config.loaded_files.push(path_str);
1911                }
1912            } else {
1913                log::debug!("[rumdl-config] No user configuration file found");
1914            }
1915
1916            // Step 2: Look for project configuration files (override user config)
1917            if let Some(config_file) = Self::discover_config_upward() {
1918                let path_str = config_file.display().to_string();
1919                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
1920
1921                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
1922
1923                if filename == "pyproject.toml" {
1924                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1925                        source: e,
1926                        path: path_str.clone(),
1927                    })?;
1928                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1929                        sourced_config.merge(fragment);
1930                        sourced_config.loaded_files.push(path_str);
1931                    }
1932                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
1933                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1934                        source: e,
1935                        path: path_str.clone(),
1936                    })?;
1937                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1938                    sourced_config.merge(fragment);
1939                    sourced_config.loaded_files.push(path_str);
1940                }
1941            } else {
1942                log::debug!("[rumdl-config] No configuration file found via upward traversal");
1943
1944                // Step 3: If no project config found, fallback to markdownlint config in current directory
1945                let mut found_markdownlint = false;
1946                for filename in MARKDOWNLINT_CONFIG_FILES {
1947                    if std::path::Path::new(filename).exists() {
1948                        match load_from_markdownlint(filename) {
1949                            Ok(fragment) => {
1950                                sourced_config.merge(fragment);
1951                                sourced_config.loaded_files.push(filename.to_string());
1952                                found_markdownlint = true;
1953                                break; // Load only the first one found
1954                            }
1955                            Err(_e) => {
1956                                // Log error but continue (it's just a fallback)
1957                            }
1958                        }
1959                    }
1960                }
1961
1962                if !found_markdownlint {
1963                    log::debug!("[rumdl-config] No markdownlint configuration file found");
1964                }
1965            }
1966        }
1967
1968        // 5. Apply CLI overrides (highest precedence)
1969        if let Some(cli) = cli_overrides {
1970            sourced_config
1971                .global
1972                .enable
1973                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
1974            sourced_config
1975                .global
1976                .disable
1977                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
1978            sourced_config
1979                .global
1980                .exclude
1981                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
1982            sourced_config
1983                .global
1984                .include
1985                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
1986            sourced_config.global.respect_gitignore.merge_override(
1987                cli.respect_gitignore.value,
1988                ConfigSource::Cli,
1989                None,
1990                None,
1991            );
1992            sourced_config
1993                .global
1994                .fixable
1995                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
1996            sourced_config
1997                .global
1998                .unfixable
1999                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2000            // No rule-specific CLI overrides implemented yet
2001        }
2002
2003        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2004
2005        Ok(sourced_config)
2006    }
2007
2008    /// Load and merge configurations from files and CLI overrides.
2009    /// If skip_auto_discovery is true, only explicit config paths are loaded.
2010    pub fn load_with_discovery(
2011        config_path: Option<&str>,
2012        cli_overrides: Option<&SourcedGlobalConfig>,
2013        skip_auto_discovery: bool,
2014    ) -> Result<Self, ConfigError> {
2015        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
2016    }
2017}
2018
2019impl From<SourcedConfig> for Config {
2020    fn from(sourced: SourcedConfig) -> Self {
2021        let mut rules = BTreeMap::new();
2022        for (rule_name, sourced_rule_cfg) in sourced.rules {
2023            // Normalize rule name to uppercase for case-insensitive lookup
2024            let normalized_rule_name = rule_name.to_ascii_uppercase();
2025            let mut values = BTreeMap::new();
2026            for (key, sourced_val) in sourced_rule_cfg.values {
2027                values.insert(key, sourced_val.value);
2028            }
2029            rules.insert(normalized_rule_name, RuleConfig { values });
2030        }
2031        #[allow(deprecated)]
2032        let global = GlobalConfig {
2033            enable: sourced.global.enable.value,
2034            disable: sourced.global.disable.value,
2035            exclude: sourced.global.exclude.value,
2036            include: sourced.global.include.value,
2037            respect_gitignore: sourced.global.respect_gitignore.value,
2038            line_length: sourced.global.line_length.value,
2039            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2040            fixable: sourced.global.fixable.value,
2041            unfixable: sourced.global.unfixable.value,
2042            flavor: sourced.global.flavor.value,
2043            force_exclude: sourced.global.force_exclude.value,
2044        };
2045        Config {
2046            global,
2047            per_file_ignores: sourced.per_file_ignores.value,
2048            rules,
2049        }
2050    }
2051}
2052
/// Registry of all known rules and their config schemas
///
/// Built by `RuleRegistry::from_rules`; both maps are keyed by the rule name after it has
/// been passed through `normalize_key`.
pub struct RuleRegistry {
    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types.
    /// The table is the rule's default config section; rules without one get an empty table.
    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
    /// Map of rule name to config key aliases (alias key -> canonical schema key).
    /// Only rules that report aliases have an entry.
    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
}
2060
2061impl RuleRegistry {
2062    /// Build a registry from a list of rules
2063    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2064        let mut rule_schemas = std::collections::BTreeMap::new();
2065        let mut rule_aliases = std::collections::BTreeMap::new();
2066
2067        for rule in rules {
2068            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2069                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2070                rule_schemas.insert(norm_name.clone(), table);
2071                norm_name
2072            } else {
2073                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2074                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2075                norm_name
2076            };
2077
2078            // Store aliases if the rule provides them
2079            if let Some(aliases) = rule.config_aliases() {
2080                rule_aliases.insert(norm_name, aliases);
2081            }
2082        }
2083
2084        RuleRegistry {
2085            rule_schemas,
2086            rule_aliases,
2087        }
2088    }
2089
2090    /// Get all known rule names
2091    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2092        self.rule_schemas.keys().cloned().collect()
2093    }
2094
2095    /// Get the valid configuration keys for a rule, including both original and normalized variants
2096    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2097        self.rule_schemas.get(rule).map(|schema| {
2098            let mut all_keys = std::collections::BTreeSet::new();
2099
2100            // Add original keys from schema
2101            for key in schema.keys() {
2102                all_keys.insert(key.clone());
2103            }
2104
2105            // Add normalized variants for markdownlint compatibility
2106            for key in schema.keys() {
2107                // Add kebab-case variant
2108                all_keys.insert(key.replace('_', "-"));
2109                // Add snake_case variant
2110                all_keys.insert(key.replace('-', "_"));
2111                // Add normalized variant
2112                all_keys.insert(normalize_key(key));
2113            }
2114
2115            // Add any aliases defined by the rule
2116            if let Some(aliases) = self.rule_aliases.get(rule) {
2117                for alias_key in aliases.keys() {
2118                    all_keys.insert(alias_key.clone());
2119                    // Also add normalized variants of the alias
2120                    all_keys.insert(alias_key.replace('_', "-"));
2121                    all_keys.insert(alias_key.replace('-', "_"));
2122                    all_keys.insert(normalize_key(alias_key));
2123                }
2124            }
2125
2126            all_keys
2127        })
2128    }
2129
2130    /// Get the expected value type for a rule's configuration key, trying variants
2131    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2132        if let Some(schema) = self.rule_schemas.get(rule) {
2133            // Check if this key is an alias
2134            if let Some(aliases) = self.rule_aliases.get(rule)
2135                && let Some(canonical_key) = aliases.get(key)
2136            {
2137                // Use the canonical key for schema lookup
2138                if let Some(value) = schema.get(canonical_key) {
2139                    return Some(value);
2140                }
2141            }
2142
2143            // Try the original key
2144            if let Some(value) = schema.get(key) {
2145                return Some(value);
2146            }
2147
2148            // Try key variants
2149            let key_variants = [
2150                key.replace('-', "_"), // Convert kebab-case to snake_case
2151                key.replace('_', "-"), // Convert snake_case to kebab-case
2152                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2153            ];
2154
2155            for variant in &key_variants {
2156                if let Some(value) = schema.get(variant) {
2157                    return Some(value);
2158                }
2159            }
2160        }
2161        None
2162    }
2163}
2164
/// Represents a config validation warning or error
///
/// Instances are produced by `validate_config_sourced`.
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem.
    pub message: String,
    /// Rule the warning refers to, when it was raised for an entry of the rules map;
    /// `None` for warnings derived from the collected unknown keys.
    pub rule: Option<String>,
    /// Offending option name, when the warning is about a specific key;
    /// `None` when a whole rule/section is unknown.
    pub key: Option<String>,
}
2172
2173/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking
2174pub fn validate_config_sourced(sourced: &SourcedConfig, registry: &RuleRegistry) -> Vec<ConfigValidationWarning> {
2175    let mut warnings = Vec::new();
2176    let known_rules = registry.rule_names();
2177    // 1. Unknown rules
2178    for rule in sourced.rules.keys() {
2179        if !known_rules.contains(rule) {
2180            warnings.push(ConfigValidationWarning {
2181                message: format!("Unknown rule in config: {rule}"),
2182                rule: Some(rule.clone()),
2183                key: None,
2184            });
2185        }
2186    }
2187    // 2. Unknown options and type mismatches
2188    for (rule, rule_cfg) in &sourced.rules {
2189        if let Some(valid_keys) = registry.config_keys_for(rule) {
2190            for key in rule_cfg.values.keys() {
2191                if !valid_keys.contains(key) {
2192                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2193                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2194                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2195                    } else {
2196                        format!("Unknown option for rule {rule}: {key}")
2197                    };
2198                    warnings.push(ConfigValidationWarning {
2199                        message,
2200                        rule: Some(rule.clone()),
2201                        key: Some(key.clone()),
2202                    });
2203                } else {
2204                    // Type check: compare type of value to type of default
2205                    if let Some(expected) = registry.expected_value_for(rule, key) {
2206                        let actual = &rule_cfg.values[key].value;
2207                        if !toml_value_type_matches(expected, actual) {
2208                            warnings.push(ConfigValidationWarning {
2209                                message: format!(
2210                                    "Type mismatch for {}.{}: expected {}, got {}",
2211                                    rule,
2212                                    key,
2213                                    toml_type_name(expected),
2214                                    toml_type_name(actual)
2215                                ),
2216                                rule: Some(rule.clone()),
2217                                key: Some(key.clone()),
2218                            });
2219                        }
2220                    }
2221                }
2222            }
2223        }
2224    }
2225    // 3. Unknown global options (from unknown_keys)
2226    let known_global_keys = vec![
2227        "enable".to_string(),
2228        "disable".to_string(),
2229        "include".to_string(),
2230        "exclude".to_string(),
2231        "respect-gitignore".to_string(),
2232        "line-length".to_string(),
2233        "fixable".to_string(),
2234        "unfixable".to_string(),
2235        "flavor".to_string(),
2236        "force-exclude".to_string(),
2237        "output-format".to_string(),
2238    ];
2239
2240    for (section, key, file_path) in &sourced.unknown_keys {
2241        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2242            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2243                if let Some(path) = file_path {
2244                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2245                } else {
2246                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2247                }
2248            } else if let Some(path) = file_path {
2249                format!("Unknown global option in {path}: {key}")
2250            } else {
2251                format!("Unknown global option: {key}")
2252            };
2253            warnings.push(ConfigValidationWarning {
2254                message,
2255                rule: None,
2256                key: Some(key.clone()),
2257            });
2258        } else if !key.is_empty() {
2259            // This is an unknown rule section (key is empty means it's a section header)
2260            // No suggestions for rule names - just warn
2261            continue;
2262        } else {
2263            // Unknown rule section
2264            let message = if let Some(path) = file_path {
2265                format!(
2266                    "Unknown rule in {path}: {}",
2267                    section.trim_matches(|c| c == '[' || c == ']')
2268                )
2269            } else {
2270                format!(
2271                    "Unknown rule in config: {}",
2272                    section.trim_matches(|c| c == '[' || c == ']')
2273                )
2274            };
2275            warnings.push(ConfigValidationWarning {
2276                message,
2277                rule: None,
2278                key: None,
2279            });
2280        }
2281    }
2282    warnings
2283}
2284
2285fn toml_type_name(val: &toml::Value) -> &'static str {
2286    match val {
2287        toml::Value::String(_) => "string",
2288        toml::Value::Integer(_) => "integer",
2289        toml::Value::Float(_) => "float",
2290        toml::Value::Boolean(_) => "boolean",
2291        toml::Value::Array(_) => "array",
2292        toml::Value::Table(_) => "table",
2293        toml::Value::Datetime(_) => "datetime",
2294    }
2295}
2296
/// Calculate the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions, deletions and
/// substitutions needed to turn `s1` into `s2`. Characters are Unicode scalar values
/// (`char`), so multi-byte UTF-8 input is handled correctly: all lengths are taken from
/// the decoded character sequences, never from the byte length of the strings.
///
/// Uses the two-row dynamic-programming formulation, so memory use is proportional to
/// the length of `s2` only.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();
    let len1 = s1_chars.len();
    let len2 = s2_chars.len();

    // Turning an empty string into the other one takes one insertion per character.
    if len1 == 0 {
        return len2;
    }
    if len2 == 0 {
        return len1;
    }

    // prev_row[j] = distance between the first i-1 chars of s1 and the first j chars of s2.
    let mut prev_row: Vec<usize> = (0..=len2).collect();
    let mut curr_row = vec![0; len2 + 1];

    for (i, c1) in s1_chars.iter().enumerate() {
        curr_row[0] = i + 1;
        for (j, c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    prev_row[len2]
}
2328
2329/// Suggest a similar key from a list of valid keys using fuzzy matching
2330fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2331    let unknown_lower = unknown.to_lowercase();
2332    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2333
2334    let mut best_match: Option<(String, usize)> = None;
2335
2336    for valid in valid_keys {
2337        let valid_lower = valid.to_lowercase();
2338        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2339
2340        if distance <= max_distance {
2341            if let Some((_, best_dist)) = &best_match {
2342                if distance < *best_dist {
2343                    best_match = Some((valid.clone(), distance));
2344                }
2345            } else {
2346                best_match = Some((valid.clone(), distance));
2347            }
2348        }
2349    }
2350
2351    best_match.map(|(key, _)| key)
2352}
2353
2354fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2355    use toml::Value::*;
2356    match (expected, actual) {
2357        (String(_), String(_)) => true,
2358        (Integer(_), Integer(_)) => true,
2359        (Float(_), Float(_)) => true,
2360        (Boolean(_), Boolean(_)) => true,
2361        (Array(_), Array(_)) => true,
2362        (Table(_), Table(_)) => true,
2363        (Datetime(_), Datetime(_)) => true,
2364        // Allow integer for float
2365        (Float(_), Integer(_)) => true,
2366        _ => false,
2367    }
2368}
2369
2370/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2371fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2372    let doc: toml::Value =
2373        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2374    let mut fragment = SourcedConfigFragment::default();
2375    let source = ConfigSource::PyprojectToml;
2376    let file = Some(path.to_string());
2377
2378    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2379    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2380        && let Some(rumdl_table) = rumdl_config.as_table()
2381    {
2382        // Helper function to extract global config from a table
2383        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2384            // Extract global options from the given table
2385            if let Some(enable) = table.get("enable")
2386                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2387            {
2388                // Normalize rule names in the list
2389                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2390                fragment
2391                    .global
2392                    .enable
2393                    .push_override(normalized_values, source, file.clone(), None);
2394            }
2395
2396            if let Some(disable) = table.get("disable")
2397                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2398            {
2399                // Re-enable normalization
2400                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2401                fragment
2402                    .global
2403                    .disable
2404                    .push_override(normalized_values, source, file.clone(), None);
2405            }
2406
2407            if let Some(include) = table.get("include")
2408                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2409            {
2410                fragment
2411                    .global
2412                    .include
2413                    .push_override(values, source, file.clone(), None);
2414            }
2415
2416            if let Some(exclude) = table.get("exclude")
2417                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2418            {
2419                fragment
2420                    .global
2421                    .exclude
2422                    .push_override(values, source, file.clone(), None);
2423            }
2424
2425            if let Some(respect_gitignore) = table
2426                .get("respect-gitignore")
2427                .or_else(|| table.get("respect_gitignore"))
2428                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2429            {
2430                fragment
2431                    .global
2432                    .respect_gitignore
2433                    .push_override(value, source, file.clone(), None);
2434            }
2435
2436            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2437                && let Ok(value) = bool::deserialize(force_exclude.clone())
2438            {
2439                fragment
2440                    .global
2441                    .force_exclude
2442                    .push_override(value, source, file.clone(), None);
2443            }
2444
2445            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2446                && let Ok(value) = String::deserialize(output_format.clone())
2447            {
2448                if fragment.global.output_format.is_none() {
2449                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2450                } else {
2451                    fragment
2452                        .global
2453                        .output_format
2454                        .as_mut()
2455                        .unwrap()
2456                        .push_override(value, source, file.clone(), None);
2457                }
2458            }
2459
2460            if let Some(fixable) = table.get("fixable")
2461                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2462            {
2463                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2464                fragment
2465                    .global
2466                    .fixable
2467                    .push_override(normalized_values, source, file.clone(), None);
2468            }
2469
2470            if let Some(unfixable) = table.get("unfixable")
2471                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2472            {
2473                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2474                fragment
2475                    .global
2476                    .unfixable
2477                    .push_override(normalized_values, source, file.clone(), None);
2478            }
2479
2480            if let Some(flavor) = table.get("flavor")
2481                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2482            {
2483                fragment.global.flavor.push_override(value, source, file.clone(), None);
2484            }
2485
2486            // Handle line-length special case - this should set the global line_length
2487            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
2488                && let Ok(value) = u64::deserialize(line_length.clone())
2489            {
2490                fragment
2491                    .global
2492                    .line_length
2493                    .push_override(value, source, file.clone(), None);
2494
2495                // Also add to MD013 rule config for backward compatibility
2496                let norm_md013_key = normalize_key("MD013");
2497                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
2498                let norm_line_length_key = normalize_key("line-length");
2499                let sv = rule_entry
2500                    .values
2501                    .entry(norm_line_length_key)
2502                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
2503                sv.push_override(line_length.clone(), source, file.clone(), None);
2504            }
2505        };
2506
2507        // First, check for [tool.rumdl.global] section
2508        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
2509            extract_global_config(&mut fragment, global_table);
2510        }
2511
2512        // Also extract global options from [tool.rumdl] directly (for flat structure)
2513        extract_global_config(&mut fragment, rumdl_table);
2514
2515        // --- Extract per-file-ignores configurations ---
2516        // Check both hyphenated and underscored versions for compatibility
2517        let per_file_ignores_key = rumdl_table
2518            .get("per-file-ignores")
2519            .or_else(|| rumdl_table.get("per_file_ignores"));
2520
2521        if let Some(per_file_ignores_value) = per_file_ignores_key
2522            && let Some(per_file_table) = per_file_ignores_value.as_table()
2523        {
2524            let mut per_file_map = HashMap::new();
2525            for (pattern, rules_value) in per_file_table {
2526                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
2527                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
2528                    per_file_map.insert(pattern.clone(), normalized_rules);
2529                } else {
2530                    log::warn!(
2531                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
2532                    );
2533                }
2534            }
2535            fragment
2536                .per_file_ignores
2537                .push_override(per_file_map, source, file.clone(), None);
2538        }
2539
2540        // --- Extract rule-specific configurations ---
2541        for (key, value) in rumdl_table {
2542            let norm_rule_key = normalize_key(key);
2543
2544            // Skip keys already handled as global or special cases
2545            if [
2546                "enable",
2547                "disable",
2548                "include",
2549                "exclude",
2550                "respect_gitignore",
2551                "respect-gitignore", // Added kebab-case here too
2552                "force_exclude",
2553                "force-exclude",
2554                "line_length",
2555                "line-length",
2556                "output_format",
2557                "output-format",
2558                "fixable",
2559                "unfixable",
2560                "per-file-ignores",
2561                "per_file_ignores",
2562                "global",
2563            ]
2564            .contains(&norm_rule_key.as_str())
2565            {
2566                continue;
2567            }
2568
2569            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
2570            // AND if the value is actually a TOML table before processing as rule config.
2571            // This prevents misinterpreting other top-level keys under [tool.rumdl]
2572            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
2573            if norm_rule_key_upper.len() == 5
2574                && norm_rule_key_upper.starts_with("MD")
2575                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
2576                && value.is_table()
2577            {
2578                if let Some(rule_config_table) = value.as_table() {
2579                    // Get the entry for this rule (e.g., "md013")
2580                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
2581                    for (rk, rv) in rule_config_table {
2582                        let norm_rk = normalize_key(rk); // Normalize the config key itself
2583
2584                        let toml_val = rv.clone();
2585
2586                        let sv = rule_entry
2587                            .values
2588                            .entry(norm_rk.clone())
2589                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2590                        sv.push_override(toml_val, source, file.clone(), None);
2591                    }
2592                }
2593            } else {
2594                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
2595                // Track unknown keys under [tool.rumdl] for validation
2596                fragment
2597                    .unknown_keys
2598                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
2599            }
2600        }
2601    }
2602
2603    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
2604    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
2605        for (key, value) in tool_table.iter() {
2606            if let Some(rule_name) = key.strip_prefix("rumdl.") {
2607                let norm_rule_name = normalize_key(rule_name);
2608                if norm_rule_name.len() == 5
2609                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2610                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2611                    && let Some(rule_table) = value.as_table()
2612                {
2613                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2614                    for (rk, rv) in rule_table {
2615                        let norm_rk = normalize_key(rk);
2616                        let toml_val = rv.clone();
2617                        let sv = rule_entry
2618                            .values
2619                            .entry(norm_rk.clone())
2620                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2621                        sv.push_override(toml_val, source, file.clone(), None);
2622                    }
2623                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2624                    // Track unknown rule sections like [tool.rumdl.MD999]
2625                    fragment.unknown_keys.push((
2626                        format!("[tool.rumdl.{rule_name}]"),
2627                        String::new(),
2628                        Some(path.to_string()),
2629                    ));
2630                }
2631            }
2632        }
2633    }
2634
2635    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
2636    if let Some(doc_table) = doc.as_table() {
2637        for (key, value) in doc_table.iter() {
2638            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
2639                let norm_rule_name = normalize_key(rule_name);
2640                if norm_rule_name.len() == 5
2641                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2642                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2643                    && let Some(rule_table) = value.as_table()
2644                {
2645                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2646                    for (rk, rv) in rule_table {
2647                        let norm_rk = normalize_key(rk);
2648                        let toml_val = rv.clone();
2649                        let sv = rule_entry
2650                            .values
2651                            .entry(norm_rk.clone())
2652                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2653                        sv.push_override(toml_val, source, file.clone(), None);
2654                    }
2655                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2656                    // Track unknown rule sections like [tool.rumdl.MD999]
2657                    fragment.unknown_keys.push((
2658                        format!("[tool.rumdl.{rule_name}]"),
2659                        String::new(),
2660                        Some(path.to_string()),
2661                    ));
2662                }
2663            }
2664        }
2665    }
2666
2667    // Only return Some(fragment) if any config was found
2668    let has_any = !fragment.global.enable.value.is_empty()
2669        || !fragment.global.disable.value.is_empty()
2670        || !fragment.global.include.value.is_empty()
2671        || !fragment.global.exclude.value.is_empty()
2672        || !fragment.global.fixable.value.is_empty()
2673        || !fragment.global.unfixable.value.is_empty()
2674        || fragment.global.output_format.is_some()
2675        || !fragment.per_file_ignores.value.is_empty()
2676        || !fragment.rules.is_empty();
2677    if has_any { Ok(Some(fragment)) } else { Ok(None) }
2678}
2679
2680/// Parses rumdl.toml / .rumdl.toml content.
2681fn parse_rumdl_toml(content: &str, path: &str) -> Result<SourcedConfigFragment, ConfigError> {
2682    let doc = content
2683        .parse::<DocumentMut>()
2684        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2685    let mut fragment = SourcedConfigFragment::default();
2686    let source = ConfigSource::RumdlToml;
2687    let file = Some(path.to_string());
2688
2689    // Define known rules before the loop
2690    let all_rules = rules::all_rules(&Config::default());
2691    let registry = RuleRegistry::from_rules(&all_rules);
2692    let known_rule_names: BTreeSet<String> = registry
2693        .rule_names()
2694        .into_iter()
2695        .map(|s| s.to_ascii_uppercase())
2696        .collect();
2697
2698    // Handle [global] section
2699    if let Some(global_item) = doc.get("global")
2700        && let Some(global_table) = global_item.as_table()
2701    {
2702        for (key, value_item) in global_table.iter() {
2703            let norm_key = normalize_key(key);
2704            match norm_key.as_str() {
2705                "enable" | "disable" | "include" | "exclude" => {
2706                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2707                        // Corrected: Iterate directly over the Formatted<Array>
2708                        let values: Vec<String> = formatted_array
2709                                .iter()
2710                                .filter_map(|item| item.as_str()) // Extract strings
2711                                .map(|s| s.to_string())
2712                                .collect();
2713
2714                        // Normalize rule names for enable/disable
2715                        let final_values = if norm_key == "enable" || norm_key == "disable" {
2716                            // Corrected: Pass &str to normalize_key
2717                            values.into_iter().map(|s| normalize_key(&s)).collect()
2718                        } else {
2719                            values
2720                        };
2721
2722                        match norm_key.as_str() {
2723                            "enable" => fragment
2724                                .global
2725                                .enable
2726                                .push_override(final_values, source, file.clone(), None),
2727                            "disable" => {
2728                                fragment
2729                                    .global
2730                                    .disable
2731                                    .push_override(final_values, source, file.clone(), None)
2732                            }
2733                            "include" => {
2734                                fragment
2735                                    .global
2736                                    .include
2737                                    .push_override(final_values, source, file.clone(), None)
2738                            }
2739                            "exclude" => {
2740                                fragment
2741                                    .global
2742                                    .exclude
2743                                    .push_override(final_values, source, file.clone(), None)
2744                            }
2745                            _ => unreachable!("Outer match guarantees only enable/disable/include/exclude"),
2746                        }
2747                    } else {
2748                        log::warn!(
2749                            "[WARN] Expected array for global key '{}' in {}, found {}",
2750                            key,
2751                            path,
2752                            value_item.type_name()
2753                        );
2754                    }
2755                }
2756                "respect_gitignore" | "respect-gitignore" => {
2757                    // Handle both cases
2758                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2759                        let val = *formatted_bool.value();
2760                        fragment
2761                            .global
2762                            .respect_gitignore
2763                            .push_override(val, source, file.clone(), None);
2764                    } else {
2765                        log::warn!(
2766                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2767                            key,
2768                            path,
2769                            value_item.type_name()
2770                        );
2771                    }
2772                }
2773                "force_exclude" | "force-exclude" => {
2774                    // Handle both cases
2775                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2776                        let val = *formatted_bool.value();
2777                        fragment
2778                            .global
2779                            .force_exclude
2780                            .push_override(val, source, file.clone(), None);
2781                    } else {
2782                        log::warn!(
2783                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2784                            key,
2785                            path,
2786                            value_item.type_name()
2787                        );
2788                    }
2789                }
2790                "line_length" | "line-length" => {
2791                    // Handle both cases
2792                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
2793                        let val = *formatted_int.value() as u64;
2794                        fragment
2795                            .global
2796                            .line_length
2797                            .push_override(val, source, file.clone(), None);
2798                    } else {
2799                        log::warn!(
2800                            "[WARN] Expected integer for global key '{}' in {}, found {}",
2801                            key,
2802                            path,
2803                            value_item.type_name()
2804                        );
2805                    }
2806                }
2807                "output_format" | "output-format" => {
2808                    // Handle both cases
2809                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2810                        let val = formatted_string.value().clone();
2811                        if fragment.global.output_format.is_none() {
2812                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
2813                        } else {
2814                            fragment.global.output_format.as_mut().unwrap().push_override(
2815                                val,
2816                                source,
2817                                file.clone(),
2818                                None,
2819                            );
2820                        }
2821                    } else {
2822                        log::warn!(
2823                            "[WARN] Expected string for global key '{}' in {}, found {}",
2824                            key,
2825                            path,
2826                            value_item.type_name()
2827                        );
2828                    }
2829                }
2830                "fixable" => {
2831                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2832                        let values: Vec<String> = formatted_array
2833                            .iter()
2834                            .filter_map(|item| item.as_str())
2835                            .map(normalize_key)
2836                            .collect();
2837                        fragment
2838                            .global
2839                            .fixable
2840                            .push_override(values, source, file.clone(), None);
2841                    } else {
2842                        log::warn!(
2843                            "[WARN] Expected array for global key '{}' in {}, found {}",
2844                            key,
2845                            path,
2846                            value_item.type_name()
2847                        );
2848                    }
2849                }
2850                "unfixable" => {
2851                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2852                        let values: Vec<String> = formatted_array
2853                            .iter()
2854                            .filter_map(|item| item.as_str())
2855                            .map(normalize_key)
2856                            .collect();
2857                        fragment
2858                            .global
2859                            .unfixable
2860                            .push_override(values, source, file.clone(), None);
2861                    } else {
2862                        log::warn!(
2863                            "[WARN] Expected array for global key '{}' in {}, found {}",
2864                            key,
2865                            path,
2866                            value_item.type_name()
2867                        );
2868                    }
2869                }
2870                "flavor" => {
2871                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2872                        let val = formatted_string.value();
2873                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
2874                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
2875                        } else {
2876                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
2877                        }
2878                    } else {
2879                        log::warn!(
2880                            "[WARN] Expected string for global key '{}' in {}, found {}",
2881                            key,
2882                            path,
2883                            value_item.type_name()
2884                        );
2885                    }
2886                }
2887                _ => {
2888                    // Track unknown global keys for validation
2889                    fragment
2890                        .unknown_keys
2891                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
2892                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
2893                }
2894            }
2895        }
2896    }
2897
2898    // Handle [per-file-ignores] section
2899    if let Some(per_file_item) = doc.get("per-file-ignores")
2900        && let Some(per_file_table) = per_file_item.as_table()
2901    {
2902        let mut per_file_map = HashMap::new();
2903        for (pattern, value_item) in per_file_table.iter() {
2904            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2905                let rules: Vec<String> = formatted_array
2906                    .iter()
2907                    .filter_map(|item| item.as_str())
2908                    .map(normalize_key)
2909                    .collect();
2910                per_file_map.insert(pattern.to_string(), rules);
2911            } else {
2912                let type_name = value_item.type_name();
2913                log::warn!(
2914                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
2915                );
2916            }
2917        }
2918        fragment
2919            .per_file_ignores
2920            .push_override(per_file_map, source, file.clone(), None);
2921    }
2922
2923    // Rule-specific: all other top-level tables
2924    for (key, item) in doc.iter() {
2925        let norm_rule_name = key.to_ascii_uppercase();
2926
2927        // Skip known special sections
2928        if key == "global" || key == "per-file-ignores" {
2929            continue;
2930        }
2931
2932        // Track unknown rule sections (like [MD999])
2933        if !known_rule_names.contains(&norm_rule_name) {
2934            // Only track if it looks like a rule section (starts with MD or is uppercase)
2935            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
2936                fragment
2937                    .unknown_keys
2938                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
2939            }
2940            continue;
2941        }
2942
2943        if let Some(tbl) = item.as_table() {
2944            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
2945            for (rk, rv_item) in tbl.iter() {
2946                let norm_rk = normalize_key(rk);
2947                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
2948                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
2949                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
2950                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
2951                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
2952                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
2953                    Some(toml_edit::Value::Array(formatted_array)) => {
2954                        // Convert toml_edit Array to toml::Value::Array
2955                        let mut values = Vec::new();
2956                        for item in formatted_array.iter() {
2957                            match item {
2958                                toml_edit::Value::String(formatted) => {
2959                                    values.push(toml::Value::String(formatted.value().clone()))
2960                                }
2961                                toml_edit::Value::Integer(formatted) => {
2962                                    values.push(toml::Value::Integer(*formatted.value()))
2963                                }
2964                                toml_edit::Value::Float(formatted) => {
2965                                    values.push(toml::Value::Float(*formatted.value()))
2966                                }
2967                                toml_edit::Value::Boolean(formatted) => {
2968                                    values.push(toml::Value::Boolean(*formatted.value()))
2969                                }
2970                                toml_edit::Value::Datetime(formatted) => {
2971                                    values.push(toml::Value::Datetime(*formatted.value()))
2972                                }
2973                                _ => {
2974                                    log::warn!(
2975                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
2976                                    );
2977                                }
2978                            }
2979                        }
2980                        Some(toml::Value::Array(values))
2981                    }
2982                    Some(toml_edit::Value::InlineTable(_)) => {
2983                        log::warn!(
2984                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
2985                        );
2986                        None
2987                    }
2988                    None => {
2989                        log::warn!(
2990                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
2991                        );
2992                        None
2993                    }
2994                };
2995                if let Some(toml_val) = maybe_toml_val {
2996                    let sv = rule_entry
2997                        .values
2998                        .entry(norm_rk.clone())
2999                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3000                    sv.push_override(toml_val, source, file.clone(), None);
3001                }
3002            }
3003        } else if item.is_value() {
3004            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3005        }
3006    }
3007
3008    Ok(fragment)
3009}
3010
3011/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3012fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3013    // Use the unified loader from markdownlint_config.rs
3014    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3015        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3016    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3017}