rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use log;
8use serde::{Deserialize, Serialize};
9use std::collections::BTreeMap;
10use std::collections::{BTreeSet, HashMap, HashSet};
11use std::fmt;
12use std::fs;
13use std::io;
14use std::path::Path;
15use std::str::FromStr;
16use toml_edit::DocumentMut;
17
/// Markdown flavor/dialect enumeration
// NOTE(review): the `///` comments in this enum are presumably also surfaced as descriptions by
// the `schemars::JsonSchema` derive, so rewording them may change the generated schema — confirm
// before editing them.
//
// Each variant carries an explicit `serde(rename = ...)`, so the serialized names are
// "standard", "mkdocs", "mdx" and "quarto". `Standard` is the `Default` variant and additionally
// deserializes from "none" and from the empty string. The `FromStr` impl in this file accepts
// more spellings (e.g. "qmd", "gfm") than these serde attributes do.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments
    #[serde(rename = "standard", alias = "none", alias = "")]
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    #[serde(rename = "mkdocs")]
    MkDocs,
    /// MDX flavor with JSX and ESM support (.mdx files)
    #[serde(rename = "mdx")]
    MDX,
    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
    #[serde(rename = "quarto")]
    Quarto,
    // Future flavors can be added here when they have actual implementation differences
    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
    // Planned: CommonMark - for strict CommonMark compliance
}
39
40impl fmt::Display for MarkdownFlavor {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        match self {
43            MarkdownFlavor::Standard => write!(f, "standard"),
44            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
45            MarkdownFlavor::MDX => write!(f, "mdx"),
46            MarkdownFlavor::Quarto => write!(f, "quarto"),
47        }
48    }
49}
50
51impl FromStr for MarkdownFlavor {
52    type Err = String;
53
54    fn from_str(s: &str) -> Result<Self, Self::Err> {
55        match s.to_lowercase().as_str() {
56            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
57            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
58            "mdx" => Ok(MarkdownFlavor::MDX),
59            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
60            // Accept but warn about unimplemented flavors
61            "gfm" | "github" => {
62                eprintln!("Warning: GFM flavor not yet implemented, using standard");
63                Ok(MarkdownFlavor::Standard)
64            }
65            "commonmark" => {
66                eprintln!("Warning: CommonMark flavor not yet implemented, using standard");
67                Ok(MarkdownFlavor::Standard)
68            }
69            _ => Err(format!("Unknown markdown flavor: {s}")),
70        }
71    }
72}
73
74impl MarkdownFlavor {
75    /// Detect flavor from file extension
76    pub fn from_extension(ext: &str) -> Self {
77        match ext.to_lowercase().as_str() {
78            "mdx" => Self::MDX,
79            "qmd" => Self::Quarto,
80            "rmd" => Self::Quarto,
81            _ => Self::Standard,
82        }
83    }
84
85    /// Detect flavor from file path
86    pub fn from_path(path: &std::path::Path) -> Self {
87        path.extension()
88            .and_then(|e| e.to_str())
89            .map(Self::from_extension)
90            .unwrap_or(Self::Standard)
91    }
92
93    /// Check if this flavor supports ESM imports/exports (MDX-specific)
94    pub fn supports_esm_blocks(self) -> bool {
95        matches!(self, Self::MDX)
96    }
97
98    /// Check if this flavor supports JSX components (MDX-specific)
99    pub fn supports_jsx(self) -> bool {
100        matches!(self, Self::MDX)
101    }
102
103    /// Check if this flavor supports auto-references (MkDocs-specific)
104    pub fn supports_auto_references(self) -> bool {
105        matches!(self, Self::MkDocs)
106    }
107
108    /// Get a human-readable name for this flavor
109    pub fn name(self) -> &'static str {
110        match self {
111            Self::Standard => "Standard",
112            Self::MkDocs => "MkDocs",
113            Self::MDX => "MDX",
114            Self::Quarto => "Quarto",
115        }
116    }
117}
118
/// Normalizes a configuration key (rule name or option name).
///
/// * A rule identifier — exactly five bytes, `md` in any letter case followed by three
///   ASCII digits (e.g. `md013`) — is returned in upper case (`MD013`).
/// * Any other key has `_` replaced by `-` and its ASCII letters lowercased
///   (`Line_Length` becomes `line-length`).
pub fn normalize_key(key: &str) -> String {
    // Inspect the raw bytes so the rule-ID check needs no temporary `String`. A non-ASCII
    // character can never satisfy it: its bytes are neither `m`/`d` nor ASCII digits.
    let bytes = key.as_bytes();
    let is_rule_id = bytes.len() == 5
        && bytes[..2].eq_ignore_ascii_case(b"md")
        && bytes[2..].iter().all(u8::is_ascii_digit);

    if is_rule_id {
        key.to_ascii_uppercase()
    } else {
        key.replace('_', "-").to_ascii_lowercase()
    }
}
128
/// Represents a rule-specific configuration
// NOTE(review): the `///` comments here are presumably also used as JSON-schema descriptions by
// the `schemars::JsonSchema` derive — confirm before rewording them.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    // `serde(flatten)`: the option keys sit directly in the rule's own table rather than under a
    // nested `values` key. The schema for this field comes from `arbitrary_value_schema`
    // (an object that allows any additional properties).
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
137
138/// Generate a JSON schema for arbitrary configuration values
139fn arbitrary_value_schema(_gen: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
140    use schemars::schema::*;
141    Schema::Object(SchemaObject {
142        instance_type: Some(InstanceType::Object.into()),
143        object: Some(Box::new(ObjectValidation {
144            additional_properties: Some(Box::new(Schema::Bool(true))),
145            ..Default::default()
146        })),
147        ..Default::default()
148    })
149}
150
/// Represents the complete configuration loaded from rumdl.toml
// NOTE(review): field-level `///` comments are presumably surfaced as JSON-schema descriptions by
// the `schemars::JsonSchema` derive (the struct-level description is set explicitly below), so
// reword them with care.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    // Falls back to `GlobalConfig::default()` when the `global` key is absent.
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    // Serialized under the kebab-case key `per-file-ignores`. The patterns are compiled as globs
    // by `Config::get_ignored_rules_for_file`.
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    // `serde(flatten)`: rule sections live at the top level of the document next to `global`,
    // keyed by rule name. `get_rule_config_value` looks rules up by their upper-case name.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,
}
179
180impl Config {
181    /// Check if the Markdown flavor is set to MkDocs
182    pub fn is_mkdocs_flavor(&self) -> bool {
183        self.global.flavor == MarkdownFlavor::MkDocs
184    }
185
186    // Future methods for when GFM and CommonMark are implemented:
187    // pub fn is_gfm_flavor(&self) -> bool
188    // pub fn is_commonmark_flavor(&self) -> bool
189
190    /// Get the configured Markdown flavor
191    pub fn markdown_flavor(&self) -> MarkdownFlavor {
192        self.global.flavor
193    }
194
195    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
196    pub fn is_mkdocs_project(&self) -> bool {
197        self.is_mkdocs_flavor()
198    }
199
200    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
201    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
202    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
203        use globset::{Glob, GlobSetBuilder};
204
205        let mut ignored_rules = HashSet::new();
206
207        if self.per_file_ignores.is_empty() {
208            return ignored_rules;
209        }
210
211        // Build a globset for efficient matching
212        let mut builder = GlobSetBuilder::new();
213        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
214
215        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
216            if let Ok(glob) = Glob::new(pattern) {
217                builder.add(glob);
218                pattern_to_rules.push((idx, rules));
219            } else {
220                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
221            }
222        }
223
224        let globset = match builder.build() {
225            Ok(gs) => gs,
226            Err(e) => {
227                log::error!("Failed to build globset for per-file-ignores: {e}");
228                return ignored_rules;
229            }
230        };
231
232        // Match the file path against all patterns
233        for match_idx in globset.matches(file_path) {
234            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
235                for rule in rules.iter() {
236                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
237                    ignored_rules.insert(normalize_key(rule));
238                }
239            }
240        }
241
242        ignored_rules
243    }
244}
245
/// Global configuration options
// NOTE(review): field-level `///` comments are presumably surfaced as JSON-schema descriptions by
// the `schemars::JsonSchema` derive — confirm before rewording them.
//
// The container-level `serde(default)` fills every missing field from `GlobalConfig::default()`;
// the per-field `default` attributes below name the same values explicitly.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default)]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    // Defaults to `true` via `default_respect_gitignore`.
    #[serde(default = "default_respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    // Defaults to 80 via `default_line_length`.
    #[serde(default = "default_line_length")]
    pub line_length: u64,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Omitted from serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    // NOTE(review): `MarkdownFlavor` currently has the variants standard, mkdocs, mdx and quarto.
    // "gfm" and "commonmark" are only accepted by its `FromStr` impl, where they fall back to
    // standard with a warning — the doc text above looks out of date.
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    // Code that reads or writes this field needs `#[allow(deprecated)]`, as the `Default` impl does.
    #[serde(default)]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,
}
301
/// Serde default for `GlobalConfig::respect_gitignore`: `.gitignore` files are
/// respected unless the configuration says otherwise.
fn default_respect_gitignore() -> bool {
    true
}

/// Serde default for `GlobalConfig::line_length`: 80.
fn default_line_length() -> u64 {
    80
}
309
310// Add the Default impl
311impl Default for GlobalConfig {
312    #[allow(deprecated)]
313    fn default() -> Self {
314        Self {
315            enable: Vec::new(),
316            disable: Vec::new(),
317            exclude: Vec::new(),
318            include: Vec::new(),
319            respect_gitignore: true,
320            line_length: 80,
321            output_format: None,
322            fixable: Vec::new(),
323            unfixable: Vec::new(),
324            flavor: MarkdownFlavor::default(),
325            force_exclude: false,
326        }
327    }
328}
329
/// File names of markdownlint configuration files: dotted and undotted variants with
/// the `.json`, `.jsonc`, `.yaml` and `.yml` extensions.
// NOTE(review): the code that consumes this list is outside this part of the file; presumably it
// is used when discovering a markdownlint config to import — confirm at the use site.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
340
341/// Create a default configuration file at the specified path
342pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
343    // Check if file already exists
344    if Path::new(path).exists() {
345        return Err(ConfigError::FileExists { path: path.to_string() });
346    }
347
348    // Default configuration content
349    let default_config = r#"# rumdl configuration file
350
351# Global configuration options
352[global]
353# List of rules to disable (uncomment and modify as needed)
354# disable = ["MD013", "MD033"]
355
356# List of rules to enable exclusively (if provided, only these rules will run)
357# enable = ["MD001", "MD003", "MD004"]
358
359# List of file/directory patterns to include for linting (if provided, only these will be linted)
360# include = [
361#    "docs/*.md",
362#    "src/**/*.md",
363#    "README.md"
364# ]
365
366# List of file/directory patterns to exclude from linting
367exclude = [
368    # Common directories to exclude
369    ".git",
370    ".github",
371    "node_modules",
372    "vendor",
373    "dist",
374    "build",
375
376    # Specific files or patterns
377    "CHANGELOG.md",
378    "LICENSE.md",
379]
380
381# Respect .gitignore files when scanning directories (default: true)
382respect-gitignore = true
383
384# Markdown flavor/dialect (uncomment to enable)
385# Options: mkdocs, gfm, commonmark
386# flavor = "mkdocs"
387
388# Rule-specific configurations (uncomment and modify as needed)
389
390# [MD003]
391# style = "atx"  # Heading style (atx, atx_closed, setext)
392
393# [MD004]
394# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
395
396# [MD007]
397# indent = 4  # Unordered list indentation
398
399# [MD013]
400# line-length = 100  # Line length
401# code-blocks = false  # Exclude code blocks from line length check
402# tables = false  # Exclude tables from line length check
403# headings = true  # Include headings in line length check
404
405# [MD044]
406# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
407# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
408"#;
409
410    // Write the default configuration to the file
411    match fs::write(path, default_config) {
412        Ok(_) => Ok(()),
413        Err(err) => Err(ConfigError::IoError {
414            source: err,
415            path: path.to_string(),
416        }),
417    }
418}
419
/// Errors that can occur when loading or creating configuration files.
///
/// The user-facing messages come from the `#[error(...)]` attributes on each variant.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on the configuration file at `path` failed; `source` is the
    /// underlying error.
    // NOTE(review): `create_default_config` also returns this variant when *writing* fails,
    // although the message below says "Failed to read".
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON); the payload is the
    /// parser's message.
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists at `path` (returned by `create_default_config`).
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
435
436/// Get a rule-specific configuration value
437/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
438/// for better markdownlint compatibility
439pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
440    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
441
442    let rule_config = config.rules.get(&norm_rule_name)?;
443
444    // Try multiple key variants to support both underscore and kebab-case formats
445    let key_variants = [
446        key.to_string(),       // Original key as provided
447        normalize_key(key),    // Normalized key (lowercase, kebab-case)
448        key.replace('-', "_"), // Convert kebab-case to snake_case
449        key.replace('_', "-"), // Convert snake_case to kebab-case
450    ];
451
452    // Try each variant until we find a match
453    for variant in &key_variants {
454        if let Some(value) = rule_config.values.get(variant)
455            && let Ok(result) = T::deserialize(value.clone())
456        {
457            return Some(result);
458        }
459    }
460
461    None
462}
463
/// Returns the default `[tool.rumdl]` section for insertion into a `pyproject.toml`.
///
/// The text starts with a blank line, sets a few global options and lists commented-out
/// examples for individual rules.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}
506
507#[cfg(test)]
508mod tests {
509    use super::*;
510    use std::fs;
511    use tempfile::tempdir;
512
513    #[test]
514    fn test_flavor_loading() {
515        let temp_dir = tempdir().unwrap();
516        let config_path = temp_dir.path().join(".rumdl.toml");
517        let config_content = r#"
518[global]
519flavor = "mkdocs"
520disable = ["MD001"]
521"#;
522        fs::write(&config_path, config_content).unwrap();
523
524        // Load the config
525        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
526        let config: Config = sourced.into();
527
528        // Check that flavor was loaded
529        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
530        assert!(config.is_mkdocs_flavor());
531        assert!(config.is_mkdocs_project()); // Test backwards compatibility
532        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
533    }
534
535    #[test]
536    fn test_pyproject_toml_root_level_config() {
537        let temp_dir = tempdir().unwrap();
538        let config_path = temp_dir.path().join("pyproject.toml");
539
540        // Create a test pyproject.toml with root-level configuration
541        let content = r#"
542[tool.rumdl]
543line-length = 120
544disable = ["MD033"]
545enable = ["MD001", "MD004"]
546include = ["docs/*.md"]
547exclude = ["node_modules"]
548respect-gitignore = true
549        "#;
550
551        fs::write(&config_path, content).unwrap();
552
553        // Load the config with skip_auto_discovery to avoid environment config files
554        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
555        let config: Config = sourced.into(); // Convert to plain config for assertions
556
557        // Check global settings
558        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
559        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
560        // Should now contain only the configured pattern since auto-discovery is disabled
561        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
562        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
563        assert!(config.global.respect_gitignore);
564
565        // Check line-length was correctly added to MD013
566        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
567        assert_eq!(line_length, Some(120));
568    }
569
570    #[test]
571    fn test_pyproject_toml_snake_case_and_kebab_case() {
572        let temp_dir = tempdir().unwrap();
573        let config_path = temp_dir.path().join("pyproject.toml");
574
575        // Test with both kebab-case and snake_case variants
576        let content = r#"
577[tool.rumdl]
578line-length = 150
579respect_gitignore = true
580        "#;
581
582        fs::write(&config_path, content).unwrap();
583
584        // Load the config with skip_auto_discovery to avoid environment config files
585        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
586        let config: Config = sourced.into(); // Convert to plain config for assertions
587
588        // Check settings were correctly loaded
589        assert!(config.global.respect_gitignore);
590        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
591        assert_eq!(line_length, Some(150));
592    }
593
594    #[test]
595    fn test_md013_key_normalization_in_rumdl_toml() {
596        let temp_dir = tempdir().unwrap();
597        let config_path = temp_dir.path().join(".rumdl.toml");
598        let config_content = r#"
599[MD013]
600line_length = 111
601line-length = 222
602"#;
603        fs::write(&config_path, config_content).unwrap();
604        // Load the config with skip_auto_discovery to avoid environment config files
605        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
606        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
607        // Now we should only get the explicitly configured key
608        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
609        assert_eq!(keys, vec!["line-length"]);
610        let val = &rule_cfg.values["line-length"].value;
611        assert_eq!(val.as_integer(), Some(222));
612        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
613        let config: Config = sourced.clone().into();
614        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
615        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
616        assert_eq!(v1, Some(222));
617        assert_eq!(v2, Some(222));
618    }
619
620    #[test]
621    fn test_md013_section_case_insensitivity() {
622        let temp_dir = tempdir().unwrap();
623        let config_path = temp_dir.path().join(".rumdl.toml");
624        let config_content = r#"
625[md013]
626line-length = 101
627
628[Md013]
629line-length = 102
630
631[MD013]
632line-length = 103
633"#;
634        fs::write(&config_path, config_content).unwrap();
635        // Load the config with skip_auto_discovery to avoid environment config files
636        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
637        let config: Config = sourced.clone().into();
638        // Only the last section should win, and be present
639        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
640        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
641        assert_eq!(keys, vec!["line-length"]);
642        let val = &rule_cfg.values["line-length"].value;
643        assert_eq!(val.as_integer(), Some(103));
644        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
645        assert_eq!(v, Some(103));
646    }
647
648    #[test]
649    fn test_md013_key_snake_and_kebab_case() {
650        let temp_dir = tempdir().unwrap();
651        let config_path = temp_dir.path().join(".rumdl.toml");
652        let config_content = r#"
653[MD013]
654line_length = 201
655line-length = 202
656"#;
657        fs::write(&config_path, config_content).unwrap();
658        // Load the config with skip_auto_discovery to avoid environment config files
659        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
660        let config: Config = sourced.clone().into();
661        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
662        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
663        assert_eq!(keys, vec!["line-length"]);
664        let val = &rule_cfg.values["line-length"].value;
665        assert_eq!(val.as_integer(), Some(202));
666        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
667        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
668        assert_eq!(v1, Some(202));
669        assert_eq!(v2, Some(202));
670    }
671
672    #[test]
673    fn test_unknown_rule_section_is_ignored() {
674        let temp_dir = tempdir().unwrap();
675        let config_path = temp_dir.path().join(".rumdl.toml");
676        let config_content = r#"
677[MD999]
678foo = 1
679bar = 2
680[MD013]
681line-length = 303
682"#;
683        fs::write(&config_path, config_content).unwrap();
684        // Load the config with skip_auto_discovery to avoid environment config files
685        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
686        let config: Config = sourced.clone().into();
687        // MD999 should not be present
688        assert!(!sourced.rules.contains_key("MD999"));
689        // MD013 should be present and correct
690        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
691        assert_eq!(v, Some(303));
692    }
693
694    #[test]
695    fn test_invalid_toml_syntax() {
696        let temp_dir = tempdir().unwrap();
697        let config_path = temp_dir.path().join(".rumdl.toml");
698
699        // Invalid TOML with unclosed string
700        let config_content = r#"
701[MD013]
702line-length = "unclosed string
703"#;
704        fs::write(&config_path, config_content).unwrap();
705
706        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
707        assert!(result.is_err());
708        match result.unwrap_err() {
709            ConfigError::ParseError(msg) => {
710                // The actual error message from toml parser might vary
711                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
712            }
713            _ => panic!("Expected ParseError"),
714        }
715    }
716
717    #[test]
718    fn test_wrong_type_for_config_value() {
719        let temp_dir = tempdir().unwrap();
720        let config_path = temp_dir.path().join(".rumdl.toml");
721
722        // line-length should be a number, not a string
723        let config_content = r#"
724[MD013]
725line-length = "not a number"
726"#;
727        fs::write(&config_path, config_content).unwrap();
728
729        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
730        let config: Config = sourced.into();
731
732        // The value should be loaded as a string, not converted
733        let rule_config = config.rules.get("MD013").unwrap();
734        let value = rule_config.values.get("line-length").unwrap();
735        assert!(matches!(value, toml::Value::String(_)));
736    }
737
738    #[test]
739    fn test_empty_config_file() {
740        let temp_dir = tempdir().unwrap();
741        let config_path = temp_dir.path().join(".rumdl.toml");
742
743        // Empty file
744        fs::write(&config_path, "").unwrap();
745
746        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
747        let config: Config = sourced.into();
748
749        // Should have default values
750        assert_eq!(config.global.line_length, 80);
751        assert!(config.global.respect_gitignore);
752        assert!(config.rules.is_empty());
753    }
754
755    #[test]
756    fn test_malformed_pyproject_toml() {
757        let temp_dir = tempdir().unwrap();
758        let config_path = temp_dir.path().join("pyproject.toml");
759
760        // Missing closing bracket
761        let content = r#"
762[tool.rumdl
763line-length = 120
764"#;
765        fs::write(&config_path, content).unwrap();
766
767        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
768        assert!(result.is_err());
769    }
770
771    #[test]
772    fn test_conflicting_config_values() {
773        let temp_dir = tempdir().unwrap();
774        let config_path = temp_dir.path().join(".rumdl.toml");
775
776        // Both enable and disable the same rule - these need to be in a global section
777        let config_content = r#"
778[global]
779enable = ["MD013"]
780disable = ["MD013"]
781"#;
782        fs::write(&config_path, config_content).unwrap();
783
784        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
785        let config: Config = sourced.into();
786
787        // Both should be present - resolution happens at runtime
788        assert!(config.global.enable.contains(&"MD013".to_string()));
789        assert!(config.global.disable.contains(&"MD013".to_string()));
790    }
791
792    #[test]
793    fn test_invalid_rule_names() {
794        let temp_dir = tempdir().unwrap();
795        let config_path = temp_dir.path().join(".rumdl.toml");
796
797        let config_content = r#"
798[global]
799enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
800disable = ["MD-001", "MD_002"]
801"#;
802        fs::write(&config_path, config_content).unwrap();
803
804        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
805        let config: Config = sourced.into();
806
807        // All values should be preserved as-is
808        assert_eq!(config.global.enable.len(), 4);
809        assert_eq!(config.global.disable.len(), 2);
810    }
811
812    #[test]
813    fn test_deeply_nested_config() {
814        let temp_dir = tempdir().unwrap();
815        let config_path = temp_dir.path().join(".rumdl.toml");
816
817        // This should be ignored as we don't support nested tables within rule configs
818        let config_content = r#"
819[MD013]
820line-length = 100
821[MD013.nested]
822value = 42
823"#;
824        fs::write(&config_path, config_content).unwrap();
825
826        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
827        let config: Config = sourced.into();
828
829        let rule_config = config.rules.get("MD013").unwrap();
830        assert_eq!(
831            rule_config.values.get("line-length").unwrap(),
832            &toml::Value::Integer(100)
833        );
834        // Nested table should not be present
835        assert!(!rule_config.values.contains_key("nested"));
836    }
837
838    #[test]
839    fn test_unicode_in_config() {
840        let temp_dir = tempdir().unwrap();
841        let config_path = temp_dir.path().join(".rumdl.toml");
842
843        let config_content = r#"
844[global]
845include = ["文档/*.md", "ドキュメント/*.md"]
846exclude = ["测试/*", "🚀/*"]
847
848[MD013]
849line-length = 80
850message = "行太长了 🚨"
851"#;
852        fs::write(&config_path, config_content).unwrap();
853
854        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
855        let config: Config = sourced.into();
856
857        assert_eq!(config.global.include.len(), 2);
858        assert_eq!(config.global.exclude.len(), 2);
859        assert!(config.global.include[0].contains("文档"));
860        assert!(config.global.exclude[1].contains("🚀"));
861
862        let rule_config = config.rules.get("MD013").unwrap();
863        let message = rule_config.values.get("message").unwrap();
864        if let toml::Value::String(s) = message {
865            assert!(s.contains("行太长了"));
866            assert!(s.contains("🚨"));
867        }
868    }
869
870    #[test]
871    fn test_extremely_long_values() {
872        let temp_dir = tempdir().unwrap();
873        let config_path = temp_dir.path().join(".rumdl.toml");
874
875        let long_string = "a".repeat(10000);
876        let config_content = format!(
877            r#"
878[global]
879exclude = ["{long_string}"]
880
881[MD013]
882line-length = 999999999
883"#
884        );
885
886        fs::write(&config_path, config_content).unwrap();
887
888        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
889        let config: Config = sourced.into();
890
891        assert_eq!(config.global.exclude[0].len(), 10000);
892        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
893        assert_eq!(line_length, Some(999999999));
894    }
895
896    #[test]
897    fn test_config_with_comments() {
898        let temp_dir = tempdir().unwrap();
899        let config_path = temp_dir.path().join(".rumdl.toml");
900
901        let config_content = r#"
902[global]
903# This is a comment
904enable = ["MD001"] # Enable MD001
905# disable = ["MD002"] # This is commented out
906
907[MD013] # Line length rule
908line-length = 100 # Set to 100 characters
909# ignored = true # This setting is commented out
910"#;
911        fs::write(&config_path, config_content).unwrap();
912
913        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
914        let config: Config = sourced.into();
915
916        assert_eq!(config.global.enable, vec!["MD001"]);
917        assert!(config.global.disable.is_empty()); // Commented out
918
919        let rule_config = config.rules.get("MD013").unwrap();
920        assert_eq!(rule_config.values.len(), 1); // Only line-length
921        assert!(!rule_config.values.contains_key("ignored"));
922    }
923
924    #[test]
925    fn test_arrays_in_rule_config() {
926        let temp_dir = tempdir().unwrap();
927        let config_path = temp_dir.path().join(".rumdl.toml");
928
929        let config_content = r#"
930[MD003]
931levels = [1, 2, 3]
932tags = ["important", "critical"]
933mixed = [1, "two", true]
934"#;
935        fs::write(&config_path, config_content).unwrap();
936
937        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
938        let config: Config = sourced.into();
939
940        // Arrays should now be properly parsed
941        let rule_config = config.rules.get("MD003").expect("MD003 config should exist");
942
943        // Check that arrays are present and correctly parsed
944        assert!(rule_config.values.contains_key("levels"));
945        assert!(rule_config.values.contains_key("tags"));
946        assert!(rule_config.values.contains_key("mixed"));
947
948        // Verify array contents
949        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
950            assert_eq!(levels.len(), 3);
951            assert_eq!(levels[0], toml::Value::Integer(1));
952            assert_eq!(levels[1], toml::Value::Integer(2));
953            assert_eq!(levels[2], toml::Value::Integer(3));
954        } else {
955            panic!("levels should be an array");
956        }
957
958        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
959            assert_eq!(tags.len(), 2);
960            assert_eq!(tags[0], toml::Value::String("important".to_string()));
961            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
962        } else {
963            panic!("tags should be an array");
964        }
965
966        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
967            assert_eq!(mixed.len(), 3);
968            assert_eq!(mixed[0], toml::Value::Integer(1));
969            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
970            assert_eq!(mixed[2], toml::Value::Boolean(true));
971        } else {
972            panic!("mixed should be an array");
973        }
974    }
975
976    #[test]
977    fn test_normalize_key_edge_cases() {
978        // Rule names
979        assert_eq!(normalize_key("MD001"), "MD001");
980        assert_eq!(normalize_key("md001"), "MD001");
981        assert_eq!(normalize_key("Md001"), "MD001");
982        assert_eq!(normalize_key("mD001"), "MD001");
983
984        // Non-rule names
985        assert_eq!(normalize_key("line_length"), "line-length");
986        assert_eq!(normalize_key("line-length"), "line-length");
987        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
988        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
989
990        // Edge cases
991        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
992        assert_eq!(normalize_key("MD00"), "md00"); // Too short
993        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
994        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
995        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
996        assert_eq!(normalize_key(""), "");
997        assert_eq!(normalize_key("_"), "-");
998        assert_eq!(normalize_key("___"), "---");
999    }
1000
1001    #[test]
1002    fn test_missing_config_file() {
1003        let temp_dir = tempdir().unwrap();
1004        let config_path = temp_dir.path().join("nonexistent.toml");
1005
1006        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1007        assert!(result.is_err());
1008        match result.unwrap_err() {
1009            ConfigError::IoError { .. } => {}
1010            _ => panic!("Expected IoError for missing file"),
1011        }
1012    }
1013
1014    #[test]
1015    #[cfg(unix)]
1016    fn test_permission_denied_config() {
1017        use std::os::unix::fs::PermissionsExt;
1018
1019        let temp_dir = tempdir().unwrap();
1020        let config_path = temp_dir.path().join(".rumdl.toml");
1021
1022        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1023
1024        // Remove read permissions
1025        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1026        perms.set_mode(0o000);
1027        fs::set_permissions(&config_path, perms).unwrap();
1028
1029        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1030
1031        // Restore permissions for cleanup
1032        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1033        perms.set_mode(0o644);
1034        fs::set_permissions(&config_path, perms).unwrap();
1035
1036        assert!(result.is_err());
1037        match result.unwrap_err() {
1038            ConfigError::IoError { .. } => {}
1039            _ => panic!("Expected IoError for permission denied"),
1040        }
1041    }
1042
1043    #[test]
1044    fn test_circular_reference_detection() {
1045        // This test is more conceptual since TOML doesn't support circular references
1046        // But we test that deeply nested structures don't cause stack overflow
1047        let temp_dir = tempdir().unwrap();
1048        let config_path = temp_dir.path().join(".rumdl.toml");
1049
1050        let mut config_content = String::from("[MD001]\n");
1051        for i in 0..100 {
1052            config_content.push_str(&format!("key{i} = {i}\n"));
1053        }
1054
1055        fs::write(&config_path, config_content).unwrap();
1056
1057        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1058        let config: Config = sourced.into();
1059
1060        let rule_config = config.rules.get("MD001").unwrap();
1061        assert_eq!(rule_config.values.len(), 100);
1062    }
1063
1064    #[test]
1065    fn test_special_toml_values() {
1066        let temp_dir = tempdir().unwrap();
1067        let config_path = temp_dir.path().join(".rumdl.toml");
1068
1069        let config_content = r#"
1070[MD001]
1071infinity = inf
1072neg_infinity = -inf
1073not_a_number = nan
1074datetime = 1979-05-27T07:32:00Z
1075local_date = 1979-05-27
1076local_time = 07:32:00
1077"#;
1078        fs::write(&config_path, config_content).unwrap();
1079
1080        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1081        let config: Config = sourced.into();
1082
1083        // Some values might not be parsed due to parser limitations
1084        if let Some(rule_config) = config.rules.get("MD001") {
1085            // Check special float values if present
1086            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1087                assert!(f.is_infinite() && f.is_sign_positive());
1088            }
1089            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1090                assert!(f.is_infinite() && f.is_sign_negative());
1091            }
1092            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1093                assert!(f.is_nan());
1094            }
1095
1096            // Check datetime values if present
1097            if let Some(val) = rule_config.values.get("datetime") {
1098                assert!(matches!(val, toml::Value::Datetime(_)));
1099            }
1100            // Note: local_date and local_time might not be parsed by the current implementation
1101        }
1102    }
1103
1104    #[test]
1105    fn test_default_config_passes_validation() {
1106        use crate::rules;
1107
1108        let temp_dir = tempdir().unwrap();
1109        let config_path = temp_dir.path().join(".rumdl.toml");
1110        let config_path_str = config_path.to_str().unwrap();
1111
1112        // Create the default config using the same function that `rumdl init` uses
1113        create_default_config(config_path_str).unwrap();
1114
1115        // Load it back as a SourcedConfig
1116        let sourced =
1117            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1118
1119        // Create the rule registry
1120        let all_rules = rules::all_rules(&Config::default());
1121        let registry = RuleRegistry::from_rules(&all_rules);
1122
1123        // Validate the config
1124        let warnings = validate_config_sourced(&sourced, &registry);
1125
1126        // The default config should have no warnings
1127        if !warnings.is_empty() {
1128            for warning in &warnings {
1129                eprintln!("Config validation warning: {}", warning.message);
1130                if let Some(rule) = &warning.rule {
1131                    eprintln!("  Rule: {rule}");
1132                }
1133                if let Some(key) = &warning.key {
1134                    eprintln!("  Key: {key}");
1135                }
1136            }
1137        }
1138        assert!(
1139            warnings.is_empty(),
1140            "Default config from rumdl init should pass validation without warnings"
1141        );
1142    }
1143
1144    #[test]
1145    fn test_per_file_ignores_config_parsing() {
1146        let temp_dir = tempdir().unwrap();
1147        let config_path = temp_dir.path().join(".rumdl.toml");
1148        let config_content = r#"
1149[per-file-ignores]
1150"README.md" = ["MD033"]
1151"docs/**/*.md" = ["MD013", "MD033"]
1152"test/*.md" = ["MD041"]
1153"#;
1154        fs::write(&config_path, config_content).unwrap();
1155
1156        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1157        let config: Config = sourced.into();
1158
1159        // Verify per-file-ignores was loaded
1160        assert_eq!(config.per_file_ignores.len(), 3);
1161        assert_eq!(
1162            config.per_file_ignores.get("README.md"),
1163            Some(&vec!["MD033".to_string()])
1164        );
1165        assert_eq!(
1166            config.per_file_ignores.get("docs/**/*.md"),
1167            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1168        );
1169        assert_eq!(
1170            config.per_file_ignores.get("test/*.md"),
1171            Some(&vec!["MD041".to_string()])
1172        );
1173    }
1174
1175    #[test]
1176    fn test_per_file_ignores_glob_matching() {
1177        use std::path::PathBuf;
1178
1179        let temp_dir = tempdir().unwrap();
1180        let config_path = temp_dir.path().join(".rumdl.toml");
1181        let config_content = r#"
1182[per-file-ignores]
1183"README.md" = ["MD033"]
1184"docs/**/*.md" = ["MD013"]
1185"**/test_*.md" = ["MD041"]
1186"#;
1187        fs::write(&config_path, config_content).unwrap();
1188
1189        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1190        let config: Config = sourced.into();
1191
1192        // Test exact match
1193        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1194        assert!(ignored.contains("MD033"));
1195        assert_eq!(ignored.len(), 1);
1196
1197        // Test glob pattern matching
1198        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1199        assert!(ignored.contains("MD013"));
1200        assert_eq!(ignored.len(), 1);
1201
1202        // Test recursive glob pattern
1203        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1204        assert!(ignored.contains("MD041"));
1205        assert_eq!(ignored.len(), 1);
1206
1207        // Test non-matching path
1208        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1209        assert!(ignored.is_empty());
1210    }
1211
1212    #[test]
1213    fn test_per_file_ignores_pyproject_toml() {
1214        let temp_dir = tempdir().unwrap();
1215        let config_path = temp_dir.path().join("pyproject.toml");
1216        let config_content = r#"
1217[tool.rumdl]
1218[tool.rumdl.per-file-ignores]
1219"README.md" = ["MD033", "MD013"]
1220"generated/*.md" = ["MD041"]
1221"#;
1222        fs::write(&config_path, config_content).unwrap();
1223
1224        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1225        let config: Config = sourced.into();
1226
1227        // Verify per-file-ignores was loaded from pyproject.toml
1228        assert_eq!(config.per_file_ignores.len(), 2);
1229        assert_eq!(
1230            config.per_file_ignores.get("README.md"),
1231            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1232        );
1233        assert_eq!(
1234            config.per_file_ignores.get("generated/*.md"),
1235            Some(&vec!["MD041".to_string()])
1236        );
1237    }
1238
1239    #[test]
1240    fn test_per_file_ignores_multiple_patterns_match() {
1241        use std::path::PathBuf;
1242
1243        let temp_dir = tempdir().unwrap();
1244        let config_path = temp_dir.path().join(".rumdl.toml");
1245        let config_content = r#"
1246[per-file-ignores]
1247"docs/**/*.md" = ["MD013"]
1248"**/api/*.md" = ["MD033"]
1249"docs/api/overview.md" = ["MD041"]
1250"#;
1251        fs::write(&config_path, config_content).unwrap();
1252
1253        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1254        let config: Config = sourced.into();
1255
1256        // File matches multiple patterns - should get union of all rules
1257        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1258        assert_eq!(ignored.len(), 3);
1259        assert!(ignored.contains("MD013"));
1260        assert!(ignored.contains("MD033"));
1261        assert!(ignored.contains("MD041"));
1262    }
1263
1264    #[test]
1265    fn test_per_file_ignores_rule_name_normalization() {
1266        use std::path::PathBuf;
1267
1268        let temp_dir = tempdir().unwrap();
1269        let config_path = temp_dir.path().join(".rumdl.toml");
1270        let config_content = r#"
1271[per-file-ignores]
1272"README.md" = ["md033", "MD013", "Md041"]
1273"#;
1274        fs::write(&config_path, config_content).unwrap();
1275
1276        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1277        let config: Config = sourced.into();
1278
1279        // All rule names should be normalized to uppercase
1280        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1281        assert_eq!(ignored.len(), 3);
1282        assert!(ignored.contains("MD033"));
1283        assert!(ignored.contains("MD013"));
1284        assert!(ignored.contains("MD041"));
1285    }
1286
1287    #[test]
1288    fn test_per_file_ignores_invalid_glob_pattern() {
1289        use std::path::PathBuf;
1290
1291        let temp_dir = tempdir().unwrap();
1292        let config_path = temp_dir.path().join(".rumdl.toml");
1293        let config_content = r#"
1294[per-file-ignores]
1295"[invalid" = ["MD033"]
1296"valid/*.md" = ["MD013"]
1297"#;
1298        fs::write(&config_path, config_content).unwrap();
1299
1300        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1301        let config: Config = sourced.into();
1302
1303        // Invalid pattern should be skipped, valid pattern should work
1304        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1305        assert!(ignored.contains("MD013"));
1306
1307        // Invalid pattern should not cause issues
1308        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1309        assert!(ignored2.is_empty());
1310    }
1311
1312    #[test]
1313    fn test_per_file_ignores_empty_section() {
1314        use std::path::PathBuf;
1315
1316        let temp_dir = tempdir().unwrap();
1317        let config_path = temp_dir.path().join(".rumdl.toml");
1318        let config_content = r#"
1319[global]
1320disable = ["MD001"]
1321
1322[per-file-ignores]
1323"#;
1324        fs::write(&config_path, config_content).unwrap();
1325
1326        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1327        let config: Config = sourced.into();
1328
1329        // Empty per-file-ignores should work fine
1330        assert_eq!(config.per_file_ignores.len(), 0);
1331        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1332        assert!(ignored.is_empty());
1333    }
1334
1335    #[test]
1336    fn test_per_file_ignores_with_underscores_in_pyproject() {
1337        let temp_dir = tempdir().unwrap();
1338        let config_path = temp_dir.path().join("pyproject.toml");
1339        let config_content = r#"
1340[tool.rumdl]
1341[tool.rumdl.per_file_ignores]
1342"README.md" = ["MD033"]
1343"#;
1344        fs::write(&config_path, config_content).unwrap();
1345
1346        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1347        let config: Config = sourced.into();
1348
1349        // Should support both per-file-ignores and per_file_ignores
1350        assert_eq!(config.per_file_ignores.len(), 1);
1351        assert_eq!(
1352            config.per_file_ignores.get("README.md"),
1353            Some(&vec!["MD033".to_string()])
1354        );
1355    }
1356
1357    #[test]
1358    fn test_generate_json_schema() {
1359        use schemars::schema_for;
1360        use std::env;
1361
1362        let schema = schema_for!(Config);
1363        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1364
1365        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1366        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1367            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1368            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1369            println!("Schema written to: {}", schema_path.display());
1370        }
1371
1372        // Basic validation that schema was generated
1373        assert!(schema_json.contains("\"title\": \"Config\""));
1374        assert!(schema_json.contains("\"global\""));
1375        assert!(schema_json.contains("\"per-file-ignores\""));
1376    }
1377}
1378
/// Identifies where a configuration value came from.
///
/// `SourcedValue::merge_override` ranks these sources, lowest to highest
/// precedence: `Default`, `PyprojectToml`, `Markdownlint`, `RumdlToml`, `Cli`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default; used for the initial values of the sourced config structs.
    Default,
    /// NOTE(review): presumably a rumdl TOML config file such as `.rumdl.toml` — confirm against the loader.
    RumdlToml,
    /// NOTE(review): presumably the `[tool.rumdl]` section of `pyproject.toml` — confirm against the loader.
    PyprojectToml,
    /// NOTE(review): presumably a command-line option — confirm against the CLI code.
    Cli,
    /// Value was loaded from a markdownlint config file (e.g. .markdownlint.json, .markdownlint.yaml)
    Markdownlint,
}
1388
/// One entry in the history of a `SourcedValue`: a value together with the
/// source that supplied it and, when known, its location.
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    /// The value supplied by this override.
    pub value: T,
    /// The kind of source that supplied the value.
    pub source: ConfigSource,
    /// Path of the file the value came from, if recorded.
    pub file: Option<String>,
    /// Line number of the value within `file`, if recorded.
    pub line: Option<usize>,
}
1396
/// A configuration value paired with its provenance.
///
/// `value` and `source` describe the currently effective setting, while
/// `overrides` records every value that has been applied, starting with the
/// one passed to `SourcedValue::new`.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    /// The currently effective value.
    pub value: T,
    /// The source that supplied `value`.
    pub source: ConfigSource,
    /// History of applied values, oldest first.
    pub overrides: Vec<ConfigOverride<T>>,
}
1403
1404impl<T: Clone> SourcedValue<T> {
1405    pub fn new(value: T, source: ConfigSource) -> Self {
1406        Self {
1407            value: value.clone(),
1408            source,
1409            overrides: vec![ConfigOverride {
1410                value,
1411                source,
1412                file: None,
1413                line: None,
1414            }],
1415        }
1416    }
1417
1418    /// Merges a new override into this SourcedValue based on source precedence.
1419    /// If the new source has higher or equal precedence, the value and source are updated,
1420    /// and the new override is added to the history.
1421    pub fn merge_override(
1422        &mut self,
1423        new_value: T,
1424        new_source: ConfigSource,
1425        new_file: Option<String>,
1426        new_line: Option<usize>,
1427    ) {
1428        // Helper function to get precedence, defined locally or globally
1429        fn source_precedence(src: ConfigSource) -> u8 {
1430            match src {
1431                ConfigSource::Default => 0,
1432                ConfigSource::PyprojectToml => 1,
1433                ConfigSource::Markdownlint => 2,
1434                ConfigSource::RumdlToml => 3,
1435                ConfigSource::Cli => 4,
1436            }
1437        }
1438
1439        if source_precedence(new_source) >= source_precedence(self.source) {
1440            self.value = new_value.clone();
1441            self.source = new_source;
1442            self.overrides.push(ConfigOverride {
1443                value: new_value,
1444                source: new_source,
1445                file: new_file,
1446                line: new_line,
1447            });
1448        }
1449    }
1450
1451    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1452        // This is essentially merge_override without the precedence check
1453        // We might consolidate these later, but keep separate for now during refactor
1454        self.value = value.clone();
1455        self.source = source;
1456        self.overrides.push(ConfigOverride {
1457            value,
1458            source,
1459            file,
1460            line,
1461        });
1462    }
1463}
1464
/// Global configuration options, each wrapped in a `SourcedValue` so that the
/// origin of the effective setting is tracked alongside the setting itself.
///
/// See the `Default` implementation for the built-in value of each option.
#[derive(Debug, Clone)]
pub struct SourcedGlobalConfig {
    pub enable: SourcedValue<Vec<String>>,
    pub disable: SourcedValue<Vec<String>>,
    pub exclude: SourcedValue<Vec<String>>,
    pub include: SourcedValue<Vec<String>>,
    pub respect_gitignore: SourcedValue<bool>,
    pub line_length: SourcedValue<u64>,
    /// `None` until some source provides an output format; there is no built-in default.
    pub output_format: Option<SourcedValue<String>>,
    pub fixable: SourcedValue<Vec<String>>,
    pub unfixable: SourcedValue<Vec<String>>,
    pub flavor: SourcedValue<MarkdownFlavor>,
    pub force_exclude: SourcedValue<bool>,
}
1479
1480impl Default for SourcedGlobalConfig {
1481    fn default() -> Self {
1482        SourcedGlobalConfig {
1483            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1484            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1485            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1486            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1487            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1488            line_length: SourcedValue::new(80, ConfigSource::Default),
1489            output_format: None,
1490            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1491            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1492            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1493            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1494        }
1495    }
1496}
1497
/// Provenance-tracked configuration for a single rule.
#[derive(Debug, Default, Clone)]
pub struct SourcedRuleConfig {
    /// The rule's settings, keyed by setting name, each stored with its provenance.
    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
}
1502
/// Represents configuration loaded from a single source file, with provenance.
/// Used as an intermediate step before merging into the final SourcedConfig.
///
/// Unlike `SourcedConfig`, a fragment has no `loaded_files` field; that list
/// is tracked globally in `SourcedConfig`.
#[derive(Debug, Clone)]
pub struct SourcedConfigFragment {
    /// Global options found in this source.
    pub global: SourcedGlobalConfig,
    /// Map from file pattern to the rule names ignored for matching files.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Per-rule settings, keyed by rule name.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Keys that were not recognized, as `(section, key, file_path)` tuples.
    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
}
1513
1514impl Default for SourcedConfigFragment {
1515    fn default() -> Self {
1516        Self {
1517            global: SourcedGlobalConfig::default(),
1518            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1519            rules: BTreeMap::new(),
1520            unknown_keys: Vec::new(),
1521        }
1522    }
1523}
1524
/// The merged configuration with provenance for every value.
///
/// `SourcedConfigFragment`s are folded into this structure by `merge`, which
/// applies source precedence to decide which values take effect.
#[derive(Debug, Clone)]
pub struct SourcedConfig {
    /// Effective global options.
    pub global: SourcedGlobalConfig,
    /// Map from file pattern to the rule names ignored for matching files.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Per-rule settings, keyed by rule name (upper-cased when merged).
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// NOTE(review): presumably the paths of the config files that were loaded — confirm against the loader.
    pub loaded_files: Vec<String>,
    /// Keys that were not recognized, as `(section, key, file_path)` tuples.
    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
}
1533
1534impl Default for SourcedConfig {
1535    fn default() -> Self {
1536        Self {
1537            global: SourcedGlobalConfig::default(),
1538            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1539            rules: BTreeMap::new(),
1540            loaded_files: Vec::new(),
1541            unknown_keys: Vec::new(),
1542        }
1543    }
1544}
1545
1546impl SourcedConfig {
1547    /// Merges another SourcedConfigFragment into this SourcedConfig.
1548    /// Uses source precedence to determine which values take effect.
1549    fn merge(&mut self, fragment: SourcedConfigFragment) {
1550        // Merge global config
1551        self.global.enable.merge_override(
1552            fragment.global.enable.value,
1553            fragment.global.enable.source,
1554            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1555            fragment.global.enable.overrides.first().and_then(|o| o.line),
1556        );
1557        self.global.disable.merge_override(
1558            fragment.global.disable.value,
1559            fragment.global.disable.source,
1560            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1561            fragment.global.disable.overrides.first().and_then(|o| o.line),
1562        );
1563        self.global.include.merge_override(
1564            fragment.global.include.value,
1565            fragment.global.include.source,
1566            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1567            fragment.global.include.overrides.first().and_then(|o| o.line),
1568        );
1569        self.global.exclude.merge_override(
1570            fragment.global.exclude.value,
1571            fragment.global.exclude.source,
1572            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1573            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1574        );
1575        self.global.respect_gitignore.merge_override(
1576            fragment.global.respect_gitignore.value,
1577            fragment.global.respect_gitignore.source,
1578            fragment
1579                .global
1580                .respect_gitignore
1581                .overrides
1582                .first()
1583                .and_then(|o| o.file.clone()),
1584            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1585        );
1586        self.global.line_length.merge_override(
1587            fragment.global.line_length.value,
1588            fragment.global.line_length.source,
1589            fragment
1590                .global
1591                .line_length
1592                .overrides
1593                .first()
1594                .and_then(|o| o.file.clone()),
1595            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1596        );
1597        self.global.fixable.merge_override(
1598            fragment.global.fixable.value,
1599            fragment.global.fixable.source,
1600            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1601            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1602        );
1603        self.global.unfixable.merge_override(
1604            fragment.global.unfixable.value,
1605            fragment.global.unfixable.source,
1606            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1607            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1608        );
1609
1610        // Merge flavor
1611        self.global.flavor.merge_override(
1612            fragment.global.flavor.value,
1613            fragment.global.flavor.source,
1614            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1615            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1616        );
1617
1618        // Merge force_exclude
1619        self.global.force_exclude.merge_override(
1620            fragment.global.force_exclude.value,
1621            fragment.global.force_exclude.source,
1622            fragment
1623                .global
1624                .force_exclude
1625                .overrides
1626                .first()
1627                .and_then(|o| o.file.clone()),
1628            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1629        );
1630
1631        // Merge output_format if present
1632        if let Some(output_format_fragment) = fragment.global.output_format {
1633            if let Some(ref mut output_format) = self.global.output_format {
1634                output_format.merge_override(
1635                    output_format_fragment.value,
1636                    output_format_fragment.source,
1637                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1638                    output_format_fragment.overrides.first().and_then(|o| o.line),
1639                );
1640            } else {
1641                self.global.output_format = Some(output_format_fragment);
1642            }
1643        }
1644
1645        // Merge per_file_ignores
1646        self.per_file_ignores.merge_override(
1647            fragment.per_file_ignores.value,
1648            fragment.per_file_ignores.source,
1649            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1650            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1651        );
1652
1653        // Merge rule configs
1654        for (rule_name, rule_fragment) in fragment.rules {
1655            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1656            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1657            for (key, sourced_value_fragment) in rule_fragment.values {
1658                let sv_entry = rule_entry
1659                    .values
1660                    .entry(key.clone())
1661                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1662                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1663                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1664                sv_entry.merge_override(
1665                    sourced_value_fragment.value,  // Use the value from the fragment
1666                    sourced_value_fragment.source, // Use the source from the fragment
1667                    file_from_fragment,            // Pass the file path from the fragment override
1668                    line_from_fragment,            // Pass the line number from the fragment override
1669                );
1670            }
1671        }
1672
1673        // Merge unknown_keys from fragment
1674        for (section, key, file_path) in fragment.unknown_keys {
1675            // Deduplicate: only add if not already present
1676            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
1677                self.unknown_keys.push((section, key, file_path));
1678            }
1679        }
1680    }
1681
1682    /// Load and merge configurations from files and CLI overrides.
1683    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
1684        Self::load_with_discovery(config_path, cli_overrides, false)
1685    }
1686
1687    /// Discover configuration file by traversing up the directory tree.
1688    /// Returns the first configuration file found.
1689    fn discover_config_upward() -> Option<std::path::PathBuf> {
1690        use std::env;
1691
1692        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1693        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
1694
1695        let start_dir = match env::current_dir() {
1696            Ok(dir) => dir,
1697            Err(e) => {
1698                log::debug!("[rumdl-config] Failed to get current directory: {e}");
1699                return None;
1700            }
1701        };
1702
1703        let mut current_dir = start_dir.clone();
1704        let mut depth = 0;
1705
1706        loop {
1707            if depth >= MAX_DEPTH {
1708                log::debug!("[rumdl-config] Maximum traversal depth reached");
1709                break;
1710            }
1711
1712            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
1713
1714            // Check for config files in order of precedence
1715            for config_name in CONFIG_FILES {
1716                let config_path = current_dir.join(config_name);
1717
1718                if config_path.exists() {
1719                    // For pyproject.toml, verify it contains [tool.rumdl] section
1720                    if *config_name == "pyproject.toml" {
1721                        if let Ok(content) = std::fs::read_to_string(&config_path) {
1722                            if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1723                                log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1724                                return Some(config_path);
1725                            }
1726                            log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
1727                            continue;
1728                        }
1729                    } else {
1730                        log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1731                        return Some(config_path);
1732                    }
1733                }
1734            }
1735
1736            // Check for .git directory (stop boundary)
1737            if current_dir.join(".git").exists() {
1738                log::debug!("[rumdl-config] Stopping at .git directory");
1739                break;
1740            }
1741
1742            // Move to parent directory
1743            match current_dir.parent() {
1744                Some(parent) => {
1745                    current_dir = parent.to_owned();
1746                    depth += 1;
1747                }
1748                None => {
1749                    log::debug!("[rumdl-config] Reached filesystem root");
1750                    break;
1751                }
1752            }
1753        }
1754
1755        None
1756    }
1757
1758    /// Internal implementation that accepts config directory for testing
1759    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
1760        let config_dir = config_dir.join("rumdl");
1761
1762        // Check for config files in precedence order (same as project discovery)
1763        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1764
1765        log::debug!(
1766            "[rumdl-config] Checking for user configuration in: {}",
1767            config_dir.display()
1768        );
1769
1770        for filename in USER_CONFIG_FILES {
1771            let config_path = config_dir.join(filename);
1772
1773            if config_path.exists() {
1774                // For pyproject.toml, verify it contains [tool.rumdl] section
1775                if *filename == "pyproject.toml" {
1776                    if let Ok(content) = std::fs::read_to_string(&config_path) {
1777                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1778                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1779                            return Some(config_path);
1780                        }
1781                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
1782                        continue;
1783                    }
1784                } else {
1785                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1786                    return Some(config_path);
1787                }
1788            }
1789        }
1790
1791        log::debug!(
1792            "[rumdl-config] No user configuration found in: {}",
1793            config_dir.display()
1794        );
1795        None
1796    }
1797
1798    /// Discover user-level configuration file from platform-specific config directory.
1799    /// Returns the first configuration file found in the user config directory.
1800    fn user_configuration_path() -> Option<std::path::PathBuf> {
1801        use etcetera::{BaseStrategy, choose_base_strategy};
1802
1803        match choose_base_strategy() {
1804            Ok(strategy) => {
1805                let config_dir = strategy.config_dir();
1806                Self::user_configuration_path_impl(&config_dir)
1807            }
1808            Err(e) => {
1809                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
1810                None
1811            }
1812        }
1813    }
1814
1815    /// Internal implementation that accepts user config directory for testing
1816    #[doc(hidden)]
1817    pub fn load_with_discovery_impl(
1818        config_path: Option<&str>,
1819        cli_overrides: Option<&SourcedGlobalConfig>,
1820        skip_auto_discovery: bool,
1821        user_config_dir: Option<&Path>,
1822    ) -> Result<Self, ConfigError> {
1823        use std::env;
1824        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
1825        if config_path.is_none() {
1826            if skip_auto_discovery {
1827                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
1828            } else {
1829                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
1830            }
1831        } else {
1832            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
1833        }
1834        let mut sourced_config = SourcedConfig::default();
1835
1836        // 1. Load explicit config path if provided
1837        if let Some(path) = config_path {
1838            let path_obj = Path::new(path);
1839            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
1840            log::debug!("[rumdl-config] Trying to load config file: {filename}");
1841            let path_str = path.to_string();
1842
1843            // Known markdownlint config files
1844            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
1845
1846            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
1847                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1848                    source: e,
1849                    path: path_str.clone(),
1850                })?;
1851                if filename == "pyproject.toml" {
1852                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1853                        sourced_config.merge(fragment);
1854                        sourced_config.loaded_files.push(path_str.clone());
1855                    }
1856                } else {
1857                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1858                    sourced_config.merge(fragment);
1859                    sourced_config.loaded_files.push(path_str.clone());
1860                }
1861            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
1862                || path_str.ends_with(".json")
1863                || path_str.ends_with(".jsonc")
1864                || path_str.ends_with(".yaml")
1865                || path_str.ends_with(".yml")
1866            {
1867                // Parse as markdownlint config (JSON/YAML)
1868                let fragment = load_from_markdownlint(&path_str)?;
1869                sourced_config.merge(fragment);
1870                sourced_config.loaded_files.push(path_str.clone());
1871                // markdownlint is fallback only
1872            } else {
1873                // Try TOML only
1874                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1875                    source: e,
1876                    path: path_str.clone(),
1877                })?;
1878                let fragment = parse_rumdl_toml(&content, &path_str)?;
1879                sourced_config.merge(fragment);
1880                sourced_config.loaded_files.push(path_str.clone());
1881            }
1882        }
1883
1884        // Only perform auto-discovery if not skipped AND no explicit config path provided
1885        if !skip_auto_discovery && config_path.is_none() {
1886            // Step 1: Load user configuration first (as a base)
1887            let user_config_path = if let Some(dir) = user_config_dir {
1888                Self::user_configuration_path_impl(dir)
1889            } else {
1890                Self::user_configuration_path()
1891            };
1892
1893            if let Some(user_config_path) = user_config_path {
1894                let path_str = user_config_path.display().to_string();
1895                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1896
1897                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
1898
1899                if filename == "pyproject.toml" {
1900                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1901                        source: e,
1902                        path: path_str.clone(),
1903                    })?;
1904                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1905                        sourced_config.merge(fragment);
1906                        sourced_config.loaded_files.push(path_str);
1907                    }
1908                } else {
1909                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1910                        source: e,
1911                        path: path_str.clone(),
1912                    })?;
1913                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1914                    sourced_config.merge(fragment);
1915                    sourced_config.loaded_files.push(path_str);
1916                }
1917            } else {
1918                log::debug!("[rumdl-config] No user configuration file found");
1919            }
1920
1921            // Step 2: Look for project configuration files (override user config)
1922            if let Some(config_file) = Self::discover_config_upward() {
1923                let path_str = config_file.display().to_string();
1924                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
1925
1926                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
1927
1928                if filename == "pyproject.toml" {
1929                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1930                        source: e,
1931                        path: path_str.clone(),
1932                    })?;
1933                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1934                        sourced_config.merge(fragment);
1935                        sourced_config.loaded_files.push(path_str);
1936                    }
1937                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
1938                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1939                        source: e,
1940                        path: path_str.clone(),
1941                    })?;
1942                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1943                    sourced_config.merge(fragment);
1944                    sourced_config.loaded_files.push(path_str);
1945                }
1946            } else {
1947                log::debug!("[rumdl-config] No configuration file found via upward traversal");
1948
1949                // Step 3: If no project config found, fallback to markdownlint config in current directory
1950                let mut found_markdownlint = false;
1951                for filename in MARKDOWNLINT_CONFIG_FILES {
1952                    if std::path::Path::new(filename).exists() {
1953                        match load_from_markdownlint(filename) {
1954                            Ok(fragment) => {
1955                                sourced_config.merge(fragment);
1956                                sourced_config.loaded_files.push(filename.to_string());
1957                                found_markdownlint = true;
1958                                break; // Load only the first one found
1959                            }
1960                            Err(_e) => {
1961                                // Log error but continue (it's just a fallback)
1962                            }
1963                        }
1964                    }
1965                }
1966
1967                if !found_markdownlint {
1968                    log::debug!("[rumdl-config] No markdownlint configuration file found");
1969                }
1970            }
1971        }
1972
1973        // 5. Apply CLI overrides (highest precedence)
1974        if let Some(cli) = cli_overrides {
1975            sourced_config
1976                .global
1977                .enable
1978                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
1979            sourced_config
1980                .global
1981                .disable
1982                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
1983            sourced_config
1984                .global
1985                .exclude
1986                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
1987            sourced_config
1988                .global
1989                .include
1990                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
1991            sourced_config.global.respect_gitignore.merge_override(
1992                cli.respect_gitignore.value,
1993                ConfigSource::Cli,
1994                None,
1995                None,
1996            );
1997            sourced_config
1998                .global
1999                .fixable
2000                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
2001            sourced_config
2002                .global
2003                .unfixable
2004                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2005            // No rule-specific CLI overrides implemented yet
2006        }
2007
2008        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2009
2010        Ok(sourced_config)
2011    }
2012
2013    /// Load and merge configurations from files and CLI overrides.
2014    /// If skip_auto_discovery is true, only explicit config paths are loaded.
2015    pub fn load_with_discovery(
2016        config_path: Option<&str>,
2017        cli_overrides: Option<&SourcedGlobalConfig>,
2018        skip_auto_discovery: bool,
2019    ) -> Result<Self, ConfigError> {
2020        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
2021    }
2022}
2023
2024impl From<SourcedConfig> for Config {
2025    fn from(sourced: SourcedConfig) -> Self {
2026        let mut rules = BTreeMap::new();
2027        for (rule_name, sourced_rule_cfg) in sourced.rules {
2028            // Normalize rule name to uppercase for case-insensitive lookup
2029            let normalized_rule_name = rule_name.to_ascii_uppercase();
2030            let mut values = BTreeMap::new();
2031            for (key, sourced_val) in sourced_rule_cfg.values {
2032                values.insert(key, sourced_val.value);
2033            }
2034            rules.insert(normalized_rule_name, RuleConfig { values });
2035        }
2036        #[allow(deprecated)]
2037        let global = GlobalConfig {
2038            enable: sourced.global.enable.value,
2039            disable: sourced.global.disable.value,
2040            exclude: sourced.global.exclude.value,
2041            include: sourced.global.include.value,
2042            respect_gitignore: sourced.global.respect_gitignore.value,
2043            line_length: sourced.global.line_length.value,
2044            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2045            fixable: sourced.global.fixable.value,
2046            unfixable: sourced.global.unfixable.value,
2047            flavor: sourced.global.flavor.value,
2048            force_exclude: sourced.global.force_exclude.value,
2049        };
2050        Config {
2051            global,
2052            per_file_ignores: sourced.per_file_ignores.value,
2053            rules,
2054        }
2055    }
2056}
2057
/// Registry of all known rules and their config schemas
///
/// Populated by `RuleRegistry::from_rules`, which keys both maps by the rule name after it
/// has been passed through `normalize_key`.
pub struct RuleRegistry {
    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types
    /// (the table comes from the rule's default config section; it is empty when the rule
    /// provides none)
    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
    /// Map of rule name to config key aliases (alias key -> canonical schema key)
    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
}
2065
2066impl RuleRegistry {
2067    /// Build a registry from a list of rules
2068    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2069        let mut rule_schemas = std::collections::BTreeMap::new();
2070        let mut rule_aliases = std::collections::BTreeMap::new();
2071
2072        for rule in rules {
2073            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2074                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2075                rule_schemas.insert(norm_name.clone(), table);
2076                norm_name
2077            } else {
2078                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2079                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2080                norm_name
2081            };
2082
2083            // Store aliases if the rule provides them
2084            if let Some(aliases) = rule.config_aliases() {
2085                rule_aliases.insert(norm_name, aliases);
2086            }
2087        }
2088
2089        RuleRegistry {
2090            rule_schemas,
2091            rule_aliases,
2092        }
2093    }
2094
2095    /// Get all known rule names
2096    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2097        self.rule_schemas.keys().cloned().collect()
2098    }
2099
2100    /// Get the valid configuration keys for a rule, including both original and normalized variants
2101    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2102        self.rule_schemas.get(rule).map(|schema| {
2103            let mut all_keys = std::collections::BTreeSet::new();
2104
2105            // Add original keys from schema
2106            for key in schema.keys() {
2107                all_keys.insert(key.clone());
2108            }
2109
2110            // Add normalized variants for markdownlint compatibility
2111            for key in schema.keys() {
2112                // Add kebab-case variant
2113                all_keys.insert(key.replace('_', "-"));
2114                // Add snake_case variant
2115                all_keys.insert(key.replace('-', "_"));
2116                // Add normalized variant
2117                all_keys.insert(normalize_key(key));
2118            }
2119
2120            // Add any aliases defined by the rule
2121            if let Some(aliases) = self.rule_aliases.get(rule) {
2122                for alias_key in aliases.keys() {
2123                    all_keys.insert(alias_key.clone());
2124                    // Also add normalized variants of the alias
2125                    all_keys.insert(alias_key.replace('_', "-"));
2126                    all_keys.insert(alias_key.replace('-', "_"));
2127                    all_keys.insert(normalize_key(alias_key));
2128                }
2129            }
2130
2131            all_keys
2132        })
2133    }
2134
2135    /// Get the expected value type for a rule's configuration key, trying variants
2136    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2137        if let Some(schema) = self.rule_schemas.get(rule) {
2138            // Check if this key is an alias
2139            if let Some(aliases) = self.rule_aliases.get(rule)
2140                && let Some(canonical_key) = aliases.get(key)
2141            {
2142                // Use the canonical key for schema lookup
2143                if let Some(value) = schema.get(canonical_key) {
2144                    return Some(value);
2145                }
2146            }
2147
2148            // Try the original key
2149            if let Some(value) = schema.get(key) {
2150                return Some(value);
2151            }
2152
2153            // Try key variants
2154            let key_variants = [
2155                key.replace('-', "_"), // Convert kebab-case to snake_case
2156                key.replace('_', "-"), // Convert snake_case to kebab-case
2157                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2158            ];
2159
2160            for variant in &key_variants {
2161                if let Some(value) = schema.get(variant) {
2162                    return Some(value);
2163                }
2164            }
2165        }
2166        None
2167    }
2168}
2169
/// Represents a config validation warning or error
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem
    pub message: String,
    /// Rule the warning refers to; `None` for warnings about global options
    pub rule: Option<String>,
    /// Config key the warning refers to; `None` when the warning is about a rule as a whole
    pub key: Option<String>,
}
2177
2178/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking
2179pub fn validate_config_sourced(sourced: &SourcedConfig, registry: &RuleRegistry) -> Vec<ConfigValidationWarning> {
2180    let mut warnings = Vec::new();
2181    let known_rules = registry.rule_names();
2182    // 1. Unknown rules
2183    for rule in sourced.rules.keys() {
2184        if !known_rules.contains(rule) {
2185            warnings.push(ConfigValidationWarning {
2186                message: format!("Unknown rule in config: {rule}"),
2187                rule: Some(rule.clone()),
2188                key: None,
2189            });
2190        }
2191    }
2192    // 2. Unknown options and type mismatches
2193    for (rule, rule_cfg) in &sourced.rules {
2194        if let Some(valid_keys) = registry.config_keys_for(rule) {
2195            for key in rule_cfg.values.keys() {
2196                if !valid_keys.contains(key) {
2197                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2198                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2199                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2200                    } else {
2201                        format!("Unknown option for rule {rule}: {key}")
2202                    };
2203                    warnings.push(ConfigValidationWarning {
2204                        message,
2205                        rule: Some(rule.clone()),
2206                        key: Some(key.clone()),
2207                    });
2208                } else {
2209                    // Type check: compare type of value to type of default
2210                    if let Some(expected) = registry.expected_value_for(rule, key) {
2211                        let actual = &rule_cfg.values[key].value;
2212                        if !toml_value_type_matches(expected, actual) {
2213                            warnings.push(ConfigValidationWarning {
2214                                message: format!(
2215                                    "Type mismatch for {}.{}: expected {}, got {}",
2216                                    rule,
2217                                    key,
2218                                    toml_type_name(expected),
2219                                    toml_type_name(actual)
2220                                ),
2221                                rule: Some(rule.clone()),
2222                                key: Some(key.clone()),
2223                            });
2224                        }
2225                    }
2226                }
2227            }
2228        }
2229    }
2230    // 3. Unknown global options (from unknown_keys)
2231    let known_global_keys = vec![
2232        "enable".to_string(),
2233        "disable".to_string(),
2234        "include".to_string(),
2235        "exclude".to_string(),
2236        "respect-gitignore".to_string(),
2237        "line-length".to_string(),
2238        "fixable".to_string(),
2239        "unfixable".to_string(),
2240        "flavor".to_string(),
2241        "force-exclude".to_string(),
2242        "output-format".to_string(),
2243    ];
2244
2245    for (section, key, file_path) in &sourced.unknown_keys {
2246        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2247            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2248                if let Some(path) = file_path {
2249                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2250                } else {
2251                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2252                }
2253            } else if let Some(path) = file_path {
2254                format!("Unknown global option in {path}: {key}")
2255            } else {
2256                format!("Unknown global option: {key}")
2257            };
2258            warnings.push(ConfigValidationWarning {
2259                message,
2260                rule: None,
2261                key: Some(key.clone()),
2262            });
2263        } else if !key.is_empty() {
2264            // This is an unknown rule section (key is empty means it's a section header)
2265            // No suggestions for rule names - just warn
2266            continue;
2267        } else {
2268            // Unknown rule section
2269            let message = if let Some(path) = file_path {
2270                format!(
2271                    "Unknown rule in {path}: {}",
2272                    section.trim_matches(|c| c == '[' || c == ']')
2273                )
2274            } else {
2275                format!(
2276                    "Unknown rule in config: {}",
2277                    section.trim_matches(|c| c == '[' || c == ']')
2278                )
2279            };
2280            warnings.push(ConfigValidationWarning {
2281                message,
2282                rule: None,
2283                key: None,
2284            });
2285        }
2286    }
2287    warnings
2288}
2289
2290fn toml_type_name(val: &toml::Value) -> &'static str {
2291    match val {
2292        toml::Value::String(_) => "string",
2293        toml::Value::Integer(_) => "integer",
2294        toml::Value::Float(_) => "float",
2295        toml::Value::Boolean(_) => "boolean",
2296        toml::Value::Array(_) => "array",
2297        toml::Value::Table(_) => "table",
2298        toml::Value::Datetime(_) => "datetime",
2299    }
2300}
2301
/// Calculate Levenshtein distance between two strings (simple implementation)
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes, so multi-byte
/// characters count as a single edit and never cause out-of-bounds indexing.
///
/// Uses the two-row dynamic programming formulation: `prev_row[j]` holds the distance
/// between the already processed prefix of `s1` and the first `j` chars of `s2`.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();
    let len1 = s1_chars.len();
    let len2 = s2_chars.len();

    // Distance to/from an empty string is just the length of the other one
    if len1 == 0 {
        return len2;
    }
    if len2 == 0 {
        return len1;
    }

    let mut prev_row: Vec<usize> = (0..=len2).collect();
    let mut curr_row = vec![0; len2 + 1];

    for (i, c1) in s1_chars.iter().enumerate() {
        curr_row[0] = i + 1;
        for (j, c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    // After the final swap the last computed row lives in `prev_row`
    prev_row[len2]
}
2333
2334/// Suggest a similar key from a list of valid keys using fuzzy matching
2335fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2336    let unknown_lower = unknown.to_lowercase();
2337    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2338
2339    let mut best_match: Option<(String, usize)> = None;
2340
2341    for valid in valid_keys {
2342        let valid_lower = valid.to_lowercase();
2343        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2344
2345        if distance <= max_distance {
2346            if let Some((_, best_dist)) = &best_match {
2347                if distance < *best_dist {
2348                    best_match = Some((valid.clone(), distance));
2349                }
2350            } else {
2351                best_match = Some((valid.clone(), distance));
2352            }
2353        }
2354    }
2355
2356    best_match.map(|(key, _)| key)
2357}
2358
2359fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2360    use toml::Value::*;
2361    match (expected, actual) {
2362        (String(_), String(_)) => true,
2363        (Integer(_), Integer(_)) => true,
2364        (Float(_), Float(_)) => true,
2365        (Boolean(_), Boolean(_)) => true,
2366        (Array(_), Array(_)) => true,
2367        (Table(_), Table(_)) => true,
2368        (Datetime(_), Datetime(_)) => true,
2369        // Allow integer for float
2370        (Float(_), Integer(_)) => true,
2371        _ => false,
2372    }
2373}
2374
2375/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2376fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2377    let doc: toml::Value =
2378        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2379    let mut fragment = SourcedConfigFragment::default();
2380    let source = ConfigSource::PyprojectToml;
2381    let file = Some(path.to_string());
2382
2383    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2384    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2385        && let Some(rumdl_table) = rumdl_config.as_table()
2386    {
2387        // Helper function to extract global config from a table
2388        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2389            // Extract global options from the given table
2390            if let Some(enable) = table.get("enable")
2391                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2392            {
2393                // Normalize rule names in the list
2394                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2395                fragment
2396                    .global
2397                    .enable
2398                    .push_override(normalized_values, source, file.clone(), None);
2399            }
2400
2401            if let Some(disable) = table.get("disable")
2402                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2403            {
2404                // Re-enable normalization
2405                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2406                fragment
2407                    .global
2408                    .disable
2409                    .push_override(normalized_values, source, file.clone(), None);
2410            }
2411
2412            if let Some(include) = table.get("include")
2413                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2414            {
2415                fragment
2416                    .global
2417                    .include
2418                    .push_override(values, source, file.clone(), None);
2419            }
2420
2421            if let Some(exclude) = table.get("exclude")
2422                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2423            {
2424                fragment
2425                    .global
2426                    .exclude
2427                    .push_override(values, source, file.clone(), None);
2428            }
2429
2430            if let Some(respect_gitignore) = table
2431                .get("respect-gitignore")
2432                .or_else(|| table.get("respect_gitignore"))
2433                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2434            {
2435                fragment
2436                    .global
2437                    .respect_gitignore
2438                    .push_override(value, source, file.clone(), None);
2439            }
2440
2441            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2442                && let Ok(value) = bool::deserialize(force_exclude.clone())
2443            {
2444                fragment
2445                    .global
2446                    .force_exclude
2447                    .push_override(value, source, file.clone(), None);
2448            }
2449
2450            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2451                && let Ok(value) = String::deserialize(output_format.clone())
2452            {
2453                if fragment.global.output_format.is_none() {
2454                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2455                } else {
2456                    fragment
2457                        .global
2458                        .output_format
2459                        .as_mut()
2460                        .unwrap()
2461                        .push_override(value, source, file.clone(), None);
2462                }
2463            }
2464
2465            if let Some(fixable) = table.get("fixable")
2466                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2467            {
2468                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2469                fragment
2470                    .global
2471                    .fixable
2472                    .push_override(normalized_values, source, file.clone(), None);
2473            }
2474
2475            if let Some(unfixable) = table.get("unfixable")
2476                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2477            {
2478                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2479                fragment
2480                    .global
2481                    .unfixable
2482                    .push_override(normalized_values, source, file.clone(), None);
2483            }
2484
2485            if let Some(flavor) = table.get("flavor")
2486                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2487            {
2488                fragment.global.flavor.push_override(value, source, file.clone(), None);
2489            }
2490
2491            // Handle line-length special case - this should set the global line_length
2492            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
2493                && let Ok(value) = u64::deserialize(line_length.clone())
2494            {
2495                fragment
2496                    .global
2497                    .line_length
2498                    .push_override(value, source, file.clone(), None);
2499
2500                // Also add to MD013 rule config for backward compatibility
2501                let norm_md013_key = normalize_key("MD013");
2502                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
2503                let norm_line_length_key = normalize_key("line-length");
2504                let sv = rule_entry
2505                    .values
2506                    .entry(norm_line_length_key)
2507                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
2508                sv.push_override(line_length.clone(), source, file.clone(), None);
2509            }
2510        };
2511
2512        // First, check for [tool.rumdl.global] section
2513        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
2514            extract_global_config(&mut fragment, global_table);
2515        }
2516
2517        // Also extract global options from [tool.rumdl] directly (for flat structure)
2518        extract_global_config(&mut fragment, rumdl_table);
2519
2520        // --- Extract per-file-ignores configurations ---
2521        // Check both hyphenated and underscored versions for compatibility
2522        let per_file_ignores_key = rumdl_table
2523            .get("per-file-ignores")
2524            .or_else(|| rumdl_table.get("per_file_ignores"));
2525
2526        if let Some(per_file_ignores_value) = per_file_ignores_key
2527            && let Some(per_file_table) = per_file_ignores_value.as_table()
2528        {
2529            let mut per_file_map = HashMap::new();
2530            for (pattern, rules_value) in per_file_table {
2531                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
2532                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
2533                    per_file_map.insert(pattern.clone(), normalized_rules);
2534                } else {
2535                    log::warn!(
2536                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
2537                    );
2538                }
2539            }
2540            fragment
2541                .per_file_ignores
2542                .push_override(per_file_map, source, file.clone(), None);
2543        }
2544
2545        // --- Extract rule-specific configurations ---
2546        for (key, value) in rumdl_table {
2547            let norm_rule_key = normalize_key(key);
2548
2549            // Skip keys already handled as global or special cases
2550            if [
2551                "enable",
2552                "disable",
2553                "include",
2554                "exclude",
2555                "respect_gitignore",
2556                "respect-gitignore", // Added kebab-case here too
2557                "force_exclude",
2558                "force-exclude",
2559                "line_length",
2560                "line-length",
2561                "output_format",
2562                "output-format",
2563                "fixable",
2564                "unfixable",
2565                "per-file-ignores",
2566                "per_file_ignores",
2567                "global",
2568            ]
2569            .contains(&norm_rule_key.as_str())
2570            {
2571                continue;
2572            }
2573
2574            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
2575            // AND if the value is actually a TOML table before processing as rule config.
2576            // This prevents misinterpreting other top-level keys under [tool.rumdl]
2577            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
2578            if norm_rule_key_upper.len() == 5
2579                && norm_rule_key_upper.starts_with("MD")
2580                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
2581                && value.is_table()
2582            {
2583                if let Some(rule_config_table) = value.as_table() {
2584                    // Get the entry for this rule (e.g., "md013")
2585                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
2586                    for (rk, rv) in rule_config_table {
2587                        let norm_rk = normalize_key(rk); // Normalize the config key itself
2588
2589                        let toml_val = rv.clone();
2590
2591                        let sv = rule_entry
2592                            .values
2593                            .entry(norm_rk.clone())
2594                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2595                        sv.push_override(toml_val, source, file.clone(), None);
2596                    }
2597                }
2598            } else {
2599                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
2600                // Track unknown keys under [tool.rumdl] for validation
2601                fragment
2602                    .unknown_keys
2603                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
2604            }
2605        }
2606    }
2607
2608    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
2609    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
2610        for (key, value) in tool_table.iter() {
2611            if let Some(rule_name) = key.strip_prefix("rumdl.") {
2612                let norm_rule_name = normalize_key(rule_name);
2613                if norm_rule_name.len() == 5
2614                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2615                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2616                    && let Some(rule_table) = value.as_table()
2617                {
2618                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2619                    for (rk, rv) in rule_table {
2620                        let norm_rk = normalize_key(rk);
2621                        let toml_val = rv.clone();
2622                        let sv = rule_entry
2623                            .values
2624                            .entry(norm_rk.clone())
2625                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2626                        sv.push_override(toml_val, source, file.clone(), None);
2627                    }
2628                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2629                    // Track unknown rule sections like [tool.rumdl.MD999]
2630                    fragment.unknown_keys.push((
2631                        format!("[tool.rumdl.{rule_name}]"),
2632                        String::new(),
2633                        Some(path.to_string()),
2634                    ));
2635                }
2636            }
2637        }
2638    }
2639
2640    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
2641    if let Some(doc_table) = doc.as_table() {
2642        for (key, value) in doc_table.iter() {
2643            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
2644                let norm_rule_name = normalize_key(rule_name);
2645                if norm_rule_name.len() == 5
2646                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2647                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2648                    && let Some(rule_table) = value.as_table()
2649                {
2650                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2651                    for (rk, rv) in rule_table {
2652                        let norm_rk = normalize_key(rk);
2653                        let toml_val = rv.clone();
2654                        let sv = rule_entry
2655                            .values
2656                            .entry(norm_rk.clone())
2657                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2658                        sv.push_override(toml_val, source, file.clone(), None);
2659                    }
2660                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2661                    // Track unknown rule sections like [tool.rumdl.MD999]
2662                    fragment.unknown_keys.push((
2663                        format!("[tool.rumdl.{rule_name}]"),
2664                        String::new(),
2665                        Some(path.to_string()),
2666                    ));
2667                }
2668            }
2669        }
2670    }
2671
2672    // Only return Some(fragment) if any config was found
2673    let has_any = !fragment.global.enable.value.is_empty()
2674        || !fragment.global.disable.value.is_empty()
2675        || !fragment.global.include.value.is_empty()
2676        || !fragment.global.exclude.value.is_empty()
2677        || !fragment.global.fixable.value.is_empty()
2678        || !fragment.global.unfixable.value.is_empty()
2679        || fragment.global.output_format.is_some()
2680        || !fragment.per_file_ignores.value.is_empty()
2681        || !fragment.rules.is_empty();
2682    if has_any { Ok(Some(fragment)) } else { Ok(None) }
2683}
2684
2685/// Parses rumdl.toml / .rumdl.toml content.
2686fn parse_rumdl_toml(content: &str, path: &str) -> Result<SourcedConfigFragment, ConfigError> {
2687    let doc = content
2688        .parse::<DocumentMut>()
2689        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2690    let mut fragment = SourcedConfigFragment::default();
2691    let source = ConfigSource::RumdlToml;
2692    let file = Some(path.to_string());
2693
2694    // Define known rules before the loop
2695    let all_rules = rules::all_rules(&Config::default());
2696    let registry = RuleRegistry::from_rules(&all_rules);
2697    let known_rule_names: BTreeSet<String> = registry
2698        .rule_names()
2699        .into_iter()
2700        .map(|s| s.to_ascii_uppercase())
2701        .collect();
2702
2703    // Handle [global] section
2704    if let Some(global_item) = doc.get("global")
2705        && let Some(global_table) = global_item.as_table()
2706    {
2707        for (key, value_item) in global_table.iter() {
2708            let norm_key = normalize_key(key);
2709            match norm_key.as_str() {
2710                "enable" | "disable" | "include" | "exclude" => {
2711                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2712                        // Corrected: Iterate directly over the Formatted<Array>
2713                        let values: Vec<String> = formatted_array
2714                                .iter()
2715                                .filter_map(|item| item.as_str()) // Extract strings
2716                                .map(|s| s.to_string())
2717                                .collect();
2718
2719                        // Normalize rule names for enable/disable
2720                        let final_values = if norm_key == "enable" || norm_key == "disable" {
2721                            // Corrected: Pass &str to normalize_key
2722                            values.into_iter().map(|s| normalize_key(&s)).collect()
2723                        } else {
2724                            values
2725                        };
2726
2727                        match norm_key.as_str() {
2728                            "enable" => fragment
2729                                .global
2730                                .enable
2731                                .push_override(final_values, source, file.clone(), None),
2732                            "disable" => {
2733                                fragment
2734                                    .global
2735                                    .disable
2736                                    .push_override(final_values, source, file.clone(), None)
2737                            }
2738                            "include" => {
2739                                fragment
2740                                    .global
2741                                    .include
2742                                    .push_override(final_values, source, file.clone(), None)
2743                            }
2744                            "exclude" => {
2745                                fragment
2746                                    .global
2747                                    .exclude
2748                                    .push_override(final_values, source, file.clone(), None)
2749                            }
2750                            _ => unreachable!("Outer match guarantees only enable/disable/include/exclude"),
2751                        }
2752                    } else {
2753                        log::warn!(
2754                            "[WARN] Expected array for global key '{}' in {}, found {}",
2755                            key,
2756                            path,
2757                            value_item.type_name()
2758                        );
2759                    }
2760                }
2761                "respect_gitignore" | "respect-gitignore" => {
2762                    // Handle both cases
2763                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2764                        let val = *formatted_bool.value();
2765                        fragment
2766                            .global
2767                            .respect_gitignore
2768                            .push_override(val, source, file.clone(), None);
2769                    } else {
2770                        log::warn!(
2771                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2772                            key,
2773                            path,
2774                            value_item.type_name()
2775                        );
2776                    }
2777                }
2778                "force_exclude" | "force-exclude" => {
2779                    // Handle both cases
2780                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2781                        let val = *formatted_bool.value();
2782                        fragment
2783                            .global
2784                            .force_exclude
2785                            .push_override(val, source, file.clone(), None);
2786                    } else {
2787                        log::warn!(
2788                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2789                            key,
2790                            path,
2791                            value_item.type_name()
2792                        );
2793                    }
2794                }
2795                "line_length" | "line-length" => {
2796                    // Handle both cases
2797                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
2798                        let val = *formatted_int.value() as u64;
2799                        fragment
2800                            .global
2801                            .line_length
2802                            .push_override(val, source, file.clone(), None);
2803                    } else {
2804                        log::warn!(
2805                            "[WARN] Expected integer for global key '{}' in {}, found {}",
2806                            key,
2807                            path,
2808                            value_item.type_name()
2809                        );
2810                    }
2811                }
2812                "output_format" | "output-format" => {
2813                    // Handle both cases
2814                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2815                        let val = formatted_string.value().clone();
2816                        if fragment.global.output_format.is_none() {
2817                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
2818                        } else {
2819                            fragment.global.output_format.as_mut().unwrap().push_override(
2820                                val,
2821                                source,
2822                                file.clone(),
2823                                None,
2824                            );
2825                        }
2826                    } else {
2827                        log::warn!(
2828                            "[WARN] Expected string for global key '{}' in {}, found {}",
2829                            key,
2830                            path,
2831                            value_item.type_name()
2832                        );
2833                    }
2834                }
2835                "fixable" => {
2836                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2837                        let values: Vec<String> = formatted_array
2838                            .iter()
2839                            .filter_map(|item| item.as_str())
2840                            .map(normalize_key)
2841                            .collect();
2842                        fragment
2843                            .global
2844                            .fixable
2845                            .push_override(values, source, file.clone(), None);
2846                    } else {
2847                        log::warn!(
2848                            "[WARN] Expected array for global key '{}' in {}, found {}",
2849                            key,
2850                            path,
2851                            value_item.type_name()
2852                        );
2853                    }
2854                }
2855                "unfixable" => {
2856                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2857                        let values: Vec<String> = formatted_array
2858                            .iter()
2859                            .filter_map(|item| item.as_str())
2860                            .map(normalize_key)
2861                            .collect();
2862                        fragment
2863                            .global
2864                            .unfixable
2865                            .push_override(values, source, file.clone(), None);
2866                    } else {
2867                        log::warn!(
2868                            "[WARN] Expected array for global key '{}' in {}, found {}",
2869                            key,
2870                            path,
2871                            value_item.type_name()
2872                        );
2873                    }
2874                }
2875                "flavor" => {
2876                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2877                        let val = formatted_string.value();
2878                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
2879                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
2880                        } else {
2881                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
2882                        }
2883                    } else {
2884                        log::warn!(
2885                            "[WARN] Expected string for global key '{}' in {}, found {}",
2886                            key,
2887                            path,
2888                            value_item.type_name()
2889                        );
2890                    }
2891                }
2892                _ => {
2893                    // Track unknown global keys for validation
2894                    fragment
2895                        .unknown_keys
2896                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
2897                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
2898                }
2899            }
2900        }
2901    }
2902
2903    // Handle [per-file-ignores] section
2904    if let Some(per_file_item) = doc.get("per-file-ignores")
2905        && let Some(per_file_table) = per_file_item.as_table()
2906    {
2907        let mut per_file_map = HashMap::new();
2908        for (pattern, value_item) in per_file_table.iter() {
2909            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2910                let rules: Vec<String> = formatted_array
2911                    .iter()
2912                    .filter_map(|item| item.as_str())
2913                    .map(normalize_key)
2914                    .collect();
2915                per_file_map.insert(pattern.to_string(), rules);
2916            } else {
2917                let type_name = value_item.type_name();
2918                log::warn!(
2919                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
2920                );
2921            }
2922        }
2923        fragment
2924            .per_file_ignores
2925            .push_override(per_file_map, source, file.clone(), None);
2926    }
2927
2928    // Rule-specific: all other top-level tables
2929    for (key, item) in doc.iter() {
2930        let norm_rule_name = key.to_ascii_uppercase();
2931
2932        // Skip known special sections
2933        if key == "global" || key == "per-file-ignores" {
2934            continue;
2935        }
2936
2937        // Track unknown rule sections (like [MD999])
2938        if !known_rule_names.contains(&norm_rule_name) {
2939            // Only track if it looks like a rule section (starts with MD or is uppercase)
2940            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
2941                fragment
2942                    .unknown_keys
2943                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
2944            }
2945            continue;
2946        }
2947
2948        if let Some(tbl) = item.as_table() {
2949            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
2950            for (rk, rv_item) in tbl.iter() {
2951                let norm_rk = normalize_key(rk);
2952                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
2953                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
2954                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
2955                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
2956                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
2957                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
2958                    Some(toml_edit::Value::Array(formatted_array)) => {
2959                        // Convert toml_edit Array to toml::Value::Array
2960                        let mut values = Vec::new();
2961                        for item in formatted_array.iter() {
2962                            match item {
2963                                toml_edit::Value::String(formatted) => {
2964                                    values.push(toml::Value::String(formatted.value().clone()))
2965                                }
2966                                toml_edit::Value::Integer(formatted) => {
2967                                    values.push(toml::Value::Integer(*formatted.value()))
2968                                }
2969                                toml_edit::Value::Float(formatted) => {
2970                                    values.push(toml::Value::Float(*formatted.value()))
2971                                }
2972                                toml_edit::Value::Boolean(formatted) => {
2973                                    values.push(toml::Value::Boolean(*formatted.value()))
2974                                }
2975                                toml_edit::Value::Datetime(formatted) => {
2976                                    values.push(toml::Value::Datetime(*formatted.value()))
2977                                }
2978                                _ => {
2979                                    log::warn!(
2980                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
2981                                    );
2982                                }
2983                            }
2984                        }
2985                        Some(toml::Value::Array(values))
2986                    }
2987                    Some(toml_edit::Value::InlineTable(_)) => {
2988                        log::warn!(
2989                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
2990                        );
2991                        None
2992                    }
2993                    None => {
2994                        log::warn!(
2995                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
2996                        );
2997                        None
2998                    }
2999                };
3000                if let Some(toml_val) = maybe_toml_val {
3001                    let sv = rule_entry
3002                        .values
3003                        .entry(norm_rk.clone())
3004                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3005                    sv.push_override(toml_val, source, file.clone(), None);
3006                }
3007            }
3008        } else if item.is_value() {
3009            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3010        }
3011    }
3012
3013    Ok(fragment)
3014}
3015
3016/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3017fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3018    // Use the unified loader from markdownlint_config.rs
3019    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3020        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3021    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3022}