rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use lazy_static::lazy_static;
8use log;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fmt;
13use std::fs;
14use std::io;
15use std::path::Path;
16use std::str::FromStr;
17use toml_edit::DocumentMut;
18
/// Markdown flavor/dialect enumeration.
///
/// Serialized with serde as a lowercase identifier (`"standard"`, `"mkdocs"`,
/// `"mdx"`, `"quarto"`). `Standard` is the `Default` variant and additionally
/// deserializes from the aliases `"none"` and `""`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments (the default)
    #[serde(rename = "standard", alias = "none", alias = "")]
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    #[serde(rename = "mkdocs")]
    MkDocs,
    /// MDX flavor with JSX and ESM support (.mdx files)
    #[serde(rename = "mdx")]
    MDX,
    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
    #[serde(rename = "quarto")]
    Quarto,
    // Future flavors can be added here when they have actual implementation differences
    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
    // Planned: CommonMark - for strict CommonMark compliance
}
40
41impl fmt::Display for MarkdownFlavor {
42    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43        match self {
44            MarkdownFlavor::Standard => write!(f, "standard"),
45            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
46            MarkdownFlavor::MDX => write!(f, "mdx"),
47            MarkdownFlavor::Quarto => write!(f, "quarto"),
48        }
49    }
50}
51
52impl FromStr for MarkdownFlavor {
53    type Err = String;
54
55    fn from_str(s: &str) -> Result<Self, Self::Err> {
56        match s.to_lowercase().as_str() {
57            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
58            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
59            "mdx" => Ok(MarkdownFlavor::MDX),
60            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
61            // Accept but warn about unimplemented flavors
62            "gfm" | "github" => {
63                eprintln!("Warning: GFM flavor not yet implemented, using standard");
64                Ok(MarkdownFlavor::Standard)
65            }
66            "commonmark" => {
67                eprintln!("Warning: CommonMark flavor not yet implemented, using standard");
68                Ok(MarkdownFlavor::Standard)
69            }
70            _ => Err(format!("Unknown markdown flavor: {s}")),
71        }
72    }
73}
74
75impl MarkdownFlavor {
76    /// Detect flavor from file extension
77    pub fn from_extension(ext: &str) -> Self {
78        match ext.to_lowercase().as_str() {
79            "mdx" => Self::MDX,
80            "qmd" => Self::Quarto,
81            "rmd" => Self::Quarto,
82            _ => Self::Standard,
83        }
84    }
85
86    /// Detect flavor from file path
87    pub fn from_path(path: &std::path::Path) -> Self {
88        path.extension()
89            .and_then(|e| e.to_str())
90            .map(Self::from_extension)
91            .unwrap_or(Self::Standard)
92    }
93
94    /// Check if this flavor supports ESM imports/exports (MDX-specific)
95    pub fn supports_esm_blocks(self) -> bool {
96        matches!(self, Self::MDX)
97    }
98
99    /// Check if this flavor supports JSX components (MDX-specific)
100    pub fn supports_jsx(self) -> bool {
101        matches!(self, Self::MDX)
102    }
103
104    /// Check if this flavor supports auto-references (MkDocs-specific)
105    pub fn supports_auto_references(self) -> bool {
106        matches!(self, Self::MkDocs)
107    }
108
109    /// Get a human-readable name for this flavor
110    pub fn name(self) -> &'static str {
111        match self {
112            Self::Standard => "Standard",
113            Self::MkDocs => "MkDocs",
114            Self::MDX => "MDX",
115            Self::Quarto => "Quarto",
116        }
117    }
118}
119
120lazy_static! {
121    // Map common markdownlint config keys to rumdl rule names
122    static ref MARKDOWNLINT_KEY_MAP: HashMap<&'static str, &'static str> = {
123        let mut m = HashMap::new();
124        // Add mappings based on common markdownlint config names
125        // From https://github.com/DavidAnson/markdownlint/blob/main/schema/.markdownlint.jsonc
126        m.insert("ul-style", "md004");
127        m.insert("code-block-style", "md046");
128        m.insert("ul-indent", "md007"); // Example
129        m.insert("line-length", "md013"); // Example of a common one that might be top-level
130        // Add more mappings as needed based on markdownlint schema or observed usage
131        m
132    };
133}
134
/// Normalizes a configuration key (rule name or option name).
///
/// A key that looks like a rule identifier (exactly `MD` in any case followed
/// by three ASCII digits, e.g. `md013`) is returned in uppercase. Any other
/// key is converted to lowercase kebab-case: underscores become hyphens and
/// ASCII letters are lowercased.
pub fn normalize_key(key: &str) -> String {
    // Inspect raw bytes: a rule id is pure ASCII and exactly five bytes long.
    let looks_like_rule_id = matches!(
        key.as_bytes(),
        [b'm' | b'M', b'd' | b'D', digits @ ..]
            if digits.len() == 3 && digits.iter().all(u8::is_ascii_digit)
    );

    if looks_like_rule_id {
        return key.to_ascii_uppercase();
    }
    key.replace('_', "-").to_ascii_lowercase()
}
144
/// Represents a rule-specific configuration.
///
/// The struct is a thin wrapper around a sorted key/value map; because the map
/// is `#[serde(flatten)]`ed, the options appear directly inside the rule's
/// table rather than under a nested `values` key.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule, keyed by option name
    #[serde(flatten)]
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
153
154/// Generate a JSON schema for arbitrary configuration values
155fn arbitrary_value_schema(_gen: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
156    use schemars::schema::*;
157    Schema::Object(SchemaObject {
158        instance_type: Some(InstanceType::Object.into()),
159        object: Some(Box::new(ObjectValidation {
160            additional_properties: Some(Box::new(Schema::Bool(true))),
161            ..Default::default()
162        })),
163        ..Default::default()
164    })
165}
166
/// Represents the complete configuration loaded from rumdl.toml.
///
/// Besides the `global` table and `per-file-ignores`, every remaining
/// top-level table is collected into `rules` (the field is flattened).
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options (defaults are used when the table is absent)
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file glob patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See <https://github.com/rvben/rumdl> for full rule documentation.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,
}
195
196impl Config {
197    /// Check if the Markdown flavor is set to MkDocs
198    pub fn is_mkdocs_flavor(&self) -> bool {
199        self.global.flavor == MarkdownFlavor::MkDocs
200    }
201
202    // Future methods for when GFM and CommonMark are implemented:
203    // pub fn is_gfm_flavor(&self) -> bool
204    // pub fn is_commonmark_flavor(&self) -> bool
205
206    /// Get the configured Markdown flavor
207    pub fn markdown_flavor(&self) -> MarkdownFlavor {
208        self.global.flavor
209    }
210
211    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
212    pub fn is_mkdocs_project(&self) -> bool {
213        self.is_mkdocs_flavor()
214    }
215
216    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
217    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
218    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
219        use globset::{Glob, GlobSetBuilder};
220
221        let mut ignored_rules = HashSet::new();
222
223        if self.per_file_ignores.is_empty() {
224            return ignored_rules;
225        }
226
227        // Build a globset for efficient matching
228        let mut builder = GlobSetBuilder::new();
229        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
230
231        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
232            if let Ok(glob) = Glob::new(pattern) {
233                builder.add(glob);
234                pattern_to_rules.push((idx, rules));
235            } else {
236                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
237            }
238        }
239
240        let globset = match builder.build() {
241            Ok(gs) => gs,
242            Err(e) => {
243                log::error!("Failed to build globset for per-file-ignores: {e}");
244                return ignored_rules;
245            }
246        };
247
248        // Match the file path against all patterns
249        for match_idx in globset.matches(file_path) {
250            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
251                for rule in rules.iter() {
252                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
253                    ignored_rules.insert(normalize_key(rule));
254                }
255            }
256        }
257
258        ignored_rules
259    }
260}
261
/// Global configuration options.
///
/// The container-level `#[serde(default)]` means any field missing from the
/// input falls back to the value from `GlobalConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
#[serde(default)]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories (defaults to `true`)
    #[serde(default = "default_respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden).
    /// Defaults to 80.
    #[serde(default = "default_line_length")]
    pub line_length: u64,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.).
    /// Omitted from serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (standard, mkdocs, mdx, quarto)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    #[serde(default)]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,
}
317
/// Serde default for `GlobalConfig::respect_gitignore`: .gitignore files are honored.
fn default_respect_gitignore() -> bool {
    const RESPECT_GITIGNORE_BY_DEFAULT: bool = true;
    RESPECT_GITIGNORE_BY_DEFAULT
}
321
/// Serde default for `GlobalConfig::line_length`: 80 columns.
fn default_line_length() -> u64 {
    const DEFAULT_LINE_LENGTH: u64 = 80;
    DEFAULT_LINE_LENGTH
}
325
326// Add the Default impl
327impl Default for GlobalConfig {
328    #[allow(deprecated)]
329    fn default() -> Self {
330        Self {
331            enable: Vec::new(),
332            disable: Vec::new(),
333            exclude: Vec::new(),
334            include: Vec::new(),
335            respect_gitignore: true,
336            line_length: 80,
337            output_format: None,
338            fixable: Vec::new(),
339            unfixable: Vec::new(),
340            flavor: MarkdownFlavor::default(),
341            force_exclude: false,
342        }
343    }
344}
345
/// File names recognized as markdownlint configuration files: dotted and
/// undotted variants in JSON, JSONC and YAML (`.yaml`/`.yml`) form.
// NOTE(review): presumably consulted during config discovery; the usage is
// outside this section, so confirm before relying on the ordering.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
356
357/// Create a default configuration file at the specified path
358pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
359    // Check if file already exists
360    if Path::new(path).exists() {
361        return Err(ConfigError::FileExists { path: path.to_string() });
362    }
363
364    // Default configuration content
365    let default_config = r#"# rumdl configuration file
366
367# Global configuration options
368[global]
369# List of rules to disable (uncomment and modify as needed)
370# disable = ["MD013", "MD033"]
371
372# List of rules to enable exclusively (if provided, only these rules will run)
373# enable = ["MD001", "MD003", "MD004"]
374
375# List of file/directory patterns to include for linting (if provided, only these will be linted)
376# include = [
377#    "docs/*.md",
378#    "src/**/*.md",
379#    "README.md"
380# ]
381
382# List of file/directory patterns to exclude from linting
383exclude = [
384    # Common directories to exclude
385    ".git",
386    ".github",
387    "node_modules",
388    "vendor",
389    "dist",
390    "build",
391
392    # Specific files or patterns
393    "CHANGELOG.md",
394    "LICENSE.md",
395]
396
397# Respect .gitignore files when scanning directories (default: true)
398respect-gitignore = true
399
400# Markdown flavor/dialect (uncomment to enable)
401# Options: mkdocs, gfm, commonmark
402# flavor = "mkdocs"
403
404# Rule-specific configurations (uncomment and modify as needed)
405
406# [MD003]
407# style = "atx"  # Heading style (atx, atx_closed, setext)
408
409# [MD004]
410# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
411
412# [MD007]
413# indent = 4  # Unordered list indentation
414
415# [MD013]
416# line-length = 100  # Line length
417# code-blocks = false  # Exclude code blocks from line length check
418# tables = false  # Exclude tables from line length check
419# headings = true  # Include headings in line length check
420
421# [MD044]
422# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
423# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
424"#;
425
426    // Write the default configuration to the file
427    match fs::write(path, default_config) {
428        Ok(_) => Ok(()),
429        Err(err) => Err(ConfigError::IoError {
430            source: err,
431            path: path.to_string(),
432        }),
433    }
434}
435
/// Errors that can occur when loading or creating configuration
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on the configuration file at `path` failed.
    /// The message says "read", but `create_default_config` also returns this
    /// variant when writing the file fails.
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON);
    /// the payload is the parser's message
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists at `path`
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
451
452/// Get a rule-specific configuration value
453/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
454/// for better markdownlint compatibility
455pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
456    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
457
458    let rule_config = config.rules.get(&norm_rule_name)?;
459
460    // Try multiple key variants to support both underscore and kebab-case formats
461    let key_variants = [
462        key.to_string(),       // Original key as provided
463        normalize_key(key),    // Normalized key (lowercase, kebab-case)
464        key.replace('-', "_"), // Convert kebab-case to snake_case
465        key.replace('_', "-"), // Convert snake_case to kebab-case
466    ];
467
468    // Try each variant until we find a match
469    for variant in &key_variants {
470        if let Some(value) = rule_config.values.get(variant)
471            && let Ok(result) = T::deserialize(value.clone())
472        {
473            return Some(result);
474        }
475    }
476
477    None
478}
479
/// Produce the starter `[tool.rumdl]` section for a pyproject.toml file.
///
/// The returned text begins with a blank line, sets a few global options and
/// lists commented-out examples of rule-specific sections.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}
522
523#[cfg(test)]
524mod tests {
525    use super::*;
526    use std::fs;
527    use tempfile::tempdir;
528
529    #[test]
530    fn test_flavor_loading() {
531        let temp_dir = tempdir().unwrap();
532        let config_path = temp_dir.path().join(".rumdl.toml");
533        let config_content = r#"
534[global]
535flavor = "mkdocs"
536disable = ["MD001"]
537"#;
538        fs::write(&config_path, config_content).unwrap();
539
540        // Load the config
541        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
542        let config: Config = sourced.into();
543
544        // Check that flavor was loaded
545        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
546        assert!(config.is_mkdocs_flavor());
547        assert!(config.is_mkdocs_project()); // Test backwards compatibility
548        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
549    }
550
551    #[test]
552    fn test_pyproject_toml_root_level_config() {
553        let temp_dir = tempdir().unwrap();
554        let config_path = temp_dir.path().join("pyproject.toml");
555
556        // Create a test pyproject.toml with root-level configuration
557        let content = r#"
558[tool.rumdl]
559line-length = 120
560disable = ["MD033"]
561enable = ["MD001", "MD004"]
562include = ["docs/*.md"]
563exclude = ["node_modules"]
564respect-gitignore = true
565        "#;
566
567        fs::write(&config_path, content).unwrap();
568
569        // Load the config with skip_auto_discovery to avoid environment config files
570        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
571        let config: Config = sourced.into(); // Convert to plain config for assertions
572
573        // Check global settings
574        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
575        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
576        // Should now contain only the configured pattern since auto-discovery is disabled
577        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
578        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
579        assert!(config.global.respect_gitignore);
580
581        // Check line-length was correctly added to MD013
582        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
583        assert_eq!(line_length, Some(120));
584    }
585
586    #[test]
587    fn test_pyproject_toml_snake_case_and_kebab_case() {
588        let temp_dir = tempdir().unwrap();
589        let config_path = temp_dir.path().join("pyproject.toml");
590
591        // Test with both kebab-case and snake_case variants
592        let content = r#"
593[tool.rumdl]
594line-length = 150
595respect_gitignore = true
596        "#;
597
598        fs::write(&config_path, content).unwrap();
599
600        // Load the config with skip_auto_discovery to avoid environment config files
601        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
602        let config: Config = sourced.into(); // Convert to plain config for assertions
603
604        // Check settings were correctly loaded
605        assert!(config.global.respect_gitignore);
606        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
607        assert_eq!(line_length, Some(150));
608    }
609
    /// A rule section that spells the same option in snake_case and kebab-case
    /// must collapse to a single normalized key, with the later entry winning.
    #[test]
    fn test_md013_key_normalization_in_rumdl_toml() {
        let temp_dir = tempdir().unwrap();
        let config_path = temp_dir.path().join(".rumdl.toml");
        let config_content = r#"
[MD013]
line_length = 111
line-length = 222
"#;
        fs::write(&config_path, config_content).unwrap();
        // Load the config with skip_auto_discovery to avoid environment config files
        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
        // Only the normalized kebab-case key is expected to remain
        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
        assert_eq!(keys, vec!["line-length"]);
        let val = &rule_cfg.values["line-length"].value;
        assert_eq!(val.as_integer(), Some(222));
        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
        let config: Config = sourced.clone().into();
        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
        assert_eq!(v1, Some(222));
        assert_eq!(v2, Some(222));
    }
635
    /// Section names differing only in letter case (`md013`, `Md013`, `MD013`)
    /// must be treated as the same rule; the last section's value is expected.
    #[test]
    fn test_md013_section_case_insensitivity() {
        let temp_dir = tempdir().unwrap();
        let config_path = temp_dir.path().join(".rumdl.toml");
        let config_content = r#"
[md013]
line-length = 101

[Md013]
line-length = 102

[MD013]
line-length = 103
"#;
        fs::write(&config_path, config_content).unwrap();
        // Load the config with skip_auto_discovery to avoid environment config files
        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
        let config: Config = sourced.clone().into();
        // Only the last section should win, and be present
        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
        assert_eq!(keys, vec!["line-length"]);
        let val = &rule_cfg.values["line-length"].value;
        assert_eq!(val.as_integer(), Some(103));
        // The flattened Config must report the same winning value
        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
        assert_eq!(v, Some(103));
    }
663
    /// Snake_case and kebab-case spellings of one option share a single
    /// normalized key; lookups through either spelling return the later value.
    #[test]
    fn test_md013_key_snake_and_kebab_case() {
        let temp_dir = tempdir().unwrap();
        let config_path = temp_dir.path().join(".rumdl.toml");
        let config_content = r#"
[MD013]
line_length = 201
line-length = 202
"#;
        fs::write(&config_path, config_content).unwrap();
        // Load the config with skip_auto_discovery to avoid environment config files
        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
        let config: Config = sourced.clone().into();
        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
        // Exactly one key, in kebab-case, carrying the value written last
        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
        assert_eq!(keys, vec!["line-length"]);
        let val = &rule_cfg.values["line-length"].value;
        assert_eq!(val.as_integer(), Some(202));
        // Both spellings resolve to that value through the public lookup helper
        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
        assert_eq!(v1, Some(202));
        assert_eq!(v2, Some(202));
    }
687
    /// A section for a rule id that is not expected to be recognized (`MD999`)
    /// must be dropped without affecting the neighbouring valid section.
    #[test]
    fn test_unknown_rule_section_is_ignored() {
        let temp_dir = tempdir().unwrap();
        let config_path = temp_dir.path().join(".rumdl.toml");
        let config_content = r#"
[MD999]
foo = 1
bar = 2
[MD013]
line-length = 303
"#;
        fs::write(&config_path, config_content).unwrap();
        // Load the config with skip_auto_discovery to avoid environment config files
        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
        let config: Config = sourced.clone().into();
        // MD999 should not be present
        assert!(!sourced.rules.contains_key("MD999"));
        // MD013 should be present and correct
        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
        assert_eq!(v, Some(303));
    }
709
710    #[test]
711    fn test_invalid_toml_syntax() {
712        let temp_dir = tempdir().unwrap();
713        let config_path = temp_dir.path().join(".rumdl.toml");
714
715        // Invalid TOML with unclosed string
716        let config_content = r#"
717[MD013]
718line-length = "unclosed string
719"#;
720        fs::write(&config_path, config_content).unwrap();
721
722        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
723        assert!(result.is_err());
724        match result.unwrap_err() {
725            ConfigError::ParseError(msg) => {
726                // The actual error message from toml parser might vary
727                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
728            }
729            _ => panic!("Expected ParseError"),
730        }
731    }
732
733    #[test]
734    fn test_wrong_type_for_config_value() {
735        let temp_dir = tempdir().unwrap();
736        let config_path = temp_dir.path().join(".rumdl.toml");
737
738        // line-length should be a number, not a string
739        let config_content = r#"
740[MD013]
741line-length = "not a number"
742"#;
743        fs::write(&config_path, config_content).unwrap();
744
745        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
746        let config: Config = sourced.into();
747
748        // The value should be loaded as a string, not converted
749        let rule_config = config.rules.get("MD013").unwrap();
750        let value = rule_config.values.get("line-length").unwrap();
751        assert!(matches!(value, toml::Value::String(_)));
752    }
753
754    #[test]
755    fn test_empty_config_file() {
756        let temp_dir = tempdir().unwrap();
757        let config_path = temp_dir.path().join(".rumdl.toml");
758
759        // Empty file
760        fs::write(&config_path, "").unwrap();
761
762        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
763        let config: Config = sourced.into();
764
765        // Should have default values
766        assert_eq!(config.global.line_length, 80);
767        assert!(config.global.respect_gitignore);
768        assert!(config.rules.is_empty());
769    }
770
771    #[test]
772    fn test_malformed_pyproject_toml() {
773        let temp_dir = tempdir().unwrap();
774        let config_path = temp_dir.path().join("pyproject.toml");
775
776        // Missing closing bracket
777        let content = r#"
778[tool.rumdl
779line-length = 120
780"#;
781        fs::write(&config_path, content).unwrap();
782
783        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
784        assert!(result.is_err());
785    }
786
787    #[test]
788    fn test_conflicting_config_values() {
789        let temp_dir = tempdir().unwrap();
790        let config_path = temp_dir.path().join(".rumdl.toml");
791
792        // Both enable and disable the same rule - these need to be in a global section
793        let config_content = r#"
794[global]
795enable = ["MD013"]
796disable = ["MD013"]
797"#;
798        fs::write(&config_path, config_content).unwrap();
799
800        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
801        let config: Config = sourced.into();
802
803        // Both should be present - resolution happens at runtime
804        assert!(config.global.enable.contains(&"MD013".to_string()));
805        assert!(config.global.disable.contains(&"MD013".to_string()));
806    }
807
808    #[test]
809    fn test_invalid_rule_names() {
810        let temp_dir = tempdir().unwrap();
811        let config_path = temp_dir.path().join(".rumdl.toml");
812
813        let config_content = r#"
814[global]
815enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
816disable = ["MD-001", "MD_002"]
817"#;
818        fs::write(&config_path, config_content).unwrap();
819
820        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
821        let config: Config = sourced.into();
822
823        // All values should be preserved as-is
824        assert_eq!(config.global.enable.len(), 4);
825        assert_eq!(config.global.disable.len(), 2);
826    }
827
828    #[test]
829    fn test_deeply_nested_config() {
830        let temp_dir = tempdir().unwrap();
831        let config_path = temp_dir.path().join(".rumdl.toml");
832
833        // This should be ignored as we don't support nested tables within rule configs
834        let config_content = r#"
835[MD013]
836line-length = 100
837[MD013.nested]
838value = 42
839"#;
840        fs::write(&config_path, config_content).unwrap();
841
842        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
843        let config: Config = sourced.into();
844
845        let rule_config = config.rules.get("MD013").unwrap();
846        assert_eq!(
847            rule_config.values.get("line-length").unwrap(),
848            &toml::Value::Integer(100)
849        );
850        // Nested table should not be present
851        assert!(!rule_config.values.contains_key("nested"));
852    }
853
854    #[test]
855    fn test_unicode_in_config() {
856        let temp_dir = tempdir().unwrap();
857        let config_path = temp_dir.path().join(".rumdl.toml");
858
859        let config_content = r#"
860[global]
861include = ["文档/*.md", "ドキュメント/*.md"]
862exclude = ["测试/*", "🚀/*"]
863
864[MD013]
865line-length = 80
866message = "行太长了 🚨"
867"#;
868        fs::write(&config_path, config_content).unwrap();
869
870        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
871        let config: Config = sourced.into();
872
873        assert_eq!(config.global.include.len(), 2);
874        assert_eq!(config.global.exclude.len(), 2);
875        assert!(config.global.include[0].contains("文档"));
876        assert!(config.global.exclude[1].contains("🚀"));
877
878        let rule_config = config.rules.get("MD013").unwrap();
879        let message = rule_config.values.get("message").unwrap();
880        if let toml::Value::String(s) = message {
881            assert!(s.contains("行太长了"));
882            assert!(s.contains("🚨"));
883        }
884    }
885
886    #[test]
887    fn test_extremely_long_values() {
888        let temp_dir = tempdir().unwrap();
889        let config_path = temp_dir.path().join(".rumdl.toml");
890
891        let long_string = "a".repeat(10000);
892        let config_content = format!(
893            r#"
894[global]
895exclude = ["{long_string}"]
896
897[MD013]
898line-length = 999999999
899"#
900        );
901
902        fs::write(&config_path, config_content).unwrap();
903
904        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
905        let config: Config = sourced.into();
906
907        assert_eq!(config.global.exclude[0].len(), 10000);
908        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
909        assert_eq!(line_length, Some(999999999));
910    }
911
912    #[test]
913    fn test_config_with_comments() {
914        let temp_dir = tempdir().unwrap();
915        let config_path = temp_dir.path().join(".rumdl.toml");
916
917        let config_content = r#"
918[global]
919# This is a comment
920enable = ["MD001"] # Enable MD001
921# disable = ["MD002"] # This is commented out
922
923[MD013] # Line length rule
924line-length = 100 # Set to 100 characters
925# ignored = true # This setting is commented out
926"#;
927        fs::write(&config_path, config_content).unwrap();
928
929        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
930        let config: Config = sourced.into();
931
932        assert_eq!(config.global.enable, vec!["MD001"]);
933        assert!(config.global.disable.is_empty()); // Commented out
934
935        let rule_config = config.rules.get("MD013").unwrap();
936        assert_eq!(rule_config.values.len(), 1); // Only line-length
937        assert!(!rule_config.values.contains_key("ignored"));
938    }
939
940    #[test]
941    fn test_arrays_in_rule_config() {
942        let temp_dir = tempdir().unwrap();
943        let config_path = temp_dir.path().join(".rumdl.toml");
944
945        let config_content = r#"
946[MD002]
947levels = [1, 2, 3]
948tags = ["important", "critical"]
949mixed = [1, "two", true]
950"#;
951        fs::write(&config_path, config_content).unwrap();
952
953        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
954        let config: Config = sourced.into();
955
956        // Arrays should now be properly parsed
957        let rule_config = config.rules.get("MD002").expect("MD002 config should exist");
958
959        // Check that arrays are present and correctly parsed
960        assert!(rule_config.values.contains_key("levels"));
961        assert!(rule_config.values.contains_key("tags"));
962        assert!(rule_config.values.contains_key("mixed"));
963
964        // Verify array contents
965        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
966            assert_eq!(levels.len(), 3);
967            assert_eq!(levels[0], toml::Value::Integer(1));
968            assert_eq!(levels[1], toml::Value::Integer(2));
969            assert_eq!(levels[2], toml::Value::Integer(3));
970        } else {
971            panic!("levels should be an array");
972        }
973
974        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
975            assert_eq!(tags.len(), 2);
976            assert_eq!(tags[0], toml::Value::String("important".to_string()));
977            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
978        } else {
979            panic!("tags should be an array");
980        }
981
982        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
983            assert_eq!(mixed.len(), 3);
984            assert_eq!(mixed[0], toml::Value::Integer(1));
985            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
986            assert_eq!(mixed[2], toml::Value::Boolean(true));
987        } else {
988            panic!("mixed should be an array");
989        }
990    }
991
992    #[test]
993    fn test_normalize_key_edge_cases() {
994        // Rule names
995        assert_eq!(normalize_key("MD001"), "MD001");
996        assert_eq!(normalize_key("md001"), "MD001");
997        assert_eq!(normalize_key("Md001"), "MD001");
998        assert_eq!(normalize_key("mD001"), "MD001");
999
1000        // Non-rule names
1001        assert_eq!(normalize_key("line_length"), "line-length");
1002        assert_eq!(normalize_key("line-length"), "line-length");
1003        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
1004        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
1005
1006        // Edge cases
1007        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
1008        assert_eq!(normalize_key("MD00"), "md00"); // Too short
1009        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
1010        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
1011        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
1012        assert_eq!(normalize_key(""), "");
1013        assert_eq!(normalize_key("_"), "-");
1014        assert_eq!(normalize_key("___"), "---");
1015    }
1016
1017    #[test]
1018    fn test_missing_config_file() {
1019        let temp_dir = tempdir().unwrap();
1020        let config_path = temp_dir.path().join("nonexistent.toml");
1021
1022        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1023        assert!(result.is_err());
1024        match result.unwrap_err() {
1025            ConfigError::IoError { .. } => {}
1026            _ => panic!("Expected IoError for missing file"),
1027        }
1028    }
1029
1030    #[test]
1031    #[cfg(unix)]
1032    fn test_permission_denied_config() {
1033        use std::os::unix::fs::PermissionsExt;
1034
1035        let temp_dir = tempdir().unwrap();
1036        let config_path = temp_dir.path().join(".rumdl.toml");
1037
1038        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1039
1040        // Remove read permissions
1041        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1042        perms.set_mode(0o000);
1043        fs::set_permissions(&config_path, perms).unwrap();
1044
1045        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1046
1047        // Restore permissions for cleanup
1048        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1049        perms.set_mode(0o644);
1050        fs::set_permissions(&config_path, perms).unwrap();
1051
1052        assert!(result.is_err());
1053        match result.unwrap_err() {
1054            ConfigError::IoError { .. } => {}
1055            _ => panic!("Expected IoError for permission denied"),
1056        }
1057    }
1058
1059    #[test]
1060    fn test_circular_reference_detection() {
1061        // This test is more conceptual since TOML doesn't support circular references
1062        // But we test that deeply nested structures don't cause stack overflow
1063        let temp_dir = tempdir().unwrap();
1064        let config_path = temp_dir.path().join(".rumdl.toml");
1065
1066        let mut config_content = String::from("[MD001]\n");
1067        for i in 0..100 {
1068            config_content.push_str(&format!("key{i} = {i}\n"));
1069        }
1070
1071        fs::write(&config_path, config_content).unwrap();
1072
1073        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1074        let config: Config = sourced.into();
1075
1076        let rule_config = config.rules.get("MD001").unwrap();
1077        assert_eq!(rule_config.values.len(), 100);
1078    }
1079
1080    #[test]
1081    fn test_special_toml_values() {
1082        let temp_dir = tempdir().unwrap();
1083        let config_path = temp_dir.path().join(".rumdl.toml");
1084
1085        let config_content = r#"
1086[MD001]
1087infinity = inf
1088neg_infinity = -inf
1089not_a_number = nan
1090datetime = 1979-05-27T07:32:00Z
1091local_date = 1979-05-27
1092local_time = 07:32:00
1093"#;
1094        fs::write(&config_path, config_content).unwrap();
1095
1096        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1097        let config: Config = sourced.into();
1098
1099        // Some values might not be parsed due to parser limitations
1100        if let Some(rule_config) = config.rules.get("MD001") {
1101            // Check special float values if present
1102            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1103                assert!(f.is_infinite() && f.is_sign_positive());
1104            }
1105            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1106                assert!(f.is_infinite() && f.is_sign_negative());
1107            }
1108            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1109                assert!(f.is_nan());
1110            }
1111
1112            // Check datetime values if present
1113            if let Some(val) = rule_config.values.get("datetime") {
1114                assert!(matches!(val, toml::Value::Datetime(_)));
1115            }
1116            // Note: local_date and local_time might not be parsed by the current implementation
1117        }
1118    }
1119
1120    #[test]
1121    fn test_default_config_passes_validation() {
1122        use crate::rules;
1123
1124        let temp_dir = tempdir().unwrap();
1125        let config_path = temp_dir.path().join(".rumdl.toml");
1126        let config_path_str = config_path.to_str().unwrap();
1127
1128        // Create the default config using the same function that `rumdl init` uses
1129        create_default_config(config_path_str).unwrap();
1130
1131        // Load it back as a SourcedConfig
1132        let sourced =
1133            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1134
1135        // Create the rule registry
1136        let all_rules = rules::all_rules(&Config::default());
1137        let registry = RuleRegistry::from_rules(&all_rules);
1138
1139        // Validate the config
1140        let warnings = validate_config_sourced(&sourced, &registry);
1141
1142        // The default config should have no warnings
1143        if !warnings.is_empty() {
1144            for warning in &warnings {
1145                eprintln!("Config validation warning: {}", warning.message);
1146                if let Some(rule) = &warning.rule {
1147                    eprintln!("  Rule: {rule}");
1148                }
1149                if let Some(key) = &warning.key {
1150                    eprintln!("  Key: {key}");
1151                }
1152            }
1153        }
1154        assert!(
1155            warnings.is_empty(),
1156            "Default config from rumdl init should pass validation without warnings"
1157        );
1158    }
1159
1160    #[test]
1161    fn test_per_file_ignores_config_parsing() {
1162        let temp_dir = tempdir().unwrap();
1163        let config_path = temp_dir.path().join(".rumdl.toml");
1164        let config_content = r#"
1165[per-file-ignores]
1166"README.md" = ["MD033"]
1167"docs/**/*.md" = ["MD013", "MD033"]
1168"test/*.md" = ["MD041"]
1169"#;
1170        fs::write(&config_path, config_content).unwrap();
1171
1172        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1173        let config: Config = sourced.into();
1174
1175        // Verify per-file-ignores was loaded
1176        assert_eq!(config.per_file_ignores.len(), 3);
1177        assert_eq!(
1178            config.per_file_ignores.get("README.md"),
1179            Some(&vec!["MD033".to_string()])
1180        );
1181        assert_eq!(
1182            config.per_file_ignores.get("docs/**/*.md"),
1183            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1184        );
1185        assert_eq!(
1186            config.per_file_ignores.get("test/*.md"),
1187            Some(&vec!["MD041".to_string()])
1188        );
1189    }
1190
1191    #[test]
1192    fn test_per_file_ignores_glob_matching() {
1193        use std::path::PathBuf;
1194
1195        let temp_dir = tempdir().unwrap();
1196        let config_path = temp_dir.path().join(".rumdl.toml");
1197        let config_content = r#"
1198[per-file-ignores]
1199"README.md" = ["MD033"]
1200"docs/**/*.md" = ["MD013"]
1201"**/test_*.md" = ["MD041"]
1202"#;
1203        fs::write(&config_path, config_content).unwrap();
1204
1205        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1206        let config: Config = sourced.into();
1207
1208        // Test exact match
1209        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1210        assert!(ignored.contains("MD033"));
1211        assert_eq!(ignored.len(), 1);
1212
1213        // Test glob pattern matching
1214        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1215        assert!(ignored.contains("MD013"));
1216        assert_eq!(ignored.len(), 1);
1217
1218        // Test recursive glob pattern
1219        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1220        assert!(ignored.contains("MD041"));
1221        assert_eq!(ignored.len(), 1);
1222
1223        // Test non-matching path
1224        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1225        assert!(ignored.is_empty());
1226    }
1227
1228    #[test]
1229    fn test_per_file_ignores_pyproject_toml() {
1230        let temp_dir = tempdir().unwrap();
1231        let config_path = temp_dir.path().join("pyproject.toml");
1232        let config_content = r#"
1233[tool.rumdl]
1234[tool.rumdl.per-file-ignores]
1235"README.md" = ["MD033", "MD013"]
1236"generated/*.md" = ["MD041"]
1237"#;
1238        fs::write(&config_path, config_content).unwrap();
1239
1240        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1241        let config: Config = sourced.into();
1242
1243        // Verify per-file-ignores was loaded from pyproject.toml
1244        assert_eq!(config.per_file_ignores.len(), 2);
1245        assert_eq!(
1246            config.per_file_ignores.get("README.md"),
1247            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1248        );
1249        assert_eq!(
1250            config.per_file_ignores.get("generated/*.md"),
1251            Some(&vec!["MD041".to_string()])
1252        );
1253    }
1254
1255    #[test]
1256    fn test_per_file_ignores_multiple_patterns_match() {
1257        use std::path::PathBuf;
1258
1259        let temp_dir = tempdir().unwrap();
1260        let config_path = temp_dir.path().join(".rumdl.toml");
1261        let config_content = r#"
1262[per-file-ignores]
1263"docs/**/*.md" = ["MD013"]
1264"**/api/*.md" = ["MD033"]
1265"docs/api/overview.md" = ["MD041"]
1266"#;
1267        fs::write(&config_path, config_content).unwrap();
1268
1269        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1270        let config: Config = sourced.into();
1271
1272        // File matches multiple patterns - should get union of all rules
1273        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1274        assert_eq!(ignored.len(), 3);
1275        assert!(ignored.contains("MD013"));
1276        assert!(ignored.contains("MD033"));
1277        assert!(ignored.contains("MD041"));
1278    }
1279
1280    #[test]
1281    fn test_per_file_ignores_rule_name_normalization() {
1282        use std::path::PathBuf;
1283
1284        let temp_dir = tempdir().unwrap();
1285        let config_path = temp_dir.path().join(".rumdl.toml");
1286        let config_content = r#"
1287[per-file-ignores]
1288"README.md" = ["md033", "MD013", "Md041"]
1289"#;
1290        fs::write(&config_path, config_content).unwrap();
1291
1292        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1293        let config: Config = sourced.into();
1294
1295        // All rule names should be normalized to uppercase
1296        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1297        assert_eq!(ignored.len(), 3);
1298        assert!(ignored.contains("MD033"));
1299        assert!(ignored.contains("MD013"));
1300        assert!(ignored.contains("MD041"));
1301    }
1302
1303    #[test]
1304    fn test_per_file_ignores_invalid_glob_pattern() {
1305        use std::path::PathBuf;
1306
1307        let temp_dir = tempdir().unwrap();
1308        let config_path = temp_dir.path().join(".rumdl.toml");
1309        let config_content = r#"
1310[per-file-ignores]
1311"[invalid" = ["MD033"]
1312"valid/*.md" = ["MD013"]
1313"#;
1314        fs::write(&config_path, config_content).unwrap();
1315
1316        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1317        let config: Config = sourced.into();
1318
1319        // Invalid pattern should be skipped, valid pattern should work
1320        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1321        assert!(ignored.contains("MD013"));
1322
1323        // Invalid pattern should not cause issues
1324        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1325        assert!(ignored2.is_empty());
1326    }
1327
1328    #[test]
1329    fn test_per_file_ignores_empty_section() {
1330        use std::path::PathBuf;
1331
1332        let temp_dir = tempdir().unwrap();
1333        let config_path = temp_dir.path().join(".rumdl.toml");
1334        let config_content = r#"
1335[global]
1336disable = ["MD001"]
1337
1338[per-file-ignores]
1339"#;
1340        fs::write(&config_path, config_content).unwrap();
1341
1342        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1343        let config: Config = sourced.into();
1344
1345        // Empty per-file-ignores should work fine
1346        assert_eq!(config.per_file_ignores.len(), 0);
1347        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1348        assert!(ignored.is_empty());
1349    }
1350
1351    #[test]
1352    fn test_per_file_ignores_with_underscores_in_pyproject() {
1353        let temp_dir = tempdir().unwrap();
1354        let config_path = temp_dir.path().join("pyproject.toml");
1355        let config_content = r#"
1356[tool.rumdl]
1357[tool.rumdl.per_file_ignores]
1358"README.md" = ["MD033"]
1359"#;
1360        fs::write(&config_path, config_content).unwrap();
1361
1362        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1363        let config: Config = sourced.into();
1364
1365        // Should support both per-file-ignores and per_file_ignores
1366        assert_eq!(config.per_file_ignores.len(), 1);
1367        assert_eq!(
1368            config.per_file_ignores.get("README.md"),
1369            Some(&vec!["MD033".to_string()])
1370        );
1371    }
1372
1373    #[test]
1374    fn test_generate_json_schema() {
1375        use schemars::schema_for;
1376        use std::env;
1377
1378        let schema = schema_for!(Config);
1379        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1380
1381        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1382        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1383            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1384            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1385            println!("Schema written to: {}", schema_path.display());
1386        }
1387
1388        // Basic validation that schema was generated
1389        assert!(schema_json.contains("\"title\": \"Config\""));
1390        assert!(schema_json.contains("\"global\""));
1391        assert!(schema_json.contains("\"per-file-ignores\""));
1392    }
1393}
1394
/// Identifies where a configuration value came from.
///
/// The relative precedence of the sources is not encoded in the variant order;
/// it is defined by `SourcedValue::merge_override`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConfigSource {
    /// Built-in default; used when values are first constructed.
    Default,
    /// Value was loaded from a rumdl TOML config file (e.g. `.rumdl.toml`).
    RumdlToml,
    /// Value was loaded from a `pyproject.toml` file.
    PyprojectToml,
    /// Value was supplied via the CLI (presumably command-line arguments; the
    /// code that sets it is outside this section).
    Cli,
    /// Value was loaded from a markdownlint config file (e.g. .markdownlint.json, .markdownlint.yaml)
    Markdownlint,
}
1404
1405#[derive(Debug, Clone)]
1406pub struct ConfigOverride<T> {
1407    pub value: T,
1408    pub source: ConfigSource,
1409    pub file: Option<String>,
1410    pub line: Option<usize>,
1411}
1412
1413#[derive(Debug, Clone)]
1414pub struct SourcedValue<T> {
1415    pub value: T,
1416    pub source: ConfigSource,
1417    pub overrides: Vec<ConfigOverride<T>>,
1418}
1419
1420impl<T: Clone> SourcedValue<T> {
1421    pub fn new(value: T, source: ConfigSource) -> Self {
1422        Self {
1423            value: value.clone(),
1424            source,
1425            overrides: vec![ConfigOverride {
1426                value,
1427                source,
1428                file: None,
1429                line: None,
1430            }],
1431        }
1432    }
1433
1434    /// Merges a new override into this SourcedValue based on source precedence.
1435    /// If the new source has higher or equal precedence, the value and source are updated,
1436    /// and the new override is added to the history.
1437    pub fn merge_override(
1438        &mut self,
1439        new_value: T,
1440        new_source: ConfigSource,
1441        new_file: Option<String>,
1442        new_line: Option<usize>,
1443    ) {
1444        // Helper function to get precedence, defined locally or globally
1445        fn source_precedence(src: ConfigSource) -> u8 {
1446            match src {
1447                ConfigSource::Default => 0,
1448                ConfigSource::PyprojectToml => 1,
1449                ConfigSource::Markdownlint => 2,
1450                ConfigSource::RumdlToml => 3,
1451                ConfigSource::Cli => 4,
1452            }
1453        }
1454
1455        if source_precedence(new_source) >= source_precedence(self.source) {
1456            self.value = new_value.clone();
1457            self.source = new_source;
1458            self.overrides.push(ConfigOverride {
1459                value: new_value,
1460                source: new_source,
1461                file: new_file,
1462                line: new_line,
1463            });
1464        }
1465    }
1466
1467    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
1468        // This is essentially merge_override without the precedence check
1469        // We might consolidate these later, but keep separate for now during refactor
1470        self.value = value.clone();
1471        self.source = source;
1472        self.overrides.push(ConfigOverride {
1473            value,
1474            source,
1475            file,
1476            line,
1477        });
1478    }
1479}
1480
1481#[derive(Debug, Clone)]
1482pub struct SourcedGlobalConfig {
1483    pub enable: SourcedValue<Vec<String>>,
1484    pub disable: SourcedValue<Vec<String>>,
1485    pub exclude: SourcedValue<Vec<String>>,
1486    pub include: SourcedValue<Vec<String>>,
1487    pub respect_gitignore: SourcedValue<bool>,
1488    pub line_length: SourcedValue<u64>,
1489    pub output_format: Option<SourcedValue<String>>,
1490    pub fixable: SourcedValue<Vec<String>>,
1491    pub unfixable: SourcedValue<Vec<String>>,
1492    pub flavor: SourcedValue<MarkdownFlavor>,
1493    pub force_exclude: SourcedValue<bool>,
1494}
1495
1496impl Default for SourcedGlobalConfig {
1497    fn default() -> Self {
1498        SourcedGlobalConfig {
1499            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1500            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1501            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1502            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1503            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1504            line_length: SourcedValue::new(80, ConfigSource::Default),
1505            output_format: None,
1506            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1507            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1508            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1509            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1510        }
1511    }
1512}
1513
1514#[derive(Debug, Default, Clone)]
1515pub struct SourcedRuleConfig {
1516    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
1517}
1518
1519/// Represents configuration loaded from a single source file, with provenance.
1520/// Used as an intermediate step before merging into the final SourcedConfig.
1521#[derive(Debug, Clone)]
1522pub struct SourcedConfigFragment {
1523    pub global: SourcedGlobalConfig,
1524    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
1525    pub rules: BTreeMap<String, SourcedRuleConfig>,
1526    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
1527                                                             // Note: loaded_files is tracked globally in SourcedConfig.
1528}
1529
1530impl Default for SourcedConfigFragment {
1531    fn default() -> Self {
1532        Self {
1533            global: SourcedGlobalConfig::default(),
1534            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1535            rules: BTreeMap::new(),
1536            unknown_keys: Vec::new(),
1537        }
1538    }
1539}
1540
1541#[derive(Debug, Clone)]
1542pub struct SourcedConfig {
1543    pub global: SourcedGlobalConfig,
1544    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
1545    pub rules: BTreeMap<String, SourcedRuleConfig>,
1546    pub loaded_files: Vec<String>,
1547    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
1548}
1549
1550impl Default for SourcedConfig {
1551    fn default() -> Self {
1552        Self {
1553            global: SourcedGlobalConfig::default(),
1554            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1555            rules: BTreeMap::new(),
1556            loaded_files: Vec::new(),
1557            unknown_keys: Vec::new(),
1558        }
1559    }
1560}
1561
1562impl SourcedConfig {
1563    /// Merges another SourcedConfigFragment into this SourcedConfig.
1564    /// Uses source precedence to determine which values take effect.
1565    fn merge(&mut self, fragment: SourcedConfigFragment) {
1566        // Merge global config
1567        self.global.enable.merge_override(
1568            fragment.global.enable.value,
1569            fragment.global.enable.source,
1570            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1571            fragment.global.enable.overrides.first().and_then(|o| o.line),
1572        );
1573        self.global.disable.merge_override(
1574            fragment.global.disable.value,
1575            fragment.global.disable.source,
1576            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1577            fragment.global.disable.overrides.first().and_then(|o| o.line),
1578        );
1579        self.global.include.merge_override(
1580            fragment.global.include.value,
1581            fragment.global.include.source,
1582            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1583            fragment.global.include.overrides.first().and_then(|o| o.line),
1584        );
1585        self.global.exclude.merge_override(
1586            fragment.global.exclude.value,
1587            fragment.global.exclude.source,
1588            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1589            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1590        );
1591        self.global.respect_gitignore.merge_override(
1592            fragment.global.respect_gitignore.value,
1593            fragment.global.respect_gitignore.source,
1594            fragment
1595                .global
1596                .respect_gitignore
1597                .overrides
1598                .first()
1599                .and_then(|o| o.file.clone()),
1600            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1601        );
1602        self.global.line_length.merge_override(
1603            fragment.global.line_length.value,
1604            fragment.global.line_length.source,
1605            fragment
1606                .global
1607                .line_length
1608                .overrides
1609                .first()
1610                .and_then(|o| o.file.clone()),
1611            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1612        );
1613        self.global.fixable.merge_override(
1614            fragment.global.fixable.value,
1615            fragment.global.fixable.source,
1616            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1617            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1618        );
1619        self.global.unfixable.merge_override(
1620            fragment.global.unfixable.value,
1621            fragment.global.unfixable.source,
1622            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1623            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1624        );
1625
1626        // Merge flavor
1627        self.global.flavor.merge_override(
1628            fragment.global.flavor.value,
1629            fragment.global.flavor.source,
1630            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1631            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1632        );
1633
1634        // Merge force_exclude
1635        self.global.force_exclude.merge_override(
1636            fragment.global.force_exclude.value,
1637            fragment.global.force_exclude.source,
1638            fragment
1639                .global
1640                .force_exclude
1641                .overrides
1642                .first()
1643                .and_then(|o| o.file.clone()),
1644            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1645        );
1646
1647        // Merge output_format if present
1648        if let Some(output_format_fragment) = fragment.global.output_format {
1649            if let Some(ref mut output_format) = self.global.output_format {
1650                output_format.merge_override(
1651                    output_format_fragment.value,
1652                    output_format_fragment.source,
1653                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1654                    output_format_fragment.overrides.first().and_then(|o| o.line),
1655                );
1656            } else {
1657                self.global.output_format = Some(output_format_fragment);
1658            }
1659        }
1660
1661        // Merge per_file_ignores
1662        self.per_file_ignores.merge_override(
1663            fragment.per_file_ignores.value,
1664            fragment.per_file_ignores.source,
1665            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1666            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1667        );
1668
1669        // Merge rule configs
1670        for (rule_name, rule_fragment) in fragment.rules {
1671            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1672            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1673            for (key, sourced_value_fragment) in rule_fragment.values {
1674                let sv_entry = rule_entry
1675                    .values
1676                    .entry(key.clone())
1677                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1678                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1679                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1680                sv_entry.merge_override(
1681                    sourced_value_fragment.value,  // Use the value from the fragment
1682                    sourced_value_fragment.source, // Use the source from the fragment
1683                    file_from_fragment,            // Pass the file path from the fragment override
1684                    line_from_fragment,            // Pass the line number from the fragment override
1685                );
1686            }
1687        }
1688
1689        // Merge unknown_keys from fragment
1690        for (section, key, file_path) in fragment.unknown_keys {
1691            // Deduplicate: only add if not already present
1692            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
1693                self.unknown_keys.push((section, key, file_path));
1694            }
1695        }
1696    }
1697
1698    /// Load and merge configurations from files and CLI overrides.
1699    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
1700        Self::load_with_discovery(config_path, cli_overrides, false)
1701    }
1702
1703    /// Discover configuration file by traversing up the directory tree.
1704    /// Returns the first configuration file found.
1705    fn discover_config_upward() -> Option<std::path::PathBuf> {
1706        use std::env;
1707
1708        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1709        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
1710
1711        let start_dir = match env::current_dir() {
1712            Ok(dir) => dir,
1713            Err(e) => {
1714                log::debug!("[rumdl-config] Failed to get current directory: {e}");
1715                return None;
1716            }
1717        };
1718
1719        let mut current_dir = start_dir.clone();
1720        let mut depth = 0;
1721
1722        loop {
1723            if depth >= MAX_DEPTH {
1724                log::debug!("[rumdl-config] Maximum traversal depth reached");
1725                break;
1726            }
1727
1728            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
1729
1730            // Check for config files in order of precedence
1731            for config_name in CONFIG_FILES {
1732                let config_path = current_dir.join(config_name);
1733
1734                if config_path.exists() {
1735                    // For pyproject.toml, verify it contains [tool.rumdl] section
1736                    if *config_name == "pyproject.toml" {
1737                        if let Ok(content) = std::fs::read_to_string(&config_path) {
1738                            if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1739                                log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1740                                return Some(config_path);
1741                            }
1742                            log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
1743                            continue;
1744                        }
1745                    } else {
1746                        log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1747                        return Some(config_path);
1748                    }
1749                }
1750            }
1751
1752            // Check for .git directory (stop boundary)
1753            if current_dir.join(".git").exists() {
1754                log::debug!("[rumdl-config] Stopping at .git directory");
1755                break;
1756            }
1757
1758            // Move to parent directory
1759            match current_dir.parent() {
1760                Some(parent) => {
1761                    current_dir = parent.to_owned();
1762                    depth += 1;
1763                }
1764                None => {
1765                    log::debug!("[rumdl-config] Reached filesystem root");
1766                    break;
1767                }
1768            }
1769        }
1770
1771        None
1772    }
1773
1774    /// Internal implementation that accepts config directory for testing
1775    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
1776        let config_dir = config_dir.join("rumdl");
1777
1778        // Check for config files in precedence order (same as project discovery)
1779        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1780
1781        log::debug!(
1782            "[rumdl-config] Checking for user configuration in: {}",
1783            config_dir.display()
1784        );
1785
1786        for filename in USER_CONFIG_FILES {
1787            let config_path = config_dir.join(filename);
1788
1789            if config_path.exists() {
1790                // For pyproject.toml, verify it contains [tool.rumdl] section
1791                if *filename == "pyproject.toml" {
1792                    if let Ok(content) = std::fs::read_to_string(&config_path) {
1793                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1794                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1795                            return Some(config_path);
1796                        }
1797                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
1798                        continue;
1799                    }
1800                } else {
1801                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1802                    return Some(config_path);
1803                }
1804            }
1805        }
1806
1807        log::debug!(
1808            "[rumdl-config] No user configuration found in: {}",
1809            config_dir.display()
1810        );
1811        None
1812    }
1813
1814    /// Discover user-level configuration file from platform-specific config directory.
1815    /// Returns the first configuration file found in the user config directory.
1816    fn user_configuration_path() -> Option<std::path::PathBuf> {
1817        use etcetera::{BaseStrategy, choose_base_strategy};
1818
1819        match choose_base_strategy() {
1820            Ok(strategy) => {
1821                let config_dir = strategy.config_dir();
1822                Self::user_configuration_path_impl(&config_dir)
1823            }
1824            Err(e) => {
1825                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
1826                None
1827            }
1828        }
1829    }
1830
1831    /// Internal implementation that accepts user config directory for testing
1832    #[doc(hidden)]
1833    pub fn load_with_discovery_impl(
1834        config_path: Option<&str>,
1835        cli_overrides: Option<&SourcedGlobalConfig>,
1836        skip_auto_discovery: bool,
1837        user_config_dir: Option<&Path>,
1838    ) -> Result<Self, ConfigError> {
1839        use std::env;
1840        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
1841        if config_path.is_none() {
1842            if skip_auto_discovery {
1843                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
1844            } else {
1845                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
1846            }
1847        } else {
1848            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
1849        }
1850        let mut sourced_config = SourcedConfig::default();
1851
1852        // 1. Load explicit config path if provided
1853        if let Some(path) = config_path {
1854            let path_obj = Path::new(path);
1855            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
1856            log::debug!("[rumdl-config] Trying to load config file: {filename}");
1857            let path_str = path.to_string();
1858
1859            // Known markdownlint config files
1860            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
1861
1862            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
1863                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1864                    source: e,
1865                    path: path_str.clone(),
1866                })?;
1867                if filename == "pyproject.toml" {
1868                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1869                        sourced_config.merge(fragment);
1870                        sourced_config.loaded_files.push(path_str.clone());
1871                    }
1872                } else {
1873                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1874                    sourced_config.merge(fragment);
1875                    sourced_config.loaded_files.push(path_str.clone());
1876                }
1877            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
1878                || path_str.ends_with(".json")
1879                || path_str.ends_with(".jsonc")
1880                || path_str.ends_with(".yaml")
1881                || path_str.ends_with(".yml")
1882            {
1883                // Parse as markdownlint config (JSON/YAML)
1884                let fragment = load_from_markdownlint(&path_str)?;
1885                sourced_config.merge(fragment);
1886                sourced_config.loaded_files.push(path_str.clone());
1887                // markdownlint is fallback only
1888            } else {
1889                // Try TOML only
1890                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
1891                    source: e,
1892                    path: path_str.clone(),
1893                })?;
1894                let fragment = parse_rumdl_toml(&content, &path_str)?;
1895                sourced_config.merge(fragment);
1896                sourced_config.loaded_files.push(path_str.clone());
1897            }
1898        }
1899
1900        // Only perform auto-discovery if not skipped AND no explicit config path provided
1901        if !skip_auto_discovery && config_path.is_none() {
1902            // Step 1: Load user configuration first (as a base)
1903            let user_config_path = if let Some(dir) = user_config_dir {
1904                Self::user_configuration_path_impl(dir)
1905            } else {
1906                Self::user_configuration_path()
1907            };
1908
1909            if let Some(user_config_path) = user_config_path {
1910                let path_str = user_config_path.display().to_string();
1911                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1912
1913                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
1914
1915                if filename == "pyproject.toml" {
1916                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1917                        source: e,
1918                        path: path_str.clone(),
1919                    })?;
1920                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1921                        sourced_config.merge(fragment);
1922                        sourced_config.loaded_files.push(path_str);
1923                    }
1924                } else {
1925                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
1926                        source: e,
1927                        path: path_str.clone(),
1928                    })?;
1929                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1930                    sourced_config.merge(fragment);
1931                    sourced_config.loaded_files.push(path_str);
1932                }
1933            } else {
1934                log::debug!("[rumdl-config] No user configuration file found");
1935            }
1936
1937            // Step 2: Look for project configuration files (override user config)
1938            if let Some(config_file) = Self::discover_config_upward() {
1939                let path_str = config_file.display().to_string();
1940                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
1941
1942                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
1943
1944                if filename == "pyproject.toml" {
1945                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1946                        source: e,
1947                        path: path_str.clone(),
1948                    })?;
1949                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
1950                        sourced_config.merge(fragment);
1951                        sourced_config.loaded_files.push(path_str);
1952                    }
1953                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
1954                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
1955                        source: e,
1956                        path: path_str.clone(),
1957                    })?;
1958                    let fragment = parse_rumdl_toml(&content, &path_str)?;
1959                    sourced_config.merge(fragment);
1960                    sourced_config.loaded_files.push(path_str);
1961                }
1962            } else {
1963                log::debug!("[rumdl-config] No configuration file found via upward traversal");
1964
1965                // Step 3: If no project config found, fallback to markdownlint config in current directory
1966                let mut found_markdownlint = false;
1967                for filename in MARKDOWNLINT_CONFIG_FILES {
1968                    if std::path::Path::new(filename).exists() {
1969                        match load_from_markdownlint(filename) {
1970                            Ok(fragment) => {
1971                                sourced_config.merge(fragment);
1972                                sourced_config.loaded_files.push(filename.to_string());
1973                                found_markdownlint = true;
1974                                break; // Load only the first one found
1975                            }
1976                            Err(_e) => {
1977                                // Log error but continue (it's just a fallback)
1978                            }
1979                        }
1980                    }
1981                }
1982
1983                if !found_markdownlint {
1984                    log::debug!("[rumdl-config] No markdownlint configuration file found");
1985                }
1986            }
1987        }
1988
1989        // 5. Apply CLI overrides (highest precedence)
1990        if let Some(cli) = cli_overrides {
1991            sourced_config
1992                .global
1993                .enable
1994                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
1995            sourced_config
1996                .global
1997                .disable
1998                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
1999            sourced_config
2000                .global
2001                .exclude
2002                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
2003            sourced_config
2004                .global
2005                .include
2006                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
2007            sourced_config.global.respect_gitignore.merge_override(
2008                cli.respect_gitignore.value,
2009                ConfigSource::Cli,
2010                None,
2011                None,
2012            );
2013            sourced_config
2014                .global
2015                .fixable
2016                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
2017            sourced_config
2018                .global
2019                .unfixable
2020                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2021            // No rule-specific CLI overrides implemented yet
2022        }
2023
2024        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2025
2026        Ok(sourced_config)
2027    }
2028
2029    /// Load and merge configurations from files and CLI overrides.
2030    /// If skip_auto_discovery is true, only explicit config paths are loaded.
2031    pub fn load_with_discovery(
2032        config_path: Option<&str>,
2033        cli_overrides: Option<&SourcedGlobalConfig>,
2034        skip_auto_discovery: bool,
2035    ) -> Result<Self, ConfigError> {
2036        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
2037    }
2038}
2039
2040impl From<SourcedConfig> for Config {
2041    fn from(sourced: SourcedConfig) -> Self {
2042        let mut rules = BTreeMap::new();
2043        for (rule_name, sourced_rule_cfg) in sourced.rules {
2044            // Normalize rule name to uppercase for case-insensitive lookup
2045            let normalized_rule_name = rule_name.to_ascii_uppercase();
2046            let mut values = BTreeMap::new();
2047            for (key, sourced_val) in sourced_rule_cfg.values {
2048                values.insert(key, sourced_val.value);
2049            }
2050            rules.insert(normalized_rule_name, RuleConfig { values });
2051        }
2052        #[allow(deprecated)]
2053        let global = GlobalConfig {
2054            enable: sourced.global.enable.value,
2055            disable: sourced.global.disable.value,
2056            exclude: sourced.global.exclude.value,
2057            include: sourced.global.include.value,
2058            respect_gitignore: sourced.global.respect_gitignore.value,
2059            line_length: sourced.global.line_length.value,
2060            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2061            fixable: sourced.global.fixable.value,
2062            unfixable: sourced.global.unfixable.value,
2063            flavor: sourced.global.flavor.value,
2064            force_exclude: sourced.global.force_exclude.value,
2065        };
2066        Config {
2067            global,
2068            per_file_ignores: sourced.per_file_ignores.value,
2069            rules,
2070        }
2071    }
2072}
2073
2074/// Registry of all known rules and their config schemas
2075pub struct RuleRegistry {
2076    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types
2077    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
2078    /// Map of rule name to config key aliases
2079    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
2080}
2081
2082impl RuleRegistry {
2083    /// Build a registry from a list of rules
2084    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2085        let mut rule_schemas = std::collections::BTreeMap::new();
2086        let mut rule_aliases = std::collections::BTreeMap::new();
2087
2088        for rule in rules {
2089            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2090                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2091                rule_schemas.insert(norm_name.clone(), table);
2092                norm_name
2093            } else {
2094                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2095                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2096                norm_name
2097            };
2098
2099            // Store aliases if the rule provides them
2100            if let Some(aliases) = rule.config_aliases() {
2101                rule_aliases.insert(norm_name, aliases);
2102            }
2103        }
2104
2105        RuleRegistry {
2106            rule_schemas,
2107            rule_aliases,
2108        }
2109    }
2110
2111    /// Get all known rule names
2112    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2113        self.rule_schemas.keys().cloned().collect()
2114    }
2115
2116    /// Get the valid configuration keys for a rule, including both original and normalized variants
2117    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2118        self.rule_schemas.get(rule).map(|schema| {
2119            let mut all_keys = std::collections::BTreeSet::new();
2120
2121            // Add original keys from schema
2122            for key in schema.keys() {
2123                all_keys.insert(key.clone());
2124            }
2125
2126            // Add normalized variants for markdownlint compatibility
2127            for key in schema.keys() {
2128                // Add kebab-case variant
2129                all_keys.insert(key.replace('_', "-"));
2130                // Add snake_case variant
2131                all_keys.insert(key.replace('-', "_"));
2132                // Add normalized variant
2133                all_keys.insert(normalize_key(key));
2134            }
2135
2136            // Add any aliases defined by the rule
2137            if let Some(aliases) = self.rule_aliases.get(rule) {
2138                for alias_key in aliases.keys() {
2139                    all_keys.insert(alias_key.clone());
2140                    // Also add normalized variants of the alias
2141                    all_keys.insert(alias_key.replace('_', "-"));
2142                    all_keys.insert(alias_key.replace('-', "_"));
2143                    all_keys.insert(normalize_key(alias_key));
2144                }
2145            }
2146
2147            all_keys
2148        })
2149    }
2150
2151    /// Get the expected value type for a rule's configuration key, trying variants
2152    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2153        if let Some(schema) = self.rule_schemas.get(rule) {
2154            // Check if this key is an alias
2155            if let Some(aliases) = self.rule_aliases.get(rule)
2156                && let Some(canonical_key) = aliases.get(key)
2157            {
2158                // Use the canonical key for schema lookup
2159                if let Some(value) = schema.get(canonical_key) {
2160                    return Some(value);
2161                }
2162            }
2163
2164            // Try the original key
2165            if let Some(value) = schema.get(key) {
2166                return Some(value);
2167            }
2168
2169            // Try key variants
2170            let key_variants = [
2171                key.replace('-', "_"), // Convert kebab-case to snake_case
2172                key.replace('_', "-"), // Convert snake_case to kebab-case
2173                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2174            ];
2175
2176            for variant in &key_variants {
2177                if let Some(value) = schema.get(variant) {
2178                    return Some(value);
2179                }
2180            }
2181        }
2182        None
2183    }
2184}
2185
/// Represents a config validation warning or error
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem.
    pub message: String,
    /// Name of the rule the warning refers to; `None` for warnings not tied to a
    /// rule entry (e.g. unknown global options).
    pub rule: Option<String>,
    /// Configuration key the warning refers to, when it concerns a specific option.
    pub key: Option<String>,
}
2193
2194/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking
2195pub fn validate_config_sourced(sourced: &SourcedConfig, registry: &RuleRegistry) -> Vec<ConfigValidationWarning> {
2196    let mut warnings = Vec::new();
2197    let known_rules = registry.rule_names();
2198    // 1. Unknown rules
2199    for rule in sourced.rules.keys() {
2200        if !known_rules.contains(rule) {
2201            warnings.push(ConfigValidationWarning {
2202                message: format!("Unknown rule in config: {rule}"),
2203                rule: Some(rule.clone()),
2204                key: None,
2205            });
2206        }
2207    }
2208    // 2. Unknown options and type mismatches
2209    for (rule, rule_cfg) in &sourced.rules {
2210        if let Some(valid_keys) = registry.config_keys_for(rule) {
2211            for key in rule_cfg.values.keys() {
2212                if !valid_keys.contains(key) {
2213                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2214                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2215                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2216                    } else {
2217                        format!("Unknown option for rule {rule}: {key}")
2218                    };
2219                    warnings.push(ConfigValidationWarning {
2220                        message,
2221                        rule: Some(rule.clone()),
2222                        key: Some(key.clone()),
2223                    });
2224                } else {
2225                    // Type check: compare type of value to type of default
2226                    if let Some(expected) = registry.expected_value_for(rule, key) {
2227                        let actual = &rule_cfg.values[key].value;
2228                        if !toml_value_type_matches(expected, actual) {
2229                            warnings.push(ConfigValidationWarning {
2230                                message: format!(
2231                                    "Type mismatch for {}.{}: expected {}, got {}",
2232                                    rule,
2233                                    key,
2234                                    toml_type_name(expected),
2235                                    toml_type_name(actual)
2236                                ),
2237                                rule: Some(rule.clone()),
2238                                key: Some(key.clone()),
2239                            });
2240                        }
2241                    }
2242                }
2243            }
2244        }
2245    }
2246    // 3. Unknown global options (from unknown_keys)
2247    let known_global_keys = vec![
2248        "enable".to_string(),
2249        "disable".to_string(),
2250        "include".to_string(),
2251        "exclude".to_string(),
2252        "respect-gitignore".to_string(),
2253        "line-length".to_string(),
2254        "fixable".to_string(),
2255        "unfixable".to_string(),
2256        "flavor".to_string(),
2257        "force-exclude".to_string(),
2258        "output-format".to_string(),
2259    ];
2260
2261    for (section, key, file_path) in &sourced.unknown_keys {
2262        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2263            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2264                if let Some(path) = file_path {
2265                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2266                } else {
2267                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2268                }
2269            } else if let Some(path) = file_path {
2270                format!("Unknown global option in {path}: {key}")
2271            } else {
2272                format!("Unknown global option: {key}")
2273            };
2274            warnings.push(ConfigValidationWarning {
2275                message,
2276                rule: None,
2277                key: Some(key.clone()),
2278            });
2279        } else if !key.is_empty() {
2280            // This is an unknown rule section (key is empty means it's a section header)
2281            // No suggestions for rule names - just warn
2282            continue;
2283        } else {
2284            // Unknown rule section
2285            let message = if let Some(path) = file_path {
2286                format!(
2287                    "Unknown rule in {path}: {}",
2288                    section.trim_matches(|c| c == '[' || c == ']')
2289                )
2290            } else {
2291                format!(
2292                    "Unknown rule in config: {}",
2293                    section.trim_matches(|c| c == '[' || c == ']')
2294                )
2295            };
2296            warnings.push(ConfigValidationWarning {
2297                message,
2298                rule: None,
2299                key: None,
2300            });
2301        }
2302    }
2303    warnings
2304}
2305
2306fn toml_type_name(val: &toml::Value) -> &'static str {
2307    match val {
2308        toml::Value::String(_) => "string",
2309        toml::Value::Integer(_) => "integer",
2310        toml::Value::Float(_) => "float",
2311        toml::Value::Boolean(_) => "boolean",
2312        toml::Value::Array(_) => "array",
2313        toml::Value::Table(_) => "table",
2314        toml::Value::Datetime(_) => "datetime",
2315    }
2316}
2317
/// Calculate the Levenshtein distance between two strings (simple two-row implementation).
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes: all lengths
/// and loop bounds come from the collected `char` vectors. Using the byte length
/// (`str::len`) here would index past the end of those vectors for any non-ASCII input.
///
/// Returns the minimum number of single-character insertions, deletions and
/// substitutions needed to turn `s1` into `s2`.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();

    // Turning an empty string into the other one takes one insertion per character.
    if s1_chars.is_empty() {
        return s2_chars.len();
    }
    if s2_chars.is_empty() {
        return s1_chars.len();
    }

    // `prev_row[j]` holds the distance between the already processed prefix of `s1`
    // and the first `j` characters of `s2`.
    let mut prev_row: Vec<usize> = (0..=s2_chars.len()).collect();
    let mut curr_row = vec![0; s2_chars.len() + 1];

    for (i, &c1) in s1_chars.iter().enumerate() {
        curr_row[0] = i + 1;
        for (j, &c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    prev_row[s2_chars.len()]
}
2349
2350/// Suggest a similar key from a list of valid keys using fuzzy matching
2351fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2352    let unknown_lower = unknown.to_lowercase();
2353    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2354
2355    let mut best_match: Option<(String, usize)> = None;
2356
2357    for valid in valid_keys {
2358        let valid_lower = valid.to_lowercase();
2359        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2360
2361        if distance <= max_distance {
2362            if let Some((_, best_dist)) = &best_match {
2363                if distance < *best_dist {
2364                    best_match = Some((valid.clone(), distance));
2365                }
2366            } else {
2367                best_match = Some((valid.clone(), distance));
2368            }
2369        }
2370    }
2371
2372    best_match.map(|(key, _)| key)
2373}
2374
2375fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2376    use toml::Value::*;
2377    match (expected, actual) {
2378        (String(_), String(_)) => true,
2379        (Integer(_), Integer(_)) => true,
2380        (Float(_), Float(_)) => true,
2381        (Boolean(_), Boolean(_)) => true,
2382        (Array(_), Array(_)) => true,
2383        (Table(_), Table(_)) => true,
2384        (Datetime(_), Datetime(_)) => true,
2385        // Allow integer for float
2386        (Float(_), Integer(_)) => true,
2387        _ => false,
2388    }
2389}
2390
2391/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2392fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2393    let doc: toml::Value =
2394        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2395    let mut fragment = SourcedConfigFragment::default();
2396    let source = ConfigSource::PyprojectToml;
2397    let file = Some(path.to_string());
2398
2399    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2400    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2401        && let Some(rumdl_table) = rumdl_config.as_table()
2402    {
2403        // Helper function to extract global config from a table
2404        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2405            // Extract global options from the given table
2406            if let Some(enable) = table.get("enable")
2407                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2408            {
2409                // Normalize rule names in the list
2410                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2411                fragment
2412                    .global
2413                    .enable
2414                    .push_override(normalized_values, source, file.clone(), None);
2415            }
2416
2417            if let Some(disable) = table.get("disable")
2418                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2419            {
2420                // Re-enable normalization
2421                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2422                fragment
2423                    .global
2424                    .disable
2425                    .push_override(normalized_values, source, file.clone(), None);
2426            }
2427
2428            if let Some(include) = table.get("include")
2429                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2430            {
2431                fragment
2432                    .global
2433                    .include
2434                    .push_override(values, source, file.clone(), None);
2435            }
2436
2437            if let Some(exclude) = table.get("exclude")
2438                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2439            {
2440                fragment
2441                    .global
2442                    .exclude
2443                    .push_override(values, source, file.clone(), None);
2444            }
2445
2446            if let Some(respect_gitignore) = table
2447                .get("respect-gitignore")
2448                .or_else(|| table.get("respect_gitignore"))
2449                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2450            {
2451                fragment
2452                    .global
2453                    .respect_gitignore
2454                    .push_override(value, source, file.clone(), None);
2455            }
2456
2457            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2458                && let Ok(value) = bool::deserialize(force_exclude.clone())
2459            {
2460                fragment
2461                    .global
2462                    .force_exclude
2463                    .push_override(value, source, file.clone(), None);
2464            }
2465
2466            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2467                && let Ok(value) = String::deserialize(output_format.clone())
2468            {
2469                if fragment.global.output_format.is_none() {
2470                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2471                } else {
2472                    fragment
2473                        .global
2474                        .output_format
2475                        .as_mut()
2476                        .unwrap()
2477                        .push_override(value, source, file.clone(), None);
2478                }
2479            }
2480
2481            if let Some(fixable) = table.get("fixable")
2482                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2483            {
2484                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2485                fragment
2486                    .global
2487                    .fixable
2488                    .push_override(normalized_values, source, file.clone(), None);
2489            }
2490
2491            if let Some(unfixable) = table.get("unfixable")
2492                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2493            {
2494                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2495                fragment
2496                    .global
2497                    .unfixable
2498                    .push_override(normalized_values, source, file.clone(), None);
2499            }
2500
2501            if let Some(flavor) = table.get("flavor")
2502                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2503            {
2504                fragment.global.flavor.push_override(value, source, file.clone(), None);
2505            }
2506
2507            // Handle line-length special case - this should set the global line_length
2508            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
2509                && let Ok(value) = u64::deserialize(line_length.clone())
2510            {
2511                fragment
2512                    .global
2513                    .line_length
2514                    .push_override(value, source, file.clone(), None);
2515
2516                // Also add to MD013 rule config for backward compatibility
2517                let norm_md013_key = normalize_key("MD013");
2518                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
2519                let norm_line_length_key = normalize_key("line-length");
2520                let sv = rule_entry
2521                    .values
2522                    .entry(norm_line_length_key)
2523                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
2524                sv.push_override(line_length.clone(), source, file.clone(), None);
2525            }
2526        };
2527
2528        // First, check for [tool.rumdl.global] section
2529        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
2530            extract_global_config(&mut fragment, global_table);
2531        }
2532
2533        // Also extract global options from [tool.rumdl] directly (for flat structure)
2534        extract_global_config(&mut fragment, rumdl_table);
2535
2536        // --- Extract per-file-ignores configurations ---
2537        // Check both hyphenated and underscored versions for compatibility
2538        let per_file_ignores_key = rumdl_table
2539            .get("per-file-ignores")
2540            .or_else(|| rumdl_table.get("per_file_ignores"));
2541
2542        if let Some(per_file_ignores_value) = per_file_ignores_key
2543            && let Some(per_file_table) = per_file_ignores_value.as_table()
2544        {
2545            let mut per_file_map = HashMap::new();
2546            for (pattern, rules_value) in per_file_table {
2547                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
2548                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
2549                    per_file_map.insert(pattern.clone(), normalized_rules);
2550                } else {
2551                    log::warn!(
2552                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
2553                    );
2554                }
2555            }
2556            fragment
2557                .per_file_ignores
2558                .push_override(per_file_map, source, file.clone(), None);
2559        }
2560
2561        // --- Extract rule-specific configurations ---
2562        for (key, value) in rumdl_table {
2563            let norm_rule_key = normalize_key(key);
2564
2565            // Skip keys already handled as global or special cases
2566            if [
2567                "enable",
2568                "disable",
2569                "include",
2570                "exclude",
2571                "respect_gitignore",
2572                "respect-gitignore", // Added kebab-case here too
2573                "force_exclude",
2574                "force-exclude",
2575                "line_length",
2576                "line-length",
2577                "output_format",
2578                "output-format",
2579                "fixable",
2580                "unfixable",
2581                "per-file-ignores",
2582                "per_file_ignores",
2583                "global",
2584            ]
2585            .contains(&norm_rule_key.as_str())
2586            {
2587                continue;
2588            }
2589
2590            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
2591            // AND if the value is actually a TOML table before processing as rule config.
2592            // This prevents misinterpreting other top-level keys under [tool.rumdl]
2593            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
2594            if norm_rule_key_upper.len() == 5
2595                && norm_rule_key_upper.starts_with("MD")
2596                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
2597                && value.is_table()
2598            {
2599                if let Some(rule_config_table) = value.as_table() {
2600                    // Get the entry for this rule (e.g., "md013")
2601                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
2602                    for (rk, rv) in rule_config_table {
2603                        let norm_rk = normalize_key(rk); // Normalize the config key itself
2604
2605                        let toml_val = rv.clone();
2606
2607                        let sv = rule_entry
2608                            .values
2609                            .entry(norm_rk.clone())
2610                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2611                        sv.push_override(toml_val, source, file.clone(), None);
2612                    }
2613                }
2614            } else {
2615                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
2616                // Track unknown keys under [tool.rumdl] for validation
2617                fragment
2618                    .unknown_keys
2619                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
2620            }
2621        }
2622    }
2623
2624    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
2625    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
2626        for (key, value) in tool_table.iter() {
2627            if let Some(rule_name) = key.strip_prefix("rumdl.") {
2628                let norm_rule_name = normalize_key(rule_name);
2629                if norm_rule_name.len() == 5
2630                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2631                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2632                    && let Some(rule_table) = value.as_table()
2633                {
2634                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2635                    for (rk, rv) in rule_table {
2636                        let norm_rk = normalize_key(rk);
2637                        let toml_val = rv.clone();
2638                        let sv = rule_entry
2639                            .values
2640                            .entry(norm_rk.clone())
2641                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2642                        sv.push_override(toml_val, source, file.clone(), None);
2643                    }
2644                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2645                    // Track unknown rule sections like [tool.rumdl.MD999]
2646                    fragment.unknown_keys.push((
2647                        format!("[tool.rumdl.{rule_name}]"),
2648                        String::new(),
2649                        Some(path.to_string()),
2650                    ));
2651                }
2652            }
2653        }
2654    }
2655
2656    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
2657    if let Some(doc_table) = doc.as_table() {
2658        for (key, value) in doc_table.iter() {
2659            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
2660                let norm_rule_name = normalize_key(rule_name);
2661                if norm_rule_name.len() == 5
2662                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2663                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2664                    && let Some(rule_table) = value.as_table()
2665                {
2666                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2667                    for (rk, rv) in rule_table {
2668                        let norm_rk = normalize_key(rk);
2669                        let toml_val = rv.clone();
2670                        let sv = rule_entry
2671                            .values
2672                            .entry(norm_rk.clone())
2673                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2674                        sv.push_override(toml_val, source, file.clone(), None);
2675                    }
2676                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2677                    // Track unknown rule sections like [tool.rumdl.MD999]
2678                    fragment.unknown_keys.push((
2679                        format!("[tool.rumdl.{rule_name}]"),
2680                        String::new(),
2681                        Some(path.to_string()),
2682                    ));
2683                }
2684            }
2685        }
2686    }
2687
2688    // Only return Some(fragment) if any config was found
2689    let has_any = !fragment.global.enable.value.is_empty()
2690        || !fragment.global.disable.value.is_empty()
2691        || !fragment.global.include.value.is_empty()
2692        || !fragment.global.exclude.value.is_empty()
2693        || !fragment.global.fixable.value.is_empty()
2694        || !fragment.global.unfixable.value.is_empty()
2695        || fragment.global.output_format.is_some()
2696        || !fragment.per_file_ignores.value.is_empty()
2697        || !fragment.rules.is_empty();
2698    if has_any { Ok(Some(fragment)) } else { Ok(None) }
2699}
2700
2701/// Parses rumdl.toml / .rumdl.toml content.
2702fn parse_rumdl_toml(content: &str, path: &str) -> Result<SourcedConfigFragment, ConfigError> {
2703    let doc = content
2704        .parse::<DocumentMut>()
2705        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2706    let mut fragment = SourcedConfigFragment::default();
2707    let source = ConfigSource::RumdlToml;
2708    let file = Some(path.to_string());
2709
2710    // Define known rules before the loop
2711    let all_rules = rules::all_rules(&Config::default());
2712    let registry = RuleRegistry::from_rules(&all_rules);
2713    let known_rule_names: BTreeSet<String> = registry
2714        .rule_names()
2715        .into_iter()
2716        .map(|s| s.to_ascii_uppercase())
2717        .collect();
2718
2719    // Handle [global] section
2720    if let Some(global_item) = doc.get("global")
2721        && let Some(global_table) = global_item.as_table()
2722    {
2723        for (key, value_item) in global_table.iter() {
2724            let norm_key = normalize_key(key);
2725            match norm_key.as_str() {
2726                "enable" | "disable" | "include" | "exclude" => {
2727                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2728                        // Corrected: Iterate directly over the Formatted<Array>
2729                        let values: Vec<String> = formatted_array
2730                                .iter()
2731                                .filter_map(|item| item.as_str()) // Extract strings
2732                                .map(|s| s.to_string())
2733                                .collect();
2734
2735                        // Normalize rule names for enable/disable
2736                        let final_values = if norm_key == "enable" || norm_key == "disable" {
2737                            // Corrected: Pass &str to normalize_key
2738                            values.into_iter().map(|s| normalize_key(&s)).collect()
2739                        } else {
2740                            values
2741                        };
2742
2743                        match norm_key.as_str() {
2744                            "enable" => fragment
2745                                .global
2746                                .enable
2747                                .push_override(final_values, source, file.clone(), None),
2748                            "disable" => {
2749                                fragment
2750                                    .global
2751                                    .disable
2752                                    .push_override(final_values, source, file.clone(), None)
2753                            }
2754                            "include" => {
2755                                fragment
2756                                    .global
2757                                    .include
2758                                    .push_override(final_values, source, file.clone(), None)
2759                            }
2760                            "exclude" => {
2761                                fragment
2762                                    .global
2763                                    .exclude
2764                                    .push_override(final_values, source, file.clone(), None)
2765                            }
2766                            _ => unreachable!(), // Should not happen due to outer match
2767                        }
2768                    } else {
2769                        log::warn!(
2770                            "[WARN] Expected array for global key '{}' in {}, found {}",
2771                            key,
2772                            path,
2773                            value_item.type_name()
2774                        );
2775                    }
2776                }
2777                "respect_gitignore" | "respect-gitignore" => {
2778                    // Handle both cases
2779                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2780                        let val = *formatted_bool.value();
2781                        fragment
2782                            .global
2783                            .respect_gitignore
2784                            .push_override(val, source, file.clone(), None);
2785                    } else {
2786                        log::warn!(
2787                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2788                            key,
2789                            path,
2790                            value_item.type_name()
2791                        );
2792                    }
2793                }
2794                "force_exclude" | "force-exclude" => {
2795                    // Handle both cases
2796                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
2797                        let val = *formatted_bool.value();
2798                        fragment
2799                            .global
2800                            .force_exclude
2801                            .push_override(val, source, file.clone(), None);
2802                    } else {
2803                        log::warn!(
2804                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
2805                            key,
2806                            path,
2807                            value_item.type_name()
2808                        );
2809                    }
2810                }
2811                "line_length" | "line-length" => {
2812                    // Handle both cases
2813                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
2814                        let val = *formatted_int.value() as u64;
2815                        fragment
2816                            .global
2817                            .line_length
2818                            .push_override(val, source, file.clone(), None);
2819                    } else {
2820                        log::warn!(
2821                            "[WARN] Expected integer for global key '{}' in {}, found {}",
2822                            key,
2823                            path,
2824                            value_item.type_name()
2825                        );
2826                    }
2827                }
2828                "output_format" | "output-format" => {
2829                    // Handle both cases
2830                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2831                        let val = formatted_string.value().clone();
2832                        if fragment.global.output_format.is_none() {
2833                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
2834                        } else {
2835                            fragment.global.output_format.as_mut().unwrap().push_override(
2836                                val,
2837                                source,
2838                                file.clone(),
2839                                None,
2840                            );
2841                        }
2842                    } else {
2843                        log::warn!(
2844                            "[WARN] Expected string for global key '{}' in {}, found {}",
2845                            key,
2846                            path,
2847                            value_item.type_name()
2848                        );
2849                    }
2850                }
2851                "fixable" => {
2852                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2853                        let values: Vec<String> = formatted_array
2854                            .iter()
2855                            .filter_map(|item| item.as_str())
2856                            .map(normalize_key)
2857                            .collect();
2858                        fragment
2859                            .global
2860                            .fixable
2861                            .push_override(values, source, file.clone(), None);
2862                    } else {
2863                        log::warn!(
2864                            "[WARN] Expected array for global key '{}' in {}, found {}",
2865                            key,
2866                            path,
2867                            value_item.type_name()
2868                        );
2869                    }
2870                }
2871                "unfixable" => {
2872                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2873                        let values: Vec<String> = formatted_array
2874                            .iter()
2875                            .filter_map(|item| item.as_str())
2876                            .map(normalize_key)
2877                            .collect();
2878                        fragment
2879                            .global
2880                            .unfixable
2881                            .push_override(values, source, file.clone(), None);
2882                    } else {
2883                        log::warn!(
2884                            "[WARN] Expected array for global key '{}' in {}, found {}",
2885                            key,
2886                            path,
2887                            value_item.type_name()
2888                        );
2889                    }
2890                }
2891                "flavor" => {
2892                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
2893                        let val = formatted_string.value();
2894                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
2895                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
2896                        } else {
2897                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
2898                        }
2899                    } else {
2900                        log::warn!(
2901                            "[WARN] Expected string for global key '{}' in {}, found {}",
2902                            key,
2903                            path,
2904                            value_item.type_name()
2905                        );
2906                    }
2907                }
2908                _ => {
2909                    // Track unknown global keys for validation
2910                    fragment
2911                        .unknown_keys
2912                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
2913                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
2914                }
2915            }
2916        }
2917    }
2918
2919    // Handle [per-file-ignores] section
2920    if let Some(per_file_item) = doc.get("per-file-ignores")
2921        && let Some(per_file_table) = per_file_item.as_table()
2922    {
2923        let mut per_file_map = HashMap::new();
2924        for (pattern, value_item) in per_file_table.iter() {
2925            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2926                let rules: Vec<String> = formatted_array
2927                    .iter()
2928                    .filter_map(|item| item.as_str())
2929                    .map(normalize_key)
2930                    .collect();
2931                per_file_map.insert(pattern.to_string(), rules);
2932            } else {
2933                let type_name = value_item.type_name();
2934                log::warn!(
2935                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
2936                );
2937            }
2938        }
2939        fragment
2940            .per_file_ignores
2941            .push_override(per_file_map, source, file.clone(), None);
2942    }
2943
2944    // Rule-specific: all other top-level tables
2945    for (key, item) in doc.iter() {
2946        let norm_rule_name = key.to_ascii_uppercase();
2947
2948        // Skip known special sections
2949        if key == "global" || key == "per-file-ignores" {
2950            continue;
2951        }
2952
2953        // Track unknown rule sections (like [MD999])
2954        if !known_rule_names.contains(&norm_rule_name) {
2955            // Only track if it looks like a rule section (starts with MD or is uppercase)
2956            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
2957                fragment
2958                    .unknown_keys
2959                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
2960            }
2961            continue;
2962        }
2963
2964        if let Some(tbl) = item.as_table() {
2965            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
2966            for (rk, rv_item) in tbl.iter() {
2967                let norm_rk = normalize_key(rk);
2968                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
2969                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
2970                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
2971                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
2972                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
2973                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
2974                    Some(toml_edit::Value::Array(formatted_array)) => {
2975                        // Convert toml_edit Array to toml::Value::Array
2976                        let mut values = Vec::new();
2977                        for item in formatted_array.iter() {
2978                            match item {
2979                                toml_edit::Value::String(formatted) => {
2980                                    values.push(toml::Value::String(formatted.value().clone()))
2981                                }
2982                                toml_edit::Value::Integer(formatted) => {
2983                                    values.push(toml::Value::Integer(*formatted.value()))
2984                                }
2985                                toml_edit::Value::Float(formatted) => {
2986                                    values.push(toml::Value::Float(*formatted.value()))
2987                                }
2988                                toml_edit::Value::Boolean(formatted) => {
2989                                    values.push(toml::Value::Boolean(*formatted.value()))
2990                                }
2991                                toml_edit::Value::Datetime(formatted) => {
2992                                    values.push(toml::Value::Datetime(*formatted.value()))
2993                                }
2994                                _ => {
2995                                    log::warn!(
2996                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
2997                                    );
2998                                }
2999                            }
3000                        }
3001                        Some(toml::Value::Array(values))
3002                    }
3003                    Some(toml_edit::Value::InlineTable(_)) => {
3004                        log::warn!(
3005                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
3006                        );
3007                        None
3008                    }
3009                    None => {
3010                        log::warn!(
3011                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
3012                        );
3013                        None
3014                    }
3015                };
3016                if let Some(toml_val) = maybe_toml_val {
3017                    let sv = rule_entry
3018                        .values
3019                        .entry(norm_rk.clone())
3020                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3021                    sv.push_override(toml_val, source, file.clone(), None);
3022                }
3023            }
3024        } else if item.is_value() {
3025            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3026        }
3027    }
3028
3029    Ok(fragment)
3030}
3031
3032/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3033fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3034    // Use the unified loader from markdownlint_config.rs
3035    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3036        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3037    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3038}