rumdl_lib/
config.rs

1//!
2//! This module defines configuration structures, loading logic, and provenance tracking for rumdl.
3//! Supports TOML, pyproject.toml, and markdownlint config formats, and provides merging and override logic.
4
5use crate::rule::Rule;
6use crate::rules;
7use crate::types::LineLength;
8use log;
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fmt;
13use std::fs;
14use std::io;
15use std::path::Path;
16use std::str::FromStr;
17use toml_edit::DocumentMut;
18
/// Markdown flavor/dialect enumeration
// NOTE(review): this type derives `schemars::JsonSchema`; the `///` text on the type and its
// variants presumably ends up as schema descriptions, so reword it with care — TODO confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, schemars::JsonSchema)]
// Variants (de)serialize under lowercase names; the per-variant `rename`s below spell out the
// same names explicitly.
#[serde(rename_all = "lowercase")]
pub enum MarkdownFlavor {
    /// Standard Markdown without flavor-specific adjustments
    // Written out as "standard"; "none" and the empty string are accepted as aliases on input.
    #[serde(rename = "standard", alias = "none", alias = "")]
    // This is the variant produced by `MarkdownFlavor::default()`.
    #[default]
    Standard,
    /// MkDocs flavor with auto-reference support
    #[serde(rename = "mkdocs")]
    MkDocs,
    /// MDX flavor with JSX and ESM support (.mdx files)
    #[serde(rename = "mdx")]
    MDX,
    /// Quarto/RMarkdown flavor for scientific publishing (.qmd, .Rmd files)
    #[serde(rename = "quarto")]
    Quarto,
    // Future flavors can be added here when they have actual implementation differences
    // Planned: GFM (GitHub Flavored Markdown) - for GitHub-specific features like tables, strikethrough
    // Planned: CommonMark - for strict CommonMark compliance
}
40
41impl fmt::Display for MarkdownFlavor {
42    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43        match self {
44            MarkdownFlavor::Standard => write!(f, "standard"),
45            MarkdownFlavor::MkDocs => write!(f, "mkdocs"),
46            MarkdownFlavor::MDX => write!(f, "mdx"),
47            MarkdownFlavor::Quarto => write!(f, "quarto"),
48        }
49    }
50}
51
52impl FromStr for MarkdownFlavor {
53    type Err = String;
54
55    fn from_str(s: &str) -> Result<Self, Self::Err> {
56        match s.to_lowercase().as_str() {
57            "standard" | "" | "none" => Ok(MarkdownFlavor::Standard),
58            "mkdocs" => Ok(MarkdownFlavor::MkDocs),
59            "mdx" => Ok(MarkdownFlavor::MDX),
60            "quarto" | "qmd" | "rmd" | "rmarkdown" => Ok(MarkdownFlavor::Quarto),
61            // Accept but warn about unimplemented flavors
62            "gfm" | "github" => {
63                eprintln!("Warning: GFM flavor not yet implemented, using standard");
64                Ok(MarkdownFlavor::Standard)
65            }
66            "commonmark" => {
67                eprintln!("Warning: CommonMark flavor not yet implemented, using standard");
68                Ok(MarkdownFlavor::Standard)
69            }
70            _ => Err(format!("Unknown markdown flavor: {s}")),
71        }
72    }
73}
74
75impl MarkdownFlavor {
76    /// Detect flavor from file extension
77    pub fn from_extension(ext: &str) -> Self {
78        match ext.to_lowercase().as_str() {
79            "mdx" => Self::MDX,
80            "qmd" => Self::Quarto,
81            "rmd" => Self::Quarto,
82            _ => Self::Standard,
83        }
84    }
85
86    /// Detect flavor from file path
87    pub fn from_path(path: &std::path::Path) -> Self {
88        path.extension()
89            .and_then(|e| e.to_str())
90            .map(Self::from_extension)
91            .unwrap_or(Self::Standard)
92    }
93
94    /// Check if this flavor supports ESM imports/exports (MDX-specific)
95    pub fn supports_esm_blocks(self) -> bool {
96        matches!(self, Self::MDX)
97    }
98
99    /// Check if this flavor supports JSX components (MDX-specific)
100    pub fn supports_jsx(self) -> bool {
101        matches!(self, Self::MDX)
102    }
103
104    /// Check if this flavor supports auto-references (MkDocs-specific)
105    pub fn supports_auto_references(self) -> bool {
106        matches!(self, Self::MkDocs)
107    }
108
109    /// Get a human-readable name for this flavor
110    pub fn name(self) -> &'static str {
111        match self {
112            Self::Standard => "Standard",
113            Self::MkDocs => "MkDocs",
114            Self::MDX => "MDX",
115            Self::Quarto => "Quarto",
116        }
117    }
118}
119
/// Normalizes a configuration key to its canonical spelling.
///
/// Rule identifiers — exactly five bytes, `md` in any letter case followed by three ASCII
/// digits (e.g. `md013`) — are uppercased (`MD013`). Every other key has `_` replaced by `-`
/// and its ASCII letters lowercased (`Line_Length` -> `line-length`).
pub fn normalize_key(key: &str) -> String {
    let bytes = key.as_bytes();
    // Work on bytes: a rule ID is pure ASCII, so no character-boundary concerns arise.
    let is_rule_id =
        bytes.len() == 5 && bytes[..2].eq_ignore_ascii_case(b"md") && bytes[2..].iter().all(u8::is_ascii_digit);

    if is_rule_id {
        return key.to_ascii_uppercase();
    }
    key.replace('_', "-").to_ascii_lowercase()
}
129
/// Represents a rule-specific configuration
// NOTE(review): derives `schemars::JsonSchema`, so the `///` text here presumably becomes part
// of the generated schema — TODO confirm before rewording it.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
pub struct RuleConfig {
    /// Configuration values for the rule
    // Flattened: each option of the rule section is stored as one entry, keyed by option name.
    #[serde(flatten)]
    // The schema for this field comes from `arbitrary_value_schema` rather than the derive.
    #[schemars(schema_with = "arbitrary_value_schema")]
    pub values: BTreeMap<String, toml::Value>,
}
138
/// Generate a JSON schema for arbitrary configuration values
///
/// Referenced by `RuleConfig::values` via `schema_with`. The schema describes an object that
/// allows any additional properties; the generator argument is not used.
fn arbitrary_value_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
    schemars::json_schema!({
        "type": "object",
        "additionalProperties": true
    })
}
146
/// Represents the complete configuration loaded from rumdl.toml
// NOTE(review): the `///` comments and the `description` below presumably feed the generated
// JSON schema (this type derives `schemars::JsonSchema`) — TODO confirm before rewording.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, schemars::JsonSchema)]
#[schemars(
    description = "rumdl configuration for linting Markdown files. Rules can be configured individually using [MD###] sections with rule-specific options."
)]
pub struct Config {
    /// Global configuration options
    // Falls back to `GlobalConfig::default()` when the section is absent.
    #[serde(default)]
    pub global: GlobalConfig,

    /// Per-file rule ignores: maps file patterns to lists of rules to ignore
    /// Example: { "README.md": ["MD033"], "docs/**/*.md": ["MD013"] }
    // Keys are glob patterns (see `get_ignored_rules_for_file`); serialized as `per-file-ignores`.
    #[serde(default, rename = "per-file-ignores")]
    pub per_file_ignores: HashMap<String, Vec<String>>,

    /// Rule-specific configurations (e.g., MD013, MD007, MD044)
    /// Each rule section can contain options specific to that rule.
    ///
    /// Common examples:
    /// - MD013: line_length, code_blocks, tables, headings
    /// - MD007: indent
    /// - MD003: style ("atx", "atx_closed", "setext")
    /// - MD044: names (array of proper names to check)
    ///
    /// See https://github.com/rvben/rumdl for full rule documentation.
    // Flattened: top-level tables not claimed by the fields above are collected here, keyed by
    // section name. `get_rule_config_value` looks sections up by uppercased rule name.
    #[serde(flatten)]
    pub rules: BTreeMap<String, RuleConfig>,
}
175
176impl Config {
177    /// Check if the Markdown flavor is set to MkDocs
178    pub fn is_mkdocs_flavor(&self) -> bool {
179        self.global.flavor == MarkdownFlavor::MkDocs
180    }
181
182    // Future methods for when GFM and CommonMark are implemented:
183    // pub fn is_gfm_flavor(&self) -> bool
184    // pub fn is_commonmark_flavor(&self) -> bool
185
186    /// Get the configured Markdown flavor
187    pub fn markdown_flavor(&self) -> MarkdownFlavor {
188        self.global.flavor
189    }
190
191    /// Legacy method for backwards compatibility - redirects to is_mkdocs_flavor
192    pub fn is_mkdocs_project(&self) -> bool {
193        self.is_mkdocs_flavor()
194    }
195
196    /// Get the set of rules that should be ignored for a specific file based on per-file-ignores configuration
197    /// Returns a HashSet of rule names (uppercase, e.g., "MD033") that match the given file path
198    pub fn get_ignored_rules_for_file(&self, file_path: &Path) -> HashSet<String> {
199        use globset::{Glob, GlobSetBuilder};
200
201        let mut ignored_rules = HashSet::new();
202
203        if self.per_file_ignores.is_empty() {
204            return ignored_rules;
205        }
206
207        // Build a globset for efficient matching
208        let mut builder = GlobSetBuilder::new();
209        let mut pattern_to_rules: Vec<(usize, &Vec<String>)> = Vec::new();
210
211        for (idx, (pattern, rules)) in self.per_file_ignores.iter().enumerate() {
212            if let Ok(glob) = Glob::new(pattern) {
213                builder.add(glob);
214                pattern_to_rules.push((idx, rules));
215            } else {
216                log::warn!("Invalid glob pattern in per-file-ignores: {pattern}");
217            }
218        }
219
220        let globset = match builder.build() {
221            Ok(gs) => gs,
222            Err(e) => {
223                log::error!("Failed to build globset for per-file-ignores: {e}");
224                return ignored_rules;
225            }
226        };
227
228        // Match the file path against all patterns
229        for match_idx in globset.matches(file_path) {
230            if let Some((_, rules)) = pattern_to_rules.get(match_idx) {
231                for rule in rules.iter() {
232                    // Normalize rule names to uppercase (MD033, md033 -> MD033)
233                    ignored_rules.insert(normalize_key(rule));
234                }
235            }
236        }
237
238        ignored_rules
239    }
240}
241
/// Global configuration options
// NOTE(review): derives `schemars::JsonSchema`; the `///` text on the fields presumably becomes
// schema descriptions, so it is left untouched here — TODO confirm before rewording.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, schemars::JsonSchema)]
// Container-level `default`: fields missing from the input are taken from the `Default` impl.
// `rename_all`: fields are spelled in kebab-case in config files (e.g. `respect-gitignore`);
// several fields additionally accept their snake_case spelling through an `alias`.
#[serde(default, rename_all = "kebab-case")]
pub struct GlobalConfig {
    /// Enabled rules
    #[serde(default)]
    pub enable: Vec<String>,

    /// Disabled rules
    #[serde(default)]
    pub disable: Vec<String>,

    /// Files to exclude
    #[serde(default)]
    pub exclude: Vec<String>,

    /// Files to include
    #[serde(default)]
    pub include: Vec<String>,

    /// Respect .gitignore files when scanning directories
    // Defaults to `true` via `default_respect_gitignore`.
    #[serde(default = "default_respect_gitignore", alias = "respect_gitignore")]
    pub respect_gitignore: bool,

    /// Global line length setting (used by MD013 and other rules if not overridden)
    #[serde(default, alias = "line_length")]
    pub line_length: LineLength,

    /// Output format for linting results (e.g., "text", "json", "pylint", etc.)
    // Omitted from serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none", alias = "output_format")]
    pub output_format: Option<String>,

    /// Rules that are allowed to be fixed when --fix is used
    /// If specified, only these rules will be fixed
    #[serde(default)]
    pub fixable: Vec<String>,

    /// Rules that should never be fixed, even when --fix is used
    /// Takes precedence over fixable
    #[serde(default)]
    pub unfixable: Vec<String>,

    /// Markdown flavor/dialect to use (mkdocs, gfm, commonmark, etc.)
    /// When set, adjusts parsing and validation rules for that specific Markdown variant
    // NOTE(review): the serde names on `MarkdownFlavor` are standard/none/""/mkdocs/mdx/quarto;
    // "gfm" and "commonmark" are only handled by its `FromStr` impl. Which path the config
    // loader uses is not visible here — verify that the names in the doc text are accepted.
    #[serde(default)]
    pub flavor: MarkdownFlavor,

    /// [DEPRECATED] Whether to enforce exclude patterns for explicitly passed paths.
    /// This option is deprecated as of v0.0.156 and has no effect.
    /// Exclude patterns are now always respected, even for explicitly provided files.
    /// This prevents duplication between rumdl config and tool configs like pre-commit.
    // Still declared so that configs containing the key continue to deserialize.
    #[serde(default, alias = "force_exclude")]
    #[deprecated(since = "0.0.156", note = "Exclude patterns are now always respected")]
    pub force_exclude: bool,

    /// Directory to store cache files (default: .rumdl_cache)
    /// Can also be set via --cache-dir CLI flag or RUMDL_CACHE_DIR environment variable
    // Omitted from serialized output when unset.
    #[serde(default, alias = "cache_dir", skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<String>,

    /// Whether caching is enabled (default: true)
    /// Can also be disabled via --no-cache CLI flag
    // Defaults to `true` via `default_true`.
    #[serde(default = "default_true")]
    pub cache: bool,
}
307
/// Serde default for `GlobalConfig::respect_gitignore`: `.gitignore` files are respected
/// unless the configuration says otherwise.
fn default_respect_gitignore() -> bool {
    true
}
311
/// Serde default helper for boolean options that are on by default
/// (used by `GlobalConfig::cache`).
fn default_true() -> bool {
    true
}
315
316// Add the Default impl
317impl Default for GlobalConfig {
318    #[allow(deprecated)]
319    fn default() -> Self {
320        Self {
321            enable: Vec::new(),
322            disable: Vec::new(),
323            exclude: Vec::new(),
324            include: Vec::new(),
325            respect_gitignore: true,
326            line_length: LineLength::default(),
327            output_format: None,
328            fixable: Vec::new(),
329            unfixable: Vec::new(),
330            flavor: MarkdownFlavor::default(),
331            force_exclude: false,
332            cache_dir: None,
333            cache: true,
334        }
335    }
336}
337
/// File names recognized as markdownlint configuration files: dotted and undotted variants
/// with `.json`, `.jsonc`, `.yaml` and `.yml` extensions.
// NOTE(review): presumably consulted during config discovery, possibly in this order —
// the code that uses it is not visible here; verify against the caller.
const MARKDOWNLINT_CONFIG_FILES: &[&str] = &[
    ".markdownlint.json",
    ".markdownlint.jsonc",
    ".markdownlint.yaml",
    ".markdownlint.yml",
    "markdownlint.json",
    "markdownlint.jsonc",
    "markdownlint.yaml",
    "markdownlint.yml",
];
348
349/// Create a default configuration file at the specified path
350pub fn create_default_config(path: &str) -> Result<(), ConfigError> {
351    // Check if file already exists
352    if Path::new(path).exists() {
353        return Err(ConfigError::FileExists { path: path.to_string() });
354    }
355
356    // Default configuration content
357    let default_config = r#"# rumdl configuration file
358
359# Global configuration options
360[global]
361# List of rules to disable (uncomment and modify as needed)
362# disable = ["MD013", "MD033"]
363
364# List of rules to enable exclusively (if provided, only these rules will run)
365# enable = ["MD001", "MD003", "MD004"]
366
367# List of file/directory patterns to include for linting (if provided, only these will be linted)
368# include = [
369#    "docs/*.md",
370#    "src/**/*.md",
371#    "README.md"
372# ]
373
374# List of file/directory patterns to exclude from linting
375exclude = [
376    # Common directories to exclude
377    ".git",
378    ".github",
379    "node_modules",
380    "vendor",
381    "dist",
382    "build",
383
384    # Specific files or patterns
385    "CHANGELOG.md",
386    "LICENSE.md",
387]
388
389# Respect .gitignore files when scanning directories (default: true)
390respect-gitignore = true
391
392# Markdown flavor/dialect (uncomment to enable)
393# Options: mkdocs, gfm, commonmark
394# flavor = "mkdocs"
395
396# Rule-specific configurations (uncomment and modify as needed)
397
398# [MD003]
399# style = "atx"  # Heading style (atx, atx_closed, setext)
400
401# [MD004]
402# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)
403
404# [MD007]
405# indent = 4  # Unordered list indentation
406
407# [MD013]
408# line-length = 100  # Line length
409# code-blocks = false  # Exclude code blocks from line length check
410# tables = false  # Exclude tables from line length check
411# headings = true  # Include headings in line length check
412
413# [MD044]
414# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
415# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
416"#;
417
418    // Write the default configuration to the file
419    match fs::write(path, default_config) {
420        Ok(_) => Ok(()),
421        Err(err) => Err(ConfigError::IoError {
422            source: err,
423            path: path.to_string(),
424        }),
425    }
426}
427
/// Errors that can occur when loading or creating configuration files
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// An I/O operation on the configuration file at `path` failed; `source` holds the
    /// underlying error.
    // NOTE(review): `create_default_config` also returns this variant when *writing* fails,
    // even though the message below says "read".
    #[error("Failed to read config file at {path}: {source}")]
    IoError { source: io::Error, path: String },

    /// Failed to parse the configuration content (TOML or JSON); carries the parser's message
    #[error("Failed to parse config: {0}")]
    ParseError(String),

    /// Configuration file already exists at `path` (returned by `create_default_config`)
    #[error("Configuration file already exists at {path}")]
    FileExists { path: String },
}
443
444/// Get a rule-specific configuration value
445/// Automatically tries both the original key and normalized variants (kebab-case ↔ snake_case)
446/// for better markdownlint compatibility
447pub fn get_rule_config_value<T: serde::de::DeserializeOwned>(config: &Config, rule_name: &str, key: &str) -> Option<T> {
448    let norm_rule_name = rule_name.to_ascii_uppercase(); // Use uppercase for lookup
449
450    let rule_config = config.rules.get(&norm_rule_name)?;
451
452    // Try multiple key variants to support both underscore and kebab-case formats
453    let key_variants = [
454        key.to_string(),       // Original key as provided
455        normalize_key(key),    // Normalized key (lowercase, kebab-case)
456        key.replace('-', "_"), // Convert kebab-case to snake_case
457        key.replace('_', "-"), // Convert snake_case to kebab-case
458    ];
459
460    // Try each variant until we find a match
461    for variant in &key_variants {
462        if let Some(value) = rule_config.values.get(variant)
463            && let Ok(result) = T::deserialize(value.clone())
464        {
465            return Some(result);
466        }
467    }
468
469    None
470}
471
/// Returns the default `[tool.rumdl]` section to be placed in a `pyproject.toml`.
///
/// The text starts with a blank line, sets a few global options, and contains commented-out
/// examples for rule-specific sections.
pub fn generate_pyproject_config() -> String {
    const PYPROJECT_TEMPLATE: &str = r#"
[tool.rumdl]
# Global configuration options
line-length = 100
disable = []
exclude = [
    # Common directories to exclude
    ".git",
    ".github",
    "node_modules",
    "vendor",
    "dist",
    "build",
]
respect-gitignore = true

# Rule-specific configurations (uncomment and modify as needed)

# [tool.rumdl.MD003]
# style = "atx"  # Heading style (atx, atx_closed, setext)

# [tool.rumdl.MD004]
# style = "asterisk"  # Unordered list style (asterisk, plus, dash, consistent)

# [tool.rumdl.MD007]
# indent = 4  # Unordered list indentation

# [tool.rumdl.MD013]
# line-length = 100  # Line length
# code-blocks = false  # Exclude code blocks from line length check
# tables = false  # Exclude tables from line length check
# headings = true  # Include headings in line length check

# [tool.rumdl.MD044]
# names = ["rumdl", "Markdown", "GitHub"]  # Proper names that should be capitalized correctly
# code-blocks = false  # Check code blocks for proper names (default: false, skips code blocks)
"#;

    String::from(PYPROJECT_TEMPLATE)
}
514
515#[cfg(test)]
516mod tests {
517    use super::*;
518    use std::fs;
519    use tempfile::tempdir;
520
521    #[test]
522    fn test_flavor_loading() {
523        let temp_dir = tempdir().unwrap();
524        let config_path = temp_dir.path().join(".rumdl.toml");
525        let config_content = r#"
526[global]
527flavor = "mkdocs"
528disable = ["MD001"]
529"#;
530        fs::write(&config_path, config_content).unwrap();
531
532        // Load the config
533        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
534        let config: Config = sourced.into();
535
536        // Check that flavor was loaded
537        assert_eq!(config.global.flavor, MarkdownFlavor::MkDocs);
538        assert!(config.is_mkdocs_flavor());
539        assert!(config.is_mkdocs_project()); // Test backwards compatibility
540        assert_eq!(config.global.disable, vec!["MD001".to_string()]);
541    }
542
543    #[test]
544    fn test_pyproject_toml_root_level_config() {
545        let temp_dir = tempdir().unwrap();
546        let config_path = temp_dir.path().join("pyproject.toml");
547
548        // Create a test pyproject.toml with root-level configuration
549        let content = r#"
550[tool.rumdl]
551line-length = 120
552disable = ["MD033"]
553enable = ["MD001", "MD004"]
554include = ["docs/*.md"]
555exclude = ["node_modules"]
556respect-gitignore = true
557        "#;
558
559        fs::write(&config_path, content).unwrap();
560
561        // Load the config with skip_auto_discovery to avoid environment config files
562        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
563        let config: Config = sourced.into(); // Convert to plain config for assertions
564
565        // Check global settings
566        assert_eq!(config.global.disable, vec!["MD033".to_string()]);
567        assert_eq!(config.global.enable, vec!["MD001".to_string(), "MD004".to_string()]);
568        // Should now contain only the configured pattern since auto-discovery is disabled
569        assert_eq!(config.global.include, vec!["docs/*.md".to_string()]);
570        assert_eq!(config.global.exclude, vec!["node_modules".to_string()]);
571        assert!(config.global.respect_gitignore);
572
573        // Check line-length was correctly added to MD013
574        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
575        assert_eq!(line_length, Some(120));
576    }
577
578    #[test]
579    fn test_pyproject_toml_snake_case_and_kebab_case() {
580        let temp_dir = tempdir().unwrap();
581        let config_path = temp_dir.path().join("pyproject.toml");
582
583        // Test with both kebab-case and snake_case variants
584        let content = r#"
585[tool.rumdl]
586line-length = 150
587respect_gitignore = true
588        "#;
589
590        fs::write(&config_path, content).unwrap();
591
592        // Load the config with skip_auto_discovery to avoid environment config files
593        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
594        let config: Config = sourced.into(); // Convert to plain config for assertions
595
596        // Check settings were correctly loaded
597        assert!(config.global.respect_gitignore);
598        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
599        assert_eq!(line_length, Some(150));
600    }
601
602    #[test]
603    fn test_md013_key_normalization_in_rumdl_toml() {
604        let temp_dir = tempdir().unwrap();
605        let config_path = temp_dir.path().join(".rumdl.toml");
606        let config_content = r#"
607[MD013]
608line_length = 111
609line-length = 222
610"#;
611        fs::write(&config_path, config_content).unwrap();
612        // Load the config with skip_auto_discovery to avoid environment config files
613        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
614        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
615        // Now we should only get the explicitly configured key
616        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
617        assert_eq!(keys, vec!["line-length"]);
618        let val = &rule_cfg.values["line-length"].value;
619        assert_eq!(val.as_integer(), Some(222));
620        // get_rule_config_value should retrieve the value for both snake_case and kebab-case
621        let config: Config = sourced.clone().into();
622        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
623        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
624        assert_eq!(v1, Some(222));
625        assert_eq!(v2, Some(222));
626    }
627
628    #[test]
629    fn test_md013_section_case_insensitivity() {
630        let temp_dir = tempdir().unwrap();
631        let config_path = temp_dir.path().join(".rumdl.toml");
632        let config_content = r#"
633[md013]
634line-length = 101
635
636[Md013]
637line-length = 102
638
639[MD013]
640line-length = 103
641"#;
642        fs::write(&config_path, config_content).unwrap();
643        // Load the config with skip_auto_discovery to avoid environment config files
644        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
645        let config: Config = sourced.clone().into();
646        // Only the last section should win, and be present
647        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
648        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
649        assert_eq!(keys, vec!["line-length"]);
650        let val = &rule_cfg.values["line-length"].value;
651        assert_eq!(val.as_integer(), Some(103));
652        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
653        assert_eq!(v, Some(103));
654    }
655
656    #[test]
657    fn test_md013_key_snake_and_kebab_case() {
658        let temp_dir = tempdir().unwrap();
659        let config_path = temp_dir.path().join(".rumdl.toml");
660        let config_content = r#"
661[MD013]
662line_length = 201
663line-length = 202
664"#;
665        fs::write(&config_path, config_content).unwrap();
666        // Load the config with skip_auto_discovery to avoid environment config files
667        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
668        let config: Config = sourced.clone().into();
669        let rule_cfg = sourced.rules.get("MD013").expect("MD013 rule config should exist");
670        let keys: Vec<_> = rule_cfg.values.keys().cloned().collect();
671        assert_eq!(keys, vec!["line-length"]);
672        let val = &rule_cfg.values["line-length"].value;
673        assert_eq!(val.as_integer(), Some(202));
674        let v1 = get_rule_config_value::<usize>(&config, "MD013", "line_length");
675        let v2 = get_rule_config_value::<usize>(&config, "MD013", "line-length");
676        assert_eq!(v1, Some(202));
677        assert_eq!(v2, Some(202));
678    }
679
680    #[test]
681    fn test_unknown_rule_section_is_ignored() {
682        let temp_dir = tempdir().unwrap();
683        let config_path = temp_dir.path().join(".rumdl.toml");
684        let config_content = r#"
685[MD999]
686foo = 1
687bar = 2
688[MD013]
689line-length = 303
690"#;
691        fs::write(&config_path, config_content).unwrap();
692        // Load the config with skip_auto_discovery to avoid environment config files
693        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
694        let config: Config = sourced.clone().into();
695        // MD999 should not be present
696        assert!(!sourced.rules.contains_key("MD999"));
697        // MD013 should be present and correct
698        let v = get_rule_config_value::<usize>(&config, "MD013", "line-length");
699        assert_eq!(v, Some(303));
700    }
701
702    #[test]
703    fn test_invalid_toml_syntax() {
704        let temp_dir = tempdir().unwrap();
705        let config_path = temp_dir.path().join(".rumdl.toml");
706
707        // Invalid TOML with unclosed string
708        let config_content = r#"
709[MD013]
710line-length = "unclosed string
711"#;
712        fs::write(&config_path, config_content).unwrap();
713
714        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
715        assert!(result.is_err());
716        match result.unwrap_err() {
717            ConfigError::ParseError(msg) => {
718                // The actual error message from toml parser might vary
719                assert!(msg.contains("expected") || msg.contains("invalid") || msg.contains("unterminated"));
720            }
721            _ => panic!("Expected ParseError"),
722        }
723    }
724
725    #[test]
726    fn test_wrong_type_for_config_value() {
727        let temp_dir = tempdir().unwrap();
728        let config_path = temp_dir.path().join(".rumdl.toml");
729
730        // line-length should be a number, not a string
731        let config_content = r#"
732[MD013]
733line-length = "not a number"
734"#;
735        fs::write(&config_path, config_content).unwrap();
736
737        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
738        let config: Config = sourced.into();
739
740        // The value should be loaded as a string, not converted
741        let rule_config = config.rules.get("MD013").unwrap();
742        let value = rule_config.values.get("line-length").unwrap();
743        assert!(matches!(value, toml::Value::String(_)));
744    }
745
746    #[test]
747    fn test_empty_config_file() {
748        let temp_dir = tempdir().unwrap();
749        let config_path = temp_dir.path().join(".rumdl.toml");
750
751        // Empty file
752        fs::write(&config_path, "").unwrap();
753
754        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
755        let config: Config = sourced.into();
756
757        // Should have default values
758        assert_eq!(config.global.line_length.get(), 80);
759        assert!(config.global.respect_gitignore);
760        assert!(config.rules.is_empty());
761    }
762
763    #[test]
764    fn test_malformed_pyproject_toml() {
765        let temp_dir = tempdir().unwrap();
766        let config_path = temp_dir.path().join("pyproject.toml");
767
768        // Missing closing bracket
769        let content = r#"
770[tool.rumdl
771line-length = 120
772"#;
773        fs::write(&config_path, content).unwrap();
774
775        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
776        assert!(result.is_err());
777    }
778
779    #[test]
780    fn test_conflicting_config_values() {
781        let temp_dir = tempdir().unwrap();
782        let config_path = temp_dir.path().join(".rumdl.toml");
783
784        // Both enable and disable the same rule - these need to be in a global section
785        let config_content = r#"
786[global]
787enable = ["MD013"]
788disable = ["MD013"]
789"#;
790        fs::write(&config_path, config_content).unwrap();
791
792        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
793        let config: Config = sourced.into();
794
795        // Conflict resolution: enable wins over disable
796        assert!(config.global.enable.contains(&"MD013".to_string()));
797        assert!(!config.global.disable.contains(&"MD013".to_string()));
798    }
799
800    #[test]
801    fn test_invalid_rule_names() {
802        let temp_dir = tempdir().unwrap();
803        let config_path = temp_dir.path().join(".rumdl.toml");
804
805        let config_content = r#"
806[global]
807enable = ["MD001", "NOT_A_RULE", "md002", "12345"]
808disable = ["MD-001", "MD_002"]
809"#;
810        fs::write(&config_path, config_content).unwrap();
811
812        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
813        let config: Config = sourced.into();
814
815        // All values should be preserved as-is
816        assert_eq!(config.global.enable.len(), 4);
817        assert_eq!(config.global.disable.len(), 2);
818    }
819
820    #[test]
821    fn test_deeply_nested_config() {
822        let temp_dir = tempdir().unwrap();
823        let config_path = temp_dir.path().join(".rumdl.toml");
824
825        // This should be ignored as we don't support nested tables within rule configs
826        let config_content = r#"
827[MD013]
828line-length = 100
829[MD013.nested]
830value = 42
831"#;
832        fs::write(&config_path, config_content).unwrap();
833
834        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
835        let config: Config = sourced.into();
836
837        let rule_config = config.rules.get("MD013").unwrap();
838        assert_eq!(
839            rule_config.values.get("line-length").unwrap(),
840            &toml::Value::Integer(100)
841        );
842        // Nested table should not be present
843        assert!(!rule_config.values.contains_key("nested"));
844    }
845
846    #[test]
847    fn test_unicode_in_config() {
848        let temp_dir = tempdir().unwrap();
849        let config_path = temp_dir.path().join(".rumdl.toml");
850
851        let config_content = r#"
852[global]
853include = ["文档/*.md", "ドキュメント/*.md"]
854exclude = ["测试/*", "🚀/*"]
855
856[MD013]
857line-length = 80
858message = "行太长了 🚨"
859"#;
860        fs::write(&config_path, config_content).unwrap();
861
862        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
863        let config: Config = sourced.into();
864
865        assert_eq!(config.global.include.len(), 2);
866        assert_eq!(config.global.exclude.len(), 2);
867        assert!(config.global.include[0].contains("文档"));
868        assert!(config.global.exclude[1].contains("🚀"));
869
870        let rule_config = config.rules.get("MD013").unwrap();
871        let message = rule_config.values.get("message").unwrap();
872        if let toml::Value::String(s) = message {
873            assert!(s.contains("行太长了"));
874            assert!(s.contains("🚨"));
875        }
876    }
877
878    #[test]
879    fn test_extremely_long_values() {
880        let temp_dir = tempdir().unwrap();
881        let config_path = temp_dir.path().join(".rumdl.toml");
882
883        let long_string = "a".repeat(10000);
884        let config_content = format!(
885            r#"
886[global]
887exclude = ["{long_string}"]
888
889[MD013]
890line-length = 999999999
891"#
892        );
893
894        fs::write(&config_path, config_content).unwrap();
895
896        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
897        let config: Config = sourced.into();
898
899        assert_eq!(config.global.exclude[0].len(), 10000);
900        let line_length = get_rule_config_value::<usize>(&config, "MD013", "line-length");
901        assert_eq!(line_length, Some(999999999));
902    }
903
904    #[test]
905    fn test_config_with_comments() {
906        let temp_dir = tempdir().unwrap();
907        let config_path = temp_dir.path().join(".rumdl.toml");
908
909        let config_content = r#"
910[global]
911# This is a comment
912enable = ["MD001"] # Enable MD001
913# disable = ["MD002"] # This is commented out
914
915[MD013] # Line length rule
916line-length = 100 # Set to 100 characters
917# ignored = true # This setting is commented out
918"#;
919        fs::write(&config_path, config_content).unwrap();
920
921        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
922        let config: Config = sourced.into();
923
924        assert_eq!(config.global.enable, vec!["MD001"]);
925        assert!(config.global.disable.is_empty()); // Commented out
926
927        let rule_config = config.rules.get("MD013").unwrap();
928        assert_eq!(rule_config.values.len(), 1); // Only line-length
929        assert!(!rule_config.values.contains_key("ignored"));
930    }
931
932    #[test]
933    fn test_arrays_in_rule_config() {
934        let temp_dir = tempdir().unwrap();
935        let config_path = temp_dir.path().join(".rumdl.toml");
936
937        let config_content = r#"
938[MD003]
939levels = [1, 2, 3]
940tags = ["important", "critical"]
941mixed = [1, "two", true]
942"#;
943        fs::write(&config_path, config_content).unwrap();
944
945        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
946        let config: Config = sourced.into();
947
948        // Arrays should now be properly parsed
949        let rule_config = config.rules.get("MD003").expect("MD003 config should exist");
950
951        // Check that arrays are present and correctly parsed
952        assert!(rule_config.values.contains_key("levels"));
953        assert!(rule_config.values.contains_key("tags"));
954        assert!(rule_config.values.contains_key("mixed"));
955
956        // Verify array contents
957        if let Some(toml::Value::Array(levels)) = rule_config.values.get("levels") {
958            assert_eq!(levels.len(), 3);
959            assert_eq!(levels[0], toml::Value::Integer(1));
960            assert_eq!(levels[1], toml::Value::Integer(2));
961            assert_eq!(levels[2], toml::Value::Integer(3));
962        } else {
963            panic!("levels should be an array");
964        }
965
966        if let Some(toml::Value::Array(tags)) = rule_config.values.get("tags") {
967            assert_eq!(tags.len(), 2);
968            assert_eq!(tags[0], toml::Value::String("important".to_string()));
969            assert_eq!(tags[1], toml::Value::String("critical".to_string()));
970        } else {
971            panic!("tags should be an array");
972        }
973
974        if let Some(toml::Value::Array(mixed)) = rule_config.values.get("mixed") {
975            assert_eq!(mixed.len(), 3);
976            assert_eq!(mixed[0], toml::Value::Integer(1));
977            assert_eq!(mixed[1], toml::Value::String("two".to_string()));
978            assert_eq!(mixed[2], toml::Value::Boolean(true));
979        } else {
980            panic!("mixed should be an array");
981        }
982    }
983
984    #[test]
985    fn test_normalize_key_edge_cases() {
986        // Rule names
987        assert_eq!(normalize_key("MD001"), "MD001");
988        assert_eq!(normalize_key("md001"), "MD001");
989        assert_eq!(normalize_key("Md001"), "MD001");
990        assert_eq!(normalize_key("mD001"), "MD001");
991
992        // Non-rule names
993        assert_eq!(normalize_key("line_length"), "line-length");
994        assert_eq!(normalize_key("line-length"), "line-length");
995        assert_eq!(normalize_key("LINE_LENGTH"), "line-length");
996        assert_eq!(normalize_key("respect_gitignore"), "respect-gitignore");
997
998        // Edge cases
999        assert_eq!(normalize_key("MD"), "md"); // Too short to be a rule
1000        assert_eq!(normalize_key("MD00"), "md00"); // Too short
1001        assert_eq!(normalize_key("MD0001"), "md0001"); // Too long
1002        assert_eq!(normalize_key("MDabc"), "mdabc"); // Non-digit
1003        assert_eq!(normalize_key("MD00a"), "md00a"); // Partial digit
1004        assert_eq!(normalize_key(""), "");
1005        assert_eq!(normalize_key("_"), "-");
1006        assert_eq!(normalize_key("___"), "---");
1007    }
1008
1009    #[test]
1010    fn test_missing_config_file() {
1011        let temp_dir = tempdir().unwrap();
1012        let config_path = temp_dir.path().join("nonexistent.toml");
1013
1014        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1015        assert!(result.is_err());
1016        match result.unwrap_err() {
1017            ConfigError::IoError { .. } => {}
1018            _ => panic!("Expected IoError for missing file"),
1019        }
1020    }
1021
1022    #[test]
1023    #[cfg(unix)]
1024    fn test_permission_denied_config() {
1025        use std::os::unix::fs::PermissionsExt;
1026
1027        let temp_dir = tempdir().unwrap();
1028        let config_path = temp_dir.path().join(".rumdl.toml");
1029
1030        fs::write(&config_path, "enable = [\"MD001\"]").unwrap();
1031
1032        // Remove read permissions
1033        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1034        perms.set_mode(0o000);
1035        fs::set_permissions(&config_path, perms).unwrap();
1036
1037        let result = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true);
1038
1039        // Restore permissions for cleanup
1040        let mut perms = fs::metadata(&config_path).unwrap().permissions();
1041        perms.set_mode(0o644);
1042        fs::set_permissions(&config_path, perms).unwrap();
1043
1044        assert!(result.is_err());
1045        match result.unwrap_err() {
1046            ConfigError::IoError { .. } => {}
1047            _ => panic!("Expected IoError for permission denied"),
1048        }
1049    }
1050
1051    #[test]
1052    fn test_circular_reference_detection() {
1053        // This test is more conceptual since TOML doesn't support circular references
1054        // But we test that deeply nested structures don't cause stack overflow
1055        let temp_dir = tempdir().unwrap();
1056        let config_path = temp_dir.path().join(".rumdl.toml");
1057
1058        let mut config_content = String::from("[MD001]\n");
1059        for i in 0..100 {
1060            config_content.push_str(&format!("key{i} = {i}\n"));
1061        }
1062
1063        fs::write(&config_path, config_content).unwrap();
1064
1065        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1066        let config: Config = sourced.into();
1067
1068        let rule_config = config.rules.get("MD001").unwrap();
1069        assert_eq!(rule_config.values.len(), 100);
1070    }
1071
1072    #[test]
1073    fn test_special_toml_values() {
1074        let temp_dir = tempdir().unwrap();
1075        let config_path = temp_dir.path().join(".rumdl.toml");
1076
1077        let config_content = r#"
1078[MD001]
1079infinity = inf
1080neg_infinity = -inf
1081not_a_number = nan
1082datetime = 1979-05-27T07:32:00Z
1083local_date = 1979-05-27
1084local_time = 07:32:00
1085"#;
1086        fs::write(&config_path, config_content).unwrap();
1087
1088        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1089        let config: Config = sourced.into();
1090
1091        // Some values might not be parsed due to parser limitations
1092        if let Some(rule_config) = config.rules.get("MD001") {
1093            // Check special float values if present
1094            if let Some(toml::Value::Float(f)) = rule_config.values.get("infinity") {
1095                assert!(f.is_infinite() && f.is_sign_positive());
1096            }
1097            if let Some(toml::Value::Float(f)) = rule_config.values.get("neg_infinity") {
1098                assert!(f.is_infinite() && f.is_sign_negative());
1099            }
1100            if let Some(toml::Value::Float(f)) = rule_config.values.get("not_a_number") {
1101                assert!(f.is_nan());
1102            }
1103
1104            // Check datetime values if present
1105            if let Some(val) = rule_config.values.get("datetime") {
1106                assert!(matches!(val, toml::Value::Datetime(_)));
1107            }
1108            // Note: local_date and local_time might not be parsed by the current implementation
1109        }
1110    }
1111
1112    #[test]
1113    fn test_default_config_passes_validation() {
1114        use crate::rules;
1115
1116        let temp_dir = tempdir().unwrap();
1117        let config_path = temp_dir.path().join(".rumdl.toml");
1118        let config_path_str = config_path.to_str().unwrap();
1119
1120        // Create the default config using the same function that `rumdl init` uses
1121        create_default_config(config_path_str).unwrap();
1122
1123        // Load it back as a SourcedConfig
1124        let sourced =
1125            SourcedConfig::load(Some(config_path_str), None).expect("Default config should load successfully");
1126
1127        // Create the rule registry
1128        let all_rules = rules::all_rules(&Config::default());
1129        let registry = RuleRegistry::from_rules(&all_rules);
1130
1131        // Validate the config
1132        let warnings = validate_config_sourced(&sourced, &registry);
1133
1134        // The default config should have no warnings
1135        if !warnings.is_empty() {
1136            for warning in &warnings {
1137                eprintln!("Config validation warning: {}", warning.message);
1138                if let Some(rule) = &warning.rule {
1139                    eprintln!("  Rule: {rule}");
1140                }
1141                if let Some(key) = &warning.key {
1142                    eprintln!("  Key: {key}");
1143                }
1144            }
1145        }
1146        assert!(
1147            warnings.is_empty(),
1148            "Default config from rumdl init should pass validation without warnings"
1149        );
1150    }
1151
1152    #[test]
1153    fn test_per_file_ignores_config_parsing() {
1154        let temp_dir = tempdir().unwrap();
1155        let config_path = temp_dir.path().join(".rumdl.toml");
1156        let config_content = r#"
1157[per-file-ignores]
1158"README.md" = ["MD033"]
1159"docs/**/*.md" = ["MD013", "MD033"]
1160"test/*.md" = ["MD041"]
1161"#;
1162        fs::write(&config_path, config_content).unwrap();
1163
1164        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1165        let config: Config = sourced.into();
1166
1167        // Verify per-file-ignores was loaded
1168        assert_eq!(config.per_file_ignores.len(), 3);
1169        assert_eq!(
1170            config.per_file_ignores.get("README.md"),
1171            Some(&vec!["MD033".to_string()])
1172        );
1173        assert_eq!(
1174            config.per_file_ignores.get("docs/**/*.md"),
1175            Some(&vec!["MD013".to_string(), "MD033".to_string()])
1176        );
1177        assert_eq!(
1178            config.per_file_ignores.get("test/*.md"),
1179            Some(&vec!["MD041".to_string()])
1180        );
1181    }
1182
1183    #[test]
1184    fn test_per_file_ignores_glob_matching() {
1185        use std::path::PathBuf;
1186
1187        let temp_dir = tempdir().unwrap();
1188        let config_path = temp_dir.path().join(".rumdl.toml");
1189        let config_content = r#"
1190[per-file-ignores]
1191"README.md" = ["MD033"]
1192"docs/**/*.md" = ["MD013"]
1193"**/test_*.md" = ["MD041"]
1194"#;
1195        fs::write(&config_path, config_content).unwrap();
1196
1197        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1198        let config: Config = sourced.into();
1199
1200        // Test exact match
1201        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1202        assert!(ignored.contains("MD033"));
1203        assert_eq!(ignored.len(), 1);
1204
1205        // Test glob pattern matching
1206        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1207        assert!(ignored.contains("MD013"));
1208        assert_eq!(ignored.len(), 1);
1209
1210        // Test recursive glob pattern
1211        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("tests/fixtures/test_example.md"));
1212        assert!(ignored.contains("MD041"));
1213        assert_eq!(ignored.len(), 1);
1214
1215        // Test non-matching path
1216        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("other/file.md"));
1217        assert!(ignored.is_empty());
1218    }
1219
1220    #[test]
1221    fn test_per_file_ignores_pyproject_toml() {
1222        let temp_dir = tempdir().unwrap();
1223        let config_path = temp_dir.path().join("pyproject.toml");
1224        let config_content = r#"
1225[tool.rumdl]
1226[tool.rumdl.per-file-ignores]
1227"README.md" = ["MD033", "MD013"]
1228"generated/*.md" = ["MD041"]
1229"#;
1230        fs::write(&config_path, config_content).unwrap();
1231
1232        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1233        let config: Config = sourced.into();
1234
1235        // Verify per-file-ignores was loaded from pyproject.toml
1236        assert_eq!(config.per_file_ignores.len(), 2);
1237        assert_eq!(
1238            config.per_file_ignores.get("README.md"),
1239            Some(&vec!["MD033".to_string(), "MD013".to_string()])
1240        );
1241        assert_eq!(
1242            config.per_file_ignores.get("generated/*.md"),
1243            Some(&vec!["MD041".to_string()])
1244        );
1245    }
1246
1247    #[test]
1248    fn test_per_file_ignores_multiple_patterns_match() {
1249        use std::path::PathBuf;
1250
1251        let temp_dir = tempdir().unwrap();
1252        let config_path = temp_dir.path().join(".rumdl.toml");
1253        let config_content = r#"
1254[per-file-ignores]
1255"docs/**/*.md" = ["MD013"]
1256"**/api/*.md" = ["MD033"]
1257"docs/api/overview.md" = ["MD041"]
1258"#;
1259        fs::write(&config_path, config_content).unwrap();
1260
1261        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1262        let config: Config = sourced.into();
1263
1264        // File matches multiple patterns - should get union of all rules
1265        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("docs/api/overview.md"));
1266        assert_eq!(ignored.len(), 3);
1267        assert!(ignored.contains("MD013"));
1268        assert!(ignored.contains("MD033"));
1269        assert!(ignored.contains("MD041"));
1270    }
1271
1272    #[test]
1273    fn test_per_file_ignores_rule_name_normalization() {
1274        use std::path::PathBuf;
1275
1276        let temp_dir = tempdir().unwrap();
1277        let config_path = temp_dir.path().join(".rumdl.toml");
1278        let config_content = r#"
1279[per-file-ignores]
1280"README.md" = ["md033", "MD013", "Md041"]
1281"#;
1282        fs::write(&config_path, config_content).unwrap();
1283
1284        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1285        let config: Config = sourced.into();
1286
1287        // All rule names should be normalized to uppercase
1288        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1289        assert_eq!(ignored.len(), 3);
1290        assert!(ignored.contains("MD033"));
1291        assert!(ignored.contains("MD013"));
1292        assert!(ignored.contains("MD041"));
1293    }
1294
1295    #[test]
1296    fn test_per_file_ignores_invalid_glob_pattern() {
1297        use std::path::PathBuf;
1298
1299        let temp_dir = tempdir().unwrap();
1300        let config_path = temp_dir.path().join(".rumdl.toml");
1301        let config_content = r#"
1302[per-file-ignores]
1303"[invalid" = ["MD033"]
1304"valid/*.md" = ["MD013"]
1305"#;
1306        fs::write(&config_path, config_content).unwrap();
1307
1308        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1309        let config: Config = sourced.into();
1310
1311        // Invalid pattern should be skipped, valid pattern should work
1312        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("valid/test.md"));
1313        assert!(ignored.contains("MD013"));
1314
1315        // Invalid pattern should not cause issues
1316        let ignored2 = config.get_ignored_rules_for_file(&PathBuf::from("[invalid"));
1317        assert!(ignored2.is_empty());
1318    }
1319
1320    #[test]
1321    fn test_per_file_ignores_empty_section() {
1322        use std::path::PathBuf;
1323
1324        let temp_dir = tempdir().unwrap();
1325        let config_path = temp_dir.path().join(".rumdl.toml");
1326        let config_content = r#"
1327[global]
1328disable = ["MD001"]
1329
1330[per-file-ignores]
1331"#;
1332        fs::write(&config_path, config_content).unwrap();
1333
1334        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1335        let config: Config = sourced.into();
1336
1337        // Empty per-file-ignores should work fine
1338        assert_eq!(config.per_file_ignores.len(), 0);
1339        let ignored = config.get_ignored_rules_for_file(&PathBuf::from("README.md"));
1340        assert!(ignored.is_empty());
1341    }
1342
1343    #[test]
1344    fn test_per_file_ignores_with_underscores_in_pyproject() {
1345        let temp_dir = tempdir().unwrap();
1346        let config_path = temp_dir.path().join("pyproject.toml");
1347        let config_content = r#"
1348[tool.rumdl]
1349[tool.rumdl.per_file_ignores]
1350"README.md" = ["MD033"]
1351"#;
1352        fs::write(&config_path, config_content).unwrap();
1353
1354        let sourced = SourcedConfig::load_with_discovery(Some(config_path.to_str().unwrap()), None, true).unwrap();
1355        let config: Config = sourced.into();
1356
1357        // Should support both per-file-ignores and per_file_ignores
1358        assert_eq!(config.per_file_ignores.len(), 1);
1359        assert_eq!(
1360            config.per_file_ignores.get("README.md"),
1361            Some(&vec!["MD033".to_string()])
1362        );
1363    }
1364
1365    #[test]
1366    fn test_generate_json_schema() {
1367        use schemars::schema_for;
1368        use std::env;
1369
1370        let schema = schema_for!(Config);
1371        let schema_json = serde_json::to_string_pretty(&schema).expect("Failed to serialize schema");
1372
1373        // Write schema to file if RUMDL_UPDATE_SCHEMA env var is set
1374        if env::var("RUMDL_UPDATE_SCHEMA").is_ok() {
1375            let schema_path = env::current_dir().unwrap().join("rumdl.schema.json");
1376            fs::write(&schema_path, &schema_json).expect("Failed to write schema file");
1377            println!("Schema written to: {}", schema_path.display());
1378        }
1379
1380        // Basic validation that schema was generated
1381        assert!(schema_json.contains("\"title\": \"Config\""));
1382        assert!(schema_json.contains("\"global\""));
1383        assert!(schema_json.contains("\"per-file-ignores\""));
1384    }
1385
1386    #[test]
1387    fn test_user_config_loaded_with_explicit_project_config() {
1388        // Regression test for issue #131: User config should always be loaded as base layer,
1389        // even when an explicit project config path is provided
1390        let temp_dir = tempdir().unwrap();
1391
1392        // Create a fake user config directory
1393        // Note: user_configuration_path_impl adds /rumdl to the config dir
1394        let user_config_dir = temp_dir.path().join("user_config");
1395        let rumdl_config_dir = user_config_dir.join("rumdl");
1396        fs::create_dir_all(&rumdl_config_dir).unwrap();
1397        let user_config_path = rumdl_config_dir.join("rumdl.toml");
1398
1399        // User config disables MD013 and MD041
1400        let user_config_content = r#"
1401[global]
1402disable = ["MD013", "MD041"]
1403line-length = 100
1404"#;
1405        fs::write(&user_config_path, user_config_content).unwrap();
1406
1407        // Create a project config that enables MD001
1408        let project_config_path = temp_dir.path().join("project").join("pyproject.toml");
1409        fs::create_dir_all(project_config_path.parent().unwrap()).unwrap();
1410        let project_config_content = r#"
1411[tool.rumdl]
1412enable = ["MD001"]
1413"#;
1414        fs::write(&project_config_path, project_config_content).unwrap();
1415
1416        // Load config with explicit project path, passing user_config_dir
1417        let sourced = SourcedConfig::load_with_discovery_impl(
1418            Some(project_config_path.to_str().unwrap()),
1419            None,
1420            false,
1421            Some(&user_config_dir),
1422        )
1423        .unwrap();
1424
1425        let config: Config = sourced.into();
1426
1427        // User config settings should be preserved
1428        assert!(
1429            config.global.disable.contains(&"MD013".to_string()),
1430            "User config disabled rules should be preserved"
1431        );
1432        assert!(
1433            config.global.disable.contains(&"MD041".to_string()),
1434            "User config disabled rules should be preserved"
1435        );
1436
1437        // Project config settings should also be applied (merged on top)
1438        assert!(
1439            config.global.enable.contains(&"MD001".to_string()),
1440            "Project config enabled rules should be applied"
1441        );
1442    }
1443}
1444
/// Configuration source with clear precedence hierarchy.
///
/// Precedence order (a source with a higher value overrides a source with a lower value):
/// - Default (0): Built-in defaults
/// - UserConfig (1): User-level ~/.config/rumdl/rumdl.toml
/// - PyprojectToml (2): Project-level pyproject.toml
/// - ProjectConfig (3): Project-level .rumdl.toml (most specific)
/// - Cli (4): Command-line flags (highest priority)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigSource {
    /// Built-in default configuration
    Default,
    /// User-level configuration from ~/.config/rumdl/rumdl.toml
    UserConfig,
    /// Project-level configuration from pyproject.toml
    PyprojectToml,
    /// Project-level configuration from .rumdl.toml or rumdl.toml
    ProjectConfig,
    /// Command-line flags (highest precedence)
    Cli,
}

impl ConfigSource {
    /// Returns the numeric precedence of this source; larger numbers win when merging.
    ///
    /// This is the single source of truth used by [`SourcedValue::merge_override`] and
    /// [`SourcedValue::merge_union`].
    pub const fn precedence(self) -> u8 {
        match self {
            ConfigSource::Default => 0,
            ConfigSource::UserConfig => 1,
            ConfigSource::PyprojectToml => 2,
            ConfigSource::ProjectConfig => 3,
            ConfigSource::Cli => 4,
        }
    }
}

/// One entry in the history of a [`SourcedValue`]: the value a source supplied and,
/// when known, the file and line it was read from.
#[derive(Debug, Clone)]
pub struct ConfigOverride<T> {
    /// The value supplied by `source`.
    pub value: T,
    /// Where the value came from.
    pub source: ConfigSource,
    /// File the value was read from, if any.
    pub file: Option<String>,
    /// Line within `file`, if known.
    pub line: Option<usize>,
}

/// A configuration value together with the source of its current (effective) value
/// and the list of overrides recorded for it.
#[derive(Debug, Clone)]
pub struct SourcedValue<T> {
    /// The effective value.
    pub value: T,
    /// The source that last set `value`.
    pub source: ConfigSource,
    /// Every override recorded so far, oldest first (including the initial value).
    pub overrides: Vec<ConfigOverride<T>>,
}

impl<T: Clone> SourcedValue<T> {
    /// Creates a value originating from `source`.
    ///
    /// The history is seeded with one entry for the initial value, without file or line.
    pub fn new(value: T, source: ConfigSource) -> Self {
        Self {
            value: value.clone(),
            source,
            overrides: vec![ConfigOverride {
                value,
                source,
                file: None,
                line: None,
            }],
        }
    }

    /// Merges a new override into this SourcedValue based on source precedence.
    /// If the new source has higher or equal precedence, the value and source are updated,
    /// and the new override is added to the history. Otherwise the call is a no-op.
    pub fn merge_override(
        &mut self,
        new_value: T,
        new_source: ConfigSource,
        new_file: Option<String>,
        new_line: Option<usize>,
    ) {
        if new_source.precedence() >= self.source.precedence() {
            self.push_override(new_value, new_source, new_file, new_line);
        }
    }

    /// Unconditionally replaces the effective value and source, and records the override.
    ///
    /// This is `merge_override` without the precedence check.
    pub fn push_override(&mut self, value: T, source: ConfigSource, file: Option<String>, line: Option<usize>) {
        self.value = value.clone();
        self.source = source;
        self.overrides.push(ConfigOverride {
            value,
            source,
            file,
            line,
        });
    }
}

impl<T: Clone + Eq + std::hash::Hash> SourcedValue<Vec<T>> {
    /// Merges a new value using union semantics (for arrays like `disable`).
    ///
    /// If `new_source` has higher or equal precedence than the current source, items of
    /// `new_value` that are not already present are appended (existing order is kept) and
    /// the source is updated. The history entry records `new_value` as supplied, not the
    /// combined list. With lower precedence the call is a no-op.
    pub fn merge_union(
        &mut self,
        new_value: Vec<T>,
        new_source: ConfigSource,
        new_file: Option<String>,
        new_line: Option<usize>,
    ) {
        if new_source.precedence() < self.source.precedence() {
            return;
        }

        // Union: combine values from both sources with deduplication.
        let mut combined = self.value.clone();
        {
            // `seen` tracks everything already in `combined`; `insert` returns false for
            // items that are present, which also filters duplicates inside `new_value`.
            let mut seen: HashSet<&T> = self.value.iter().collect();
            for item in &new_value {
                if seen.insert(item) {
                    combined.push(item.clone());
                }
            }
        }

        self.value = combined;
        self.source = new_source;
        self.overrides.push(ConfigOverride {
            value: new_value,
            source: new_source,
            file: new_file,
            line: new_line,
        });
    }
}
1583
1584#[derive(Debug, Clone)]
1585pub struct SourcedGlobalConfig {
1586    pub enable: SourcedValue<Vec<String>>,
1587    pub disable: SourcedValue<Vec<String>>,
1588    pub exclude: SourcedValue<Vec<String>>,
1589    pub include: SourcedValue<Vec<String>>,
1590    pub respect_gitignore: SourcedValue<bool>,
1591    pub line_length: SourcedValue<LineLength>,
1592    pub output_format: Option<SourcedValue<String>>,
1593    pub fixable: SourcedValue<Vec<String>>,
1594    pub unfixable: SourcedValue<Vec<String>>,
1595    pub flavor: SourcedValue<MarkdownFlavor>,
1596    pub force_exclude: SourcedValue<bool>,
1597    pub cache_dir: Option<SourcedValue<String>>,
1598    pub cache: SourcedValue<bool>,
1599}
1600
1601impl Default for SourcedGlobalConfig {
1602    fn default() -> Self {
1603        SourcedGlobalConfig {
1604            enable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1605            disable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1606            exclude: SourcedValue::new(Vec::new(), ConfigSource::Default),
1607            include: SourcedValue::new(Vec::new(), ConfigSource::Default),
1608            respect_gitignore: SourcedValue::new(true, ConfigSource::Default),
1609            line_length: SourcedValue::new(LineLength::default(), ConfigSource::Default),
1610            output_format: None,
1611            fixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1612            unfixable: SourcedValue::new(Vec::new(), ConfigSource::Default),
1613            flavor: SourcedValue::new(MarkdownFlavor::default(), ConfigSource::Default),
1614            force_exclude: SourcedValue::new(false, ConfigSource::Default),
1615            cache_dir: None,
1616            cache: SourcedValue::new(true, ConfigSource::Default),
1617        }
1618    }
1619}
1620
1621#[derive(Debug, Default, Clone)]
1622pub struct SourcedRuleConfig {
1623    pub values: BTreeMap<String, SourcedValue<toml::Value>>,
1624}
1625
1626/// Represents configuration loaded from a single source file, with provenance.
1627/// Used as an intermediate step before merging into the final SourcedConfig.
1628#[derive(Debug, Clone)]
1629pub struct SourcedConfigFragment {
1630    pub global: SourcedGlobalConfig,
1631    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
1632    pub rules: BTreeMap<String, SourcedRuleConfig>,
1633    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
1634                                                             // Note: loaded_files is tracked globally in SourcedConfig.
1635}
1636
1637impl Default for SourcedConfigFragment {
1638    fn default() -> Self {
1639        Self {
1640            global: SourcedGlobalConfig::default(),
1641            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1642            rules: BTreeMap::new(),
1643            unknown_keys: Vec::new(),
1644        }
1645    }
1646}
1647
/// The merged configuration from all loaded sources, with provenance kept on each value.
#[derive(Debug, Clone)]
pub struct SourcedConfig {
    /// Global (non rule-specific) settings.
    pub global: SourcedGlobalConfig,
    /// Per-file ignore table (string key mapped to a list of strings).
    /// NOTE(review): presumably file pattern -> rule names to ignore; confirm against the parsers.
    pub per_file_ignores: SourcedValue<HashMap<String, Vec<String>>>,
    /// Rule-specific settings; `merge` stores rule names upper-cased.
    pub rules: BTreeMap<String, SourcedRuleConfig>,
    /// Paths of the configuration files that were merged, in the order they were loaded.
    pub loaded_files: Vec<String>,
    /// Unrecognised keys collected from the merged fragments, de-duplicated by section and key.
    pub unknown_keys: Vec<(String, String, Option<String>)>, // (section, key, file_path)
    /// Project root directory (parent of config file), used for resolving relative paths
    pub project_root: Option<std::path::PathBuf>,
}
1658
1659impl Default for SourcedConfig {
1660    fn default() -> Self {
1661        Self {
1662            global: SourcedGlobalConfig::default(),
1663            per_file_ignores: SourcedValue::new(HashMap::new(), ConfigSource::Default),
1664            rules: BTreeMap::new(),
1665            loaded_files: Vec::new(),
1666            unknown_keys: Vec::new(),
1667            project_root: None,
1668        }
1669    }
1670}
1671
1672impl SourcedConfig {
1673    /// Merges another SourcedConfigFragment into this SourcedConfig.
1674    /// Uses source precedence to determine which values take effect.
1675    fn merge(&mut self, fragment: SourcedConfigFragment) {
1676        // Merge global config
1677        // Enable uses replace semantics (project can enforce rules)
1678        self.global.enable.merge_override(
1679            fragment.global.enable.value,
1680            fragment.global.enable.source,
1681            fragment.global.enable.overrides.first().and_then(|o| o.file.clone()),
1682            fragment.global.enable.overrides.first().and_then(|o| o.line),
1683        );
1684
1685        // Disable uses union semantics (user can add to project disables)
1686        self.global.disable.merge_union(
1687            fragment.global.disable.value,
1688            fragment.global.disable.source,
1689            fragment.global.disable.overrides.first().and_then(|o| o.file.clone()),
1690            fragment.global.disable.overrides.first().and_then(|o| o.line),
1691        );
1692
1693        // Conflict resolution: Enable overrides disable
1694        // Remove any rules from disable that appear in enable
1695        self.global
1696            .disable
1697            .value
1698            .retain(|rule| !self.global.enable.value.contains(rule));
1699        self.global.include.merge_override(
1700            fragment.global.include.value,
1701            fragment.global.include.source,
1702            fragment.global.include.overrides.first().and_then(|o| o.file.clone()),
1703            fragment.global.include.overrides.first().and_then(|o| o.line),
1704        );
1705        self.global.exclude.merge_override(
1706            fragment.global.exclude.value,
1707            fragment.global.exclude.source,
1708            fragment.global.exclude.overrides.first().and_then(|o| o.file.clone()),
1709            fragment.global.exclude.overrides.first().and_then(|o| o.line),
1710        );
1711        self.global.respect_gitignore.merge_override(
1712            fragment.global.respect_gitignore.value,
1713            fragment.global.respect_gitignore.source,
1714            fragment
1715                .global
1716                .respect_gitignore
1717                .overrides
1718                .first()
1719                .and_then(|o| o.file.clone()),
1720            fragment.global.respect_gitignore.overrides.first().and_then(|o| o.line),
1721        );
1722        self.global.line_length.merge_override(
1723            fragment.global.line_length.value,
1724            fragment.global.line_length.source,
1725            fragment
1726                .global
1727                .line_length
1728                .overrides
1729                .first()
1730                .and_then(|o| o.file.clone()),
1731            fragment.global.line_length.overrides.first().and_then(|o| o.line),
1732        );
1733        self.global.fixable.merge_override(
1734            fragment.global.fixable.value,
1735            fragment.global.fixable.source,
1736            fragment.global.fixable.overrides.first().and_then(|o| o.file.clone()),
1737            fragment.global.fixable.overrides.first().and_then(|o| o.line),
1738        );
1739        self.global.unfixable.merge_override(
1740            fragment.global.unfixable.value,
1741            fragment.global.unfixable.source,
1742            fragment.global.unfixable.overrides.first().and_then(|o| o.file.clone()),
1743            fragment.global.unfixable.overrides.first().and_then(|o| o.line),
1744        );
1745
1746        // Merge flavor
1747        self.global.flavor.merge_override(
1748            fragment.global.flavor.value,
1749            fragment.global.flavor.source,
1750            fragment.global.flavor.overrides.first().and_then(|o| o.file.clone()),
1751            fragment.global.flavor.overrides.first().and_then(|o| o.line),
1752        );
1753
1754        // Merge force_exclude
1755        self.global.force_exclude.merge_override(
1756            fragment.global.force_exclude.value,
1757            fragment.global.force_exclude.source,
1758            fragment
1759                .global
1760                .force_exclude
1761                .overrides
1762                .first()
1763                .and_then(|o| o.file.clone()),
1764            fragment.global.force_exclude.overrides.first().and_then(|o| o.line),
1765        );
1766
1767        // Merge output_format if present
1768        if let Some(output_format_fragment) = fragment.global.output_format {
1769            if let Some(ref mut output_format) = self.global.output_format {
1770                output_format.merge_override(
1771                    output_format_fragment.value,
1772                    output_format_fragment.source,
1773                    output_format_fragment.overrides.first().and_then(|o| o.file.clone()),
1774                    output_format_fragment.overrides.first().and_then(|o| o.line),
1775                );
1776            } else {
1777                self.global.output_format = Some(output_format_fragment);
1778            }
1779        }
1780
1781        // Merge cache_dir if present
1782        if let Some(cache_dir_fragment) = fragment.global.cache_dir {
1783            if let Some(ref mut cache_dir) = self.global.cache_dir {
1784                cache_dir.merge_override(
1785                    cache_dir_fragment.value,
1786                    cache_dir_fragment.source,
1787                    cache_dir_fragment.overrides.first().and_then(|o| o.file.clone()),
1788                    cache_dir_fragment.overrides.first().and_then(|o| o.line),
1789                );
1790            } else {
1791                self.global.cache_dir = Some(cache_dir_fragment);
1792            }
1793        }
1794
1795        // Merge cache if not default (only override when explicitly set)
1796        if fragment.global.cache.source != ConfigSource::Default {
1797            self.global.cache.merge_override(
1798                fragment.global.cache.value,
1799                fragment.global.cache.source,
1800                fragment.global.cache.overrides.first().and_then(|o| o.file.clone()),
1801                fragment.global.cache.overrides.first().and_then(|o| o.line),
1802            );
1803        }
1804
1805        // Merge per_file_ignores
1806        self.per_file_ignores.merge_override(
1807            fragment.per_file_ignores.value,
1808            fragment.per_file_ignores.source,
1809            fragment.per_file_ignores.overrides.first().and_then(|o| o.file.clone()),
1810            fragment.per_file_ignores.overrides.first().and_then(|o| o.line),
1811        );
1812
1813        // Merge rule configs
1814        for (rule_name, rule_fragment) in fragment.rules {
1815            let norm_rule_name = rule_name.to_ascii_uppercase(); // Normalize to uppercase for case-insensitivity
1816            let rule_entry = self.rules.entry(norm_rule_name).or_default();
1817            for (key, sourced_value_fragment) in rule_fragment.values {
1818                let sv_entry = rule_entry
1819                    .values
1820                    .entry(key.clone())
1821                    .or_insert_with(|| SourcedValue::new(sourced_value_fragment.value.clone(), ConfigSource::Default));
1822                let file_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.file.clone());
1823                let line_from_fragment = sourced_value_fragment.overrides.first().and_then(|o| o.line);
1824                sv_entry.merge_override(
1825                    sourced_value_fragment.value,  // Use the value from the fragment
1826                    sourced_value_fragment.source, // Use the source from the fragment
1827                    file_from_fragment,            // Pass the file path from the fragment override
1828                    line_from_fragment,            // Pass the line number from the fragment override
1829                );
1830            }
1831        }
1832
1833        // Merge unknown_keys from fragment
1834        for (section, key, file_path) in fragment.unknown_keys {
1835            // Deduplicate: only add if not already present
1836            if !self.unknown_keys.iter().any(|(s, k, _)| s == &section && k == &key) {
1837                self.unknown_keys.push((section, key, file_path));
1838            }
1839        }
1840    }
1841
1842    /// Load and merge configurations from files and CLI overrides.
1843    pub fn load(config_path: Option<&str>, cli_overrides: Option<&SourcedGlobalConfig>) -> Result<Self, ConfigError> {
1844        Self::load_with_discovery(config_path, cli_overrides, false)
1845    }
1846
1847    /// Finds project root by walking up from start_dir looking for .git directory.
1848    /// Falls back to start_dir if no .git found.
1849    fn find_project_root_from(start_dir: &Path) -> std::path::PathBuf {
1850        let mut current = start_dir.to_path_buf();
1851        const MAX_DEPTH: usize = 100;
1852
1853        for _ in 0..MAX_DEPTH {
1854            if current.join(".git").exists() {
1855                log::debug!("[rumdl-config] Found .git at: {}", current.display());
1856                return current;
1857            }
1858
1859            match current.parent() {
1860                Some(parent) => current = parent.to_path_buf(),
1861                None => break,
1862            }
1863        }
1864
1865        // No .git found, use start_dir as project root
1866        log::debug!(
1867            "[rumdl-config] No .git found, using config location as project root: {}",
1868            start_dir.display()
1869        );
1870        start_dir.to_path_buf()
1871    }
1872
1873    /// Discover configuration file by traversing up the directory tree.
1874    /// Returns the first configuration file found.
1875    /// Discovers config file and returns both the config path and project root.
1876    /// Returns: (config_file_path, project_root_path)
1877    /// Project root is the directory containing .git, or config parent as fallback.
1878    fn discover_config_upward() -> Option<(std::path::PathBuf, std::path::PathBuf)> {
1879        use std::env;
1880
1881        const CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", ".config/rumdl.toml", "pyproject.toml"];
1882        const MAX_DEPTH: usize = 100; // Prevent infinite traversal
1883
1884        let start_dir = match env::current_dir() {
1885            Ok(dir) => dir,
1886            Err(e) => {
1887                log::debug!("[rumdl-config] Failed to get current directory: {e}");
1888                return None;
1889            }
1890        };
1891
1892        let mut current_dir = start_dir.clone();
1893        let mut depth = 0;
1894        let mut found_config: Option<(std::path::PathBuf, std::path::PathBuf)> = None;
1895
1896        loop {
1897            if depth >= MAX_DEPTH {
1898                log::debug!("[rumdl-config] Maximum traversal depth reached");
1899                break;
1900            }
1901
1902            log::debug!("[rumdl-config] Searching for config in: {}", current_dir.display());
1903
1904            // Check for config files in order of precedence (only if not already found)
1905            if found_config.is_none() {
1906                for config_name in CONFIG_FILES {
1907                    let config_path = current_dir.join(config_name);
1908
1909                    if config_path.exists() {
1910                        // For pyproject.toml, verify it contains [tool.rumdl] section
1911                        if *config_name == "pyproject.toml" {
1912                            if let Ok(content) = std::fs::read_to_string(&config_path) {
1913                                if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1914                                    log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1915                                    // Store config, but continue looking for .git
1916                                    found_config = Some((config_path.clone(), current_dir.clone()));
1917                                    break;
1918                                }
1919                                log::debug!("[rumdl-config] Found pyproject.toml but no [tool.rumdl] section");
1920                                continue;
1921                            }
1922                        } else {
1923                            log::debug!("[rumdl-config] Found config file: {}", config_path.display());
1924                            // Store config, but continue looking for .git
1925                            found_config = Some((config_path.clone(), current_dir.clone()));
1926                            break;
1927                        }
1928                    }
1929                }
1930            }
1931
1932            // Check for .git directory (stop boundary)
1933            if current_dir.join(".git").exists() {
1934                log::debug!("[rumdl-config] Stopping at .git directory");
1935                break;
1936            }
1937
1938            // Move to parent directory
1939            match current_dir.parent() {
1940                Some(parent) => {
1941                    current_dir = parent.to_owned();
1942                    depth += 1;
1943                }
1944                None => {
1945                    log::debug!("[rumdl-config] Reached filesystem root");
1946                    break;
1947                }
1948            }
1949        }
1950
1951        // If config found, determine project root by walking up from config location
1952        if let Some((config_path, config_dir)) = found_config {
1953            let project_root = Self::find_project_root_from(&config_dir);
1954            return Some((config_path, project_root));
1955        }
1956
1957        None
1958    }
1959
1960    /// Internal implementation that accepts config directory for testing
1961    fn user_configuration_path_impl(config_dir: &Path) -> Option<std::path::PathBuf> {
1962        let config_dir = config_dir.join("rumdl");
1963
1964        // Check for config files in precedence order (same as project discovery)
1965        const USER_CONFIG_FILES: &[&str] = &[".rumdl.toml", "rumdl.toml", "pyproject.toml"];
1966
1967        log::debug!(
1968            "[rumdl-config] Checking for user configuration in: {}",
1969            config_dir.display()
1970        );
1971
1972        for filename in USER_CONFIG_FILES {
1973            let config_path = config_dir.join(filename);
1974
1975            if config_path.exists() {
1976                // For pyproject.toml, verify it contains [tool.rumdl] section
1977                if *filename == "pyproject.toml" {
1978                    if let Ok(content) = std::fs::read_to_string(&config_path) {
1979                        if content.contains("[tool.rumdl]") || content.contains("tool.rumdl") {
1980                            log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1981                            return Some(config_path);
1982                        }
1983                        log::debug!("[rumdl-config] Found user pyproject.toml but no [tool.rumdl] section");
1984                        continue;
1985                    }
1986                } else {
1987                    log::debug!("[rumdl-config] Found user configuration at: {}", config_path.display());
1988                    return Some(config_path);
1989                }
1990            }
1991        }
1992
1993        log::debug!(
1994            "[rumdl-config] No user configuration found in: {}",
1995            config_dir.display()
1996        );
1997        None
1998    }
1999
2000    /// Discover user-level configuration file from platform-specific config directory.
2001    /// Returns the first configuration file found in the user config directory.
2002    #[cfg(feature = "native")]
2003    fn user_configuration_path() -> Option<std::path::PathBuf> {
2004        use etcetera::{BaseStrategy, choose_base_strategy};
2005
2006        match choose_base_strategy() {
2007            Ok(strategy) => {
2008                let config_dir = strategy.config_dir();
2009                Self::user_configuration_path_impl(&config_dir)
2010            }
2011            Err(e) => {
2012                log::debug!("[rumdl-config] Failed to determine user config directory: {e}");
2013                None
2014            }
2015        }
2016    }
2017
    /// Stub for WASM builds - user config not supported.
    ///
    /// Compiled when the `native` feature is disabled; always returns `None`.
    #[cfg(not(feature = "native"))]
    fn user_configuration_path() -> Option<std::path::PathBuf> {
        None
    }
2023
2024    /// Internal implementation that accepts user config directory for testing
2025    #[doc(hidden)]
2026    pub fn load_with_discovery_impl(
2027        config_path: Option<&str>,
2028        cli_overrides: Option<&SourcedGlobalConfig>,
2029        skip_auto_discovery: bool,
2030        user_config_dir: Option<&Path>,
2031    ) -> Result<Self, ConfigError> {
2032        use std::env;
2033        log::debug!("[rumdl-config] Current working directory: {:?}", env::current_dir());
2034        if config_path.is_none() {
2035            if skip_auto_discovery {
2036                log::debug!("[rumdl-config] Skipping auto-discovery due to --no-config flag");
2037            } else {
2038                log::debug!("[rumdl-config] No explicit config_path provided, will search default locations");
2039            }
2040        } else {
2041            log::debug!("[rumdl-config] Explicit config_path provided: {config_path:?}");
2042        }
2043        let mut sourced_config = SourcedConfig::default();
2044
2045        // 1. Always load user configuration first (unless auto-discovery is disabled)
2046        // User config serves as the base layer that project configs build upon
2047        if !skip_auto_discovery {
2048            let user_config_path = if let Some(dir) = user_config_dir {
2049                Self::user_configuration_path_impl(dir)
2050            } else {
2051                Self::user_configuration_path()
2052            };
2053
2054            if let Some(user_config_path) = user_config_path {
2055                let path_str = user_config_path.display().to_string();
2056                let filename = user_config_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
2057
2058                log::debug!("[rumdl-config] Loading user configuration file: {path_str}");
2059
2060                if filename == "pyproject.toml" {
2061                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2062                        source: e,
2063                        path: path_str.clone(),
2064                    })?;
2065                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2066                        sourced_config.merge(fragment);
2067                        sourced_config.loaded_files.push(path_str);
2068                    }
2069                } else {
2070                    let content = std::fs::read_to_string(&user_config_path).map_err(|e| ConfigError::IoError {
2071                        source: e,
2072                        path: path_str.clone(),
2073                    })?;
2074                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::UserConfig)?;
2075                    sourced_config.merge(fragment);
2076                    sourced_config.loaded_files.push(path_str);
2077                }
2078            } else {
2079                log::debug!("[rumdl-config] No user configuration file found");
2080            }
2081        }
2082
2083        // 2. Load explicit config path if provided (overrides user config)
2084        if let Some(path) = config_path {
2085            let path_obj = Path::new(path);
2086            let filename = path_obj.file_name().and_then(|name| name.to_str()).unwrap_or("");
2087            log::debug!("[rumdl-config] Trying to load config file: {filename}");
2088            let path_str = path.to_string();
2089
2090            // Find project root by walking up from config location looking for .git
2091            if let Some(config_parent) = path_obj.parent() {
2092                let project_root = Self::find_project_root_from(config_parent);
2093                log::debug!(
2094                    "[rumdl-config] Project root (from explicit config): {}",
2095                    project_root.display()
2096                );
2097                sourced_config.project_root = Some(project_root);
2098            }
2099
2100            // Known markdownlint config files
2101            const MARKDOWNLINT_FILENAMES: &[&str] = &[".markdownlint.json", ".markdownlint.yaml", ".markdownlint.yml"];
2102
2103            if filename == "pyproject.toml" || filename == ".rumdl.toml" || filename == "rumdl.toml" {
2104                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2105                    source: e,
2106                    path: path_str.clone(),
2107                })?;
2108                if filename == "pyproject.toml" {
2109                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2110                        sourced_config.merge(fragment);
2111                        sourced_config.loaded_files.push(path_str.clone());
2112                    }
2113                } else {
2114                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2115                    sourced_config.merge(fragment);
2116                    sourced_config.loaded_files.push(path_str.clone());
2117                }
2118            } else if MARKDOWNLINT_FILENAMES.contains(&filename)
2119                || path_str.ends_with(".json")
2120                || path_str.ends_with(".jsonc")
2121                || path_str.ends_with(".yaml")
2122                || path_str.ends_with(".yml")
2123            {
2124                // Parse as markdownlint config (JSON/YAML)
2125                let fragment = load_from_markdownlint(&path_str)?;
2126                sourced_config.merge(fragment);
2127                sourced_config.loaded_files.push(path_str.clone());
2128                // markdownlint is fallback only
2129            } else {
2130                // Try TOML only
2131                let content = std::fs::read_to_string(path).map_err(|e| ConfigError::IoError {
2132                    source: e,
2133                    path: path_str.clone(),
2134                })?;
2135                let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2136                sourced_config.merge(fragment);
2137                sourced_config.loaded_files.push(path_str.clone());
2138            }
2139        }
2140
2141        // 3. Perform auto-discovery for project config if not skipped AND no explicit config path
2142        if !skip_auto_discovery && config_path.is_none() {
2143            // Look for project configuration files (override user config)
2144            if let Some((config_file, project_root)) = Self::discover_config_upward() {
2145                let path_str = config_file.display().to_string();
2146                let filename = config_file.file_name().and_then(|n| n.to_str()).unwrap_or("");
2147
2148                log::debug!("[rumdl-config] Loading discovered config file: {path_str}");
2149                log::debug!("[rumdl-config] Project root: {}", project_root.display());
2150
2151                // Store project root for cache directory resolution
2152                sourced_config.project_root = Some(project_root);
2153
2154                if filename == "pyproject.toml" {
2155                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2156                        source: e,
2157                        path: path_str.clone(),
2158                    })?;
2159                    if let Some(fragment) = parse_pyproject_toml(&content, &path_str)? {
2160                        sourced_config.merge(fragment);
2161                        sourced_config.loaded_files.push(path_str);
2162                    }
2163                } else if filename == ".rumdl.toml" || filename == "rumdl.toml" {
2164                    let content = std::fs::read_to_string(&config_file).map_err(|e| ConfigError::IoError {
2165                        source: e,
2166                        path: path_str.clone(),
2167                    })?;
2168                    let fragment = parse_rumdl_toml(&content, &path_str, ConfigSource::ProjectConfig)?;
2169                    sourced_config.merge(fragment);
2170                    sourced_config.loaded_files.push(path_str);
2171                }
2172            } else {
2173                log::debug!("[rumdl-config] No configuration file found via upward traversal");
2174
2175                // If no project config found, fallback to markdownlint config in current directory
2176                let mut found_markdownlint = false;
2177                for filename in MARKDOWNLINT_CONFIG_FILES {
2178                    if std::path::Path::new(filename).exists() {
2179                        match load_from_markdownlint(filename) {
2180                            Ok(fragment) => {
2181                                sourced_config.merge(fragment);
2182                                sourced_config.loaded_files.push(filename.to_string());
2183                                found_markdownlint = true;
2184                                break; // Load only the first one found
2185                            }
2186                            Err(_e) => {
2187                                // Log error but continue (it's just a fallback)
2188                            }
2189                        }
2190                    }
2191                }
2192
2193                if !found_markdownlint {
2194                    log::debug!("[rumdl-config] No markdownlint configuration file found");
2195                }
2196            }
2197        }
2198
2199        // 4. Apply CLI overrides (highest precedence)
2200        if let Some(cli) = cli_overrides {
2201            sourced_config
2202                .global
2203                .enable
2204                .merge_override(cli.enable.value.clone(), ConfigSource::Cli, None, None);
2205            sourced_config
2206                .global
2207                .disable
2208                .merge_override(cli.disable.value.clone(), ConfigSource::Cli, None, None);
2209            sourced_config
2210                .global
2211                .exclude
2212                .merge_override(cli.exclude.value.clone(), ConfigSource::Cli, None, None);
2213            sourced_config
2214                .global
2215                .include
2216                .merge_override(cli.include.value.clone(), ConfigSource::Cli, None, None);
2217            sourced_config.global.respect_gitignore.merge_override(
2218                cli.respect_gitignore.value,
2219                ConfigSource::Cli,
2220                None,
2221                None,
2222            );
2223            sourced_config
2224                .global
2225                .fixable
2226                .merge_override(cli.fixable.value.clone(), ConfigSource::Cli, None, None);
2227            sourced_config
2228                .global
2229                .unfixable
2230                .merge_override(cli.unfixable.value.clone(), ConfigSource::Cli, None, None);
2231            // No rule-specific CLI overrides implemented yet
2232        }
2233
2234        // Unknown keys are now collected during parsing and validated via validate_config_sourced()
2235
2236        Ok(sourced_config)
2237    }
2238
2239    /// Load and merge configurations from files and CLI overrides.
2240    /// If skip_auto_discovery is true, only explicit config paths are loaded.
2241    pub fn load_with_discovery(
2242        config_path: Option<&str>,
2243        cli_overrides: Option<&SourcedGlobalConfig>,
2244        skip_auto_discovery: bool,
2245    ) -> Result<Self, ConfigError> {
2246        Self::load_with_discovery_impl(config_path, cli_overrides, skip_auto_discovery, None)
2247    }
2248}
2249
2250impl From<SourcedConfig> for Config {
2251    fn from(sourced: SourcedConfig) -> Self {
2252        let mut rules = BTreeMap::new();
2253        for (rule_name, sourced_rule_cfg) in sourced.rules {
2254            // Normalize rule name to uppercase for case-insensitive lookup
2255            let normalized_rule_name = rule_name.to_ascii_uppercase();
2256            let mut values = BTreeMap::new();
2257            for (key, sourced_val) in sourced_rule_cfg.values {
2258                values.insert(key, sourced_val.value);
2259            }
2260            rules.insert(normalized_rule_name, RuleConfig { values });
2261        }
2262        #[allow(deprecated)]
2263        let global = GlobalConfig {
2264            enable: sourced.global.enable.value,
2265            disable: sourced.global.disable.value,
2266            exclude: sourced.global.exclude.value,
2267            include: sourced.global.include.value,
2268            respect_gitignore: sourced.global.respect_gitignore.value,
2269            line_length: sourced.global.line_length.value,
2270            output_format: sourced.global.output_format.as_ref().map(|v| v.value.clone()),
2271            fixable: sourced.global.fixable.value,
2272            unfixable: sourced.global.unfixable.value,
2273            flavor: sourced.global.flavor.value,
2274            force_exclude: sourced.global.force_exclude.value,
2275            cache_dir: sourced.global.cache_dir.as_ref().map(|v| v.value.clone()),
2276            cache: sourced.global.cache.value,
2277        };
2278        Config {
2279            global,
2280            per_file_ignores: sourced.per_file_ignores.value,
2281            rules,
2282        }
2283    }
2284}
2285
/// Registry of all known rules and their config schemas
pub struct RuleRegistry {
    /// Map of rule name (e.g. "MD013") to set of valid config keys and their TOML value types.
    /// `from_rules` stores names in the form produced by `normalize_key`.
    pub rule_schemas: std::collections::BTreeMap<String, toml::map::Map<String, toml::Value>>,
    /// Map of rule name to config key aliases; only rules that report aliases have an entry.
    pub rule_aliases: std::collections::BTreeMap<String, std::collections::HashMap<String, String>>,
}
2293
2294impl RuleRegistry {
2295    /// Build a registry from a list of rules
2296    pub fn from_rules(rules: &[Box<dyn Rule>]) -> Self {
2297        let mut rule_schemas = std::collections::BTreeMap::new();
2298        let mut rule_aliases = std::collections::BTreeMap::new();
2299
2300        for rule in rules {
2301            let norm_name = if let Some((name, toml::Value::Table(table))) = rule.default_config_section() {
2302                let norm_name = normalize_key(&name); // Normalize the name from default_config_section
2303                rule_schemas.insert(norm_name.clone(), table);
2304                norm_name
2305            } else {
2306                let norm_name = normalize_key(rule.name()); // Normalize the name from rule.name()
2307                rule_schemas.insert(norm_name.clone(), toml::map::Map::new());
2308                norm_name
2309            };
2310
2311            // Store aliases if the rule provides them
2312            if let Some(aliases) = rule.config_aliases() {
2313                rule_aliases.insert(norm_name, aliases);
2314            }
2315        }
2316
2317        RuleRegistry {
2318            rule_schemas,
2319            rule_aliases,
2320        }
2321    }
2322
2323    /// Get all known rule names
2324    pub fn rule_names(&self) -> std::collections::BTreeSet<String> {
2325        self.rule_schemas.keys().cloned().collect()
2326    }
2327
2328    /// Get the valid configuration keys for a rule, including both original and normalized variants
2329    pub fn config_keys_for(&self, rule: &str) -> Option<std::collections::BTreeSet<String>> {
2330        self.rule_schemas.get(rule).map(|schema| {
2331            let mut all_keys = std::collections::BTreeSet::new();
2332
2333            // Add original keys from schema
2334            for key in schema.keys() {
2335                all_keys.insert(key.clone());
2336            }
2337
2338            // Add normalized variants for markdownlint compatibility
2339            for key in schema.keys() {
2340                // Add kebab-case variant
2341                all_keys.insert(key.replace('_', "-"));
2342                // Add snake_case variant
2343                all_keys.insert(key.replace('-', "_"));
2344                // Add normalized variant
2345                all_keys.insert(normalize_key(key));
2346            }
2347
2348            // Add any aliases defined by the rule
2349            if let Some(aliases) = self.rule_aliases.get(rule) {
2350                for alias_key in aliases.keys() {
2351                    all_keys.insert(alias_key.clone());
2352                    // Also add normalized variants of the alias
2353                    all_keys.insert(alias_key.replace('_', "-"));
2354                    all_keys.insert(alias_key.replace('-', "_"));
2355                    all_keys.insert(normalize_key(alias_key));
2356                }
2357            }
2358
2359            all_keys
2360        })
2361    }
2362
2363    /// Get the expected value type for a rule's configuration key, trying variants
2364    pub fn expected_value_for(&self, rule: &str, key: &str) -> Option<&toml::Value> {
2365        if let Some(schema) = self.rule_schemas.get(rule) {
2366            // Check if this key is an alias
2367            if let Some(aliases) = self.rule_aliases.get(rule)
2368                && let Some(canonical_key) = aliases.get(key)
2369            {
2370                // Use the canonical key for schema lookup
2371                if let Some(value) = schema.get(canonical_key) {
2372                    return Some(value);
2373                }
2374            }
2375
2376            // Try the original key
2377            if let Some(value) = schema.get(key) {
2378                return Some(value);
2379            }
2380
2381            // Try key variants
2382            let key_variants = [
2383                key.replace('-', "_"), // Convert kebab-case to snake_case
2384                key.replace('_', "-"), // Convert snake_case to kebab-case
2385                normalize_key(key),    // Normalized key (lowercase, kebab-case)
2386            ];
2387
2388            for variant in &key_variants {
2389                if let Some(value) = schema.get(variant) {
2390                    return Some(value);
2391                }
2392            }
2393        }
2394        None
2395    }
2396}
2397
/// Represents a config validation warning or error
#[derive(Debug, Clone)]
pub struct ConfigValidationWarning {
    /// Human-readable description of the problem.
    pub message: String,
    /// Name of the rule the warning refers to; `None` for warnings derived from
    /// the tracked unknown keys (global options and unknown rule sections).
    pub rule: Option<String>,
    /// Offending option key; `None` when the warning is about a rule as a whole.
    pub key: Option<String>,
}
2405
2406/// Validate a loaded config against the rule registry, using SourcedConfig for unknown key tracking
2407pub fn validate_config_sourced(sourced: &SourcedConfig, registry: &RuleRegistry) -> Vec<ConfigValidationWarning> {
2408    let mut warnings = Vec::new();
2409    let known_rules = registry.rule_names();
2410    // 1. Unknown rules
2411    for rule in sourced.rules.keys() {
2412        if !known_rules.contains(rule) {
2413            warnings.push(ConfigValidationWarning {
2414                message: format!("Unknown rule in config: {rule}"),
2415                rule: Some(rule.clone()),
2416                key: None,
2417            });
2418        }
2419    }
2420    // 2. Unknown options and type mismatches
2421    for (rule, rule_cfg) in &sourced.rules {
2422        if let Some(valid_keys) = registry.config_keys_for(rule) {
2423            for key in rule_cfg.values.keys() {
2424                if !valid_keys.contains(key) {
2425                    let valid_keys_vec: Vec<String> = valid_keys.iter().cloned().collect();
2426                    let message = if let Some(suggestion) = suggest_similar_key(key, &valid_keys_vec) {
2427                        format!("Unknown option for rule {rule}: {key} (did you mean: {suggestion}?)")
2428                    } else {
2429                        format!("Unknown option for rule {rule}: {key}")
2430                    };
2431                    warnings.push(ConfigValidationWarning {
2432                        message,
2433                        rule: Some(rule.clone()),
2434                        key: Some(key.clone()),
2435                    });
2436                } else {
2437                    // Type check: compare type of value to type of default
2438                    if let Some(expected) = registry.expected_value_for(rule, key) {
2439                        let actual = &rule_cfg.values[key].value;
2440                        if !toml_value_type_matches(expected, actual) {
2441                            warnings.push(ConfigValidationWarning {
2442                                message: format!(
2443                                    "Type mismatch for {}.{}: expected {}, got {}",
2444                                    rule,
2445                                    key,
2446                                    toml_type_name(expected),
2447                                    toml_type_name(actual)
2448                                ),
2449                                rule: Some(rule.clone()),
2450                                key: Some(key.clone()),
2451                            });
2452                        }
2453                    }
2454                }
2455            }
2456        }
2457    }
2458    // 3. Unknown global options (from unknown_keys)
2459    let known_global_keys = vec![
2460        "enable".to_string(),
2461        "disable".to_string(),
2462        "include".to_string(),
2463        "exclude".to_string(),
2464        "respect-gitignore".to_string(),
2465        "line-length".to_string(),
2466        "fixable".to_string(),
2467        "unfixable".to_string(),
2468        "flavor".to_string(),
2469        "force-exclude".to_string(),
2470        "output-format".to_string(),
2471        "cache-dir".to_string(),
2472        "cache".to_string(),
2473    ];
2474
2475    for (section, key, file_path) in &sourced.unknown_keys {
2476        if section.contains("[global]") || section.contains("[tool.rumdl]") {
2477            let message = if let Some(suggestion) = suggest_similar_key(key, &known_global_keys) {
2478                if let Some(path) = file_path {
2479                    format!("Unknown global option in {path}: {key} (did you mean: {suggestion}?)")
2480                } else {
2481                    format!("Unknown global option: {key} (did you mean: {suggestion}?)")
2482                }
2483            } else if let Some(path) = file_path {
2484                format!("Unknown global option in {path}: {key}")
2485            } else {
2486                format!("Unknown global option: {key}")
2487            };
2488            warnings.push(ConfigValidationWarning {
2489                message,
2490                rule: None,
2491                key: Some(key.clone()),
2492            });
2493        } else if !key.is_empty() {
2494            // This is an unknown rule section (key is empty means it's a section header)
2495            // No suggestions for rule names - just warn
2496            continue;
2497        } else {
2498            // Unknown rule section
2499            let message = if let Some(path) = file_path {
2500                format!(
2501                    "Unknown rule in {path}: {}",
2502                    section.trim_matches(|c| c == '[' || c == ']')
2503                )
2504            } else {
2505                format!(
2506                    "Unknown rule in config: {}",
2507                    section.trim_matches(|c| c == '[' || c == ']')
2508                )
2509            };
2510            warnings.push(ConfigValidationWarning {
2511                message,
2512                rule: None,
2513                key: None,
2514            });
2515        }
2516    }
2517    warnings
2518}
2519
2520fn toml_type_name(val: &toml::Value) -> &'static str {
2521    match val {
2522        toml::Value::String(_) => "string",
2523        toml::Value::Integer(_) => "integer",
2524        toml::Value::Float(_) => "float",
2525        toml::Value::Boolean(_) => "boolean",
2526        toml::Value::Array(_) => "array",
2527        toml::Value::Table(_) => "table",
2528        toml::Value::Datetime(_) => "datetime",
2529    }
2530}
2531
/// Calculate Levenshtein distance between two strings (simple implementation)
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes, so
/// non-ASCII input is handled correctly: replacing `é` with `e` counts as a
/// single substitution. Uses two rolling rows, i.e. memory proportional to the
/// length of `s2`.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();

    // All lengths below are char counts; mixing in byte lengths would index
    // past the end of the char vectors for multi-byte input.
    let len2 = s2_chars.len();

    if s1_chars.is_empty() {
        return len2;
    }
    if s2_chars.is_empty() {
        return s1_chars.len();
    }

    let mut prev_row: Vec<usize> = (0..=len2).collect();
    let mut curr_row = vec![0; len2 + 1];

    for (i, &c1) in s1_chars.iter().enumerate() {
        curr_row[0] = i + 1;
        for (j, &c2) in s2_chars.iter().enumerate() {
            let cost = usize::from(c1 != c2);
            curr_row[j + 1] = (prev_row[j + 1] + 1) // deletion
                .min(curr_row[j] + 1) // insertion
                .min(prev_row[j] + cost); // substitution
        }
        std::mem::swap(&mut prev_row, &mut curr_row);
    }

    prev_row[len2]
}
2563
2564/// Suggest a similar key from a list of valid keys using fuzzy matching
2565fn suggest_similar_key(unknown: &str, valid_keys: &[String]) -> Option<String> {
2566    let unknown_lower = unknown.to_lowercase();
2567    let max_distance = 2.max(unknown.len() / 3); // Allow up to 2 edits or 30% of string length
2568
2569    let mut best_match: Option<(String, usize)> = None;
2570
2571    for valid in valid_keys {
2572        let valid_lower = valid.to_lowercase();
2573        let distance = levenshtein_distance(&unknown_lower, &valid_lower);
2574
2575        if distance <= max_distance {
2576            if let Some((_, best_dist)) = &best_match {
2577                if distance < *best_dist {
2578                    best_match = Some((valid.clone(), distance));
2579                }
2580            } else {
2581                best_match = Some((valid.clone(), distance));
2582            }
2583        }
2584    }
2585
2586    best_match.map(|(key, _)| key)
2587}
2588
2589fn toml_value_type_matches(expected: &toml::Value, actual: &toml::Value) -> bool {
2590    use toml::Value::*;
2591    match (expected, actual) {
2592        (String(_), String(_)) => true,
2593        (Integer(_), Integer(_)) => true,
2594        (Float(_), Float(_)) => true,
2595        (Boolean(_), Boolean(_)) => true,
2596        (Array(_), Array(_)) => true,
2597        (Table(_), Table(_)) => true,
2598        (Datetime(_), Datetime(_)) => true,
2599        // Allow integer for float
2600        (Float(_), Integer(_)) => true,
2601        _ => false,
2602    }
2603}
2604
2605/// Parses pyproject.toml content and extracts the [tool.rumdl] section if present.
2606fn parse_pyproject_toml(content: &str, path: &str) -> Result<Option<SourcedConfigFragment>, ConfigError> {
2607    let doc: toml::Value =
2608        toml::from_str(content).map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2609    let mut fragment = SourcedConfigFragment::default();
2610    let source = ConfigSource::PyprojectToml;
2611    let file = Some(path.to_string());
2612
2613    // 1. Handle [tool.rumdl] and [tool.rumdl.global] sections
2614    if let Some(rumdl_config) = doc.get("tool").and_then(|t| t.get("rumdl"))
2615        && let Some(rumdl_table) = rumdl_config.as_table()
2616    {
2617        // Helper function to extract global config from a table
2618        let extract_global_config = |fragment: &mut SourcedConfigFragment, table: &toml::value::Table| {
2619            // Extract global options from the given table
2620            if let Some(enable) = table.get("enable")
2621                && let Ok(values) = Vec::<String>::deserialize(enable.clone())
2622            {
2623                // Normalize rule names in the list
2624                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2625                fragment
2626                    .global
2627                    .enable
2628                    .push_override(normalized_values, source, file.clone(), None);
2629            }
2630
2631            if let Some(disable) = table.get("disable")
2632                && let Ok(values) = Vec::<String>::deserialize(disable.clone())
2633            {
2634                // Re-enable normalization
2635                let normalized_values: Vec<String> = values.into_iter().map(|s| normalize_key(&s)).collect();
2636                fragment
2637                    .global
2638                    .disable
2639                    .push_override(normalized_values, source, file.clone(), None);
2640            }
2641
2642            if let Some(include) = table.get("include")
2643                && let Ok(values) = Vec::<String>::deserialize(include.clone())
2644            {
2645                fragment
2646                    .global
2647                    .include
2648                    .push_override(values, source, file.clone(), None);
2649            }
2650
2651            if let Some(exclude) = table.get("exclude")
2652                && let Ok(values) = Vec::<String>::deserialize(exclude.clone())
2653            {
2654                fragment
2655                    .global
2656                    .exclude
2657                    .push_override(values, source, file.clone(), None);
2658            }
2659
2660            if let Some(respect_gitignore) = table
2661                .get("respect-gitignore")
2662                .or_else(|| table.get("respect_gitignore"))
2663                && let Ok(value) = bool::deserialize(respect_gitignore.clone())
2664            {
2665                fragment
2666                    .global
2667                    .respect_gitignore
2668                    .push_override(value, source, file.clone(), None);
2669            }
2670
2671            if let Some(force_exclude) = table.get("force-exclude").or_else(|| table.get("force_exclude"))
2672                && let Ok(value) = bool::deserialize(force_exclude.clone())
2673            {
2674                fragment
2675                    .global
2676                    .force_exclude
2677                    .push_override(value, source, file.clone(), None);
2678            }
2679
2680            if let Some(output_format) = table.get("output-format").or_else(|| table.get("output_format"))
2681                && let Ok(value) = String::deserialize(output_format.clone())
2682            {
2683                if fragment.global.output_format.is_none() {
2684                    fragment.global.output_format = Some(SourcedValue::new(value.clone(), source));
2685                } else {
2686                    fragment
2687                        .global
2688                        .output_format
2689                        .as_mut()
2690                        .unwrap()
2691                        .push_override(value, source, file.clone(), None);
2692                }
2693            }
2694
2695            if let Some(fixable) = table.get("fixable")
2696                && let Ok(values) = Vec::<String>::deserialize(fixable.clone())
2697            {
2698                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2699                fragment
2700                    .global
2701                    .fixable
2702                    .push_override(normalized_values, source, file.clone(), None);
2703            }
2704
2705            if let Some(unfixable) = table.get("unfixable")
2706                && let Ok(values) = Vec::<String>::deserialize(unfixable.clone())
2707            {
2708                let normalized_values = values.into_iter().map(|s| normalize_key(&s)).collect();
2709                fragment
2710                    .global
2711                    .unfixable
2712                    .push_override(normalized_values, source, file.clone(), None);
2713            }
2714
2715            if let Some(flavor) = table.get("flavor")
2716                && let Ok(value) = MarkdownFlavor::deserialize(flavor.clone())
2717            {
2718                fragment.global.flavor.push_override(value, source, file.clone(), None);
2719            }
2720
2721            // Handle line-length special case - this should set the global line_length
2722            if let Some(line_length) = table.get("line-length").or_else(|| table.get("line_length"))
2723                && let Ok(value) = u64::deserialize(line_length.clone())
2724            {
2725                fragment
2726                    .global
2727                    .line_length
2728                    .push_override(LineLength::new(value as usize), source, file.clone(), None);
2729
2730                // Also add to MD013 rule config for backward compatibility
2731                let norm_md013_key = normalize_key("MD013");
2732                let rule_entry = fragment.rules.entry(norm_md013_key).or_default();
2733                let norm_line_length_key = normalize_key("line-length");
2734                let sv = rule_entry
2735                    .values
2736                    .entry(norm_line_length_key)
2737                    .or_insert_with(|| SourcedValue::new(line_length.clone(), ConfigSource::Default));
2738                sv.push_override(line_length.clone(), source, file.clone(), None);
2739            }
2740
2741            if let Some(cache_dir) = table.get("cache-dir").or_else(|| table.get("cache_dir"))
2742                && let Ok(value) = String::deserialize(cache_dir.clone())
2743            {
2744                if fragment.global.cache_dir.is_none() {
2745                    fragment.global.cache_dir = Some(SourcedValue::new(value.clone(), source));
2746                } else {
2747                    fragment
2748                        .global
2749                        .cache_dir
2750                        .as_mut()
2751                        .unwrap()
2752                        .push_override(value, source, file.clone(), None);
2753                }
2754            }
2755
2756            if let Some(cache) = table.get("cache")
2757                && let Ok(value) = bool::deserialize(cache.clone())
2758            {
2759                fragment.global.cache.push_override(value, source, file.clone(), None);
2760            }
2761        };
2762
2763        // First, check for [tool.rumdl.global] section
2764        if let Some(global_table) = rumdl_table.get("global").and_then(|g| g.as_table()) {
2765            extract_global_config(&mut fragment, global_table);
2766        }
2767
2768        // Also extract global options from [tool.rumdl] directly (for flat structure)
2769        extract_global_config(&mut fragment, rumdl_table);
2770
2771        // --- Extract per-file-ignores configurations ---
2772        // Check both hyphenated and underscored versions for compatibility
2773        let per_file_ignores_key = rumdl_table
2774            .get("per-file-ignores")
2775            .or_else(|| rumdl_table.get("per_file_ignores"));
2776
2777        if let Some(per_file_ignores_value) = per_file_ignores_key
2778            && let Some(per_file_table) = per_file_ignores_value.as_table()
2779        {
2780            let mut per_file_map = HashMap::new();
2781            for (pattern, rules_value) in per_file_table {
2782                if let Ok(rules) = Vec::<String>::deserialize(rules_value.clone()) {
2783                    let normalized_rules = rules.into_iter().map(|s| normalize_key(&s)).collect();
2784                    per_file_map.insert(pattern.clone(), normalized_rules);
2785                } else {
2786                    log::warn!(
2787                        "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {rules_value:?}"
2788                    );
2789                }
2790            }
2791            fragment
2792                .per_file_ignores
2793                .push_override(per_file_map, source, file.clone(), None);
2794        }
2795
2796        // --- Extract rule-specific configurations ---
2797        for (key, value) in rumdl_table {
2798            let norm_rule_key = normalize_key(key);
2799
2800            // Skip keys already handled as global or special cases
2801            if [
2802                "enable",
2803                "disable",
2804                "include",
2805                "exclude",
2806                "respect_gitignore",
2807                "respect-gitignore", // Added kebab-case here too
2808                "force_exclude",
2809                "force-exclude",
2810                "line_length",
2811                "line-length",
2812                "output_format",
2813                "output-format",
2814                "fixable",
2815                "unfixable",
2816                "per-file-ignores",
2817                "per_file_ignores",
2818                "global",
2819                "flavor",
2820                "cache_dir",
2821                "cache-dir",
2822                "cache",
2823            ]
2824            .contains(&norm_rule_key.as_str())
2825            {
2826                continue;
2827            }
2828
2829            // Explicitly check if the key looks like a rule name (e.g., starts with 'md')
2830            // AND if the value is actually a TOML table before processing as rule config.
2831            // This prevents misinterpreting other top-level keys under [tool.rumdl]
2832            let norm_rule_key_upper = norm_rule_key.to_ascii_uppercase();
2833            if norm_rule_key_upper.len() == 5
2834                && norm_rule_key_upper.starts_with("MD")
2835                && norm_rule_key_upper[2..].chars().all(|c| c.is_ascii_digit())
2836                && value.is_table()
2837            {
2838                if let Some(rule_config_table) = value.as_table() {
2839                    // Get the entry for this rule (e.g., "md013")
2840                    let rule_entry = fragment.rules.entry(norm_rule_key_upper).or_default();
2841                    for (rk, rv) in rule_config_table {
2842                        let norm_rk = normalize_key(rk); // Normalize the config key itself
2843
2844                        let toml_val = rv.clone();
2845
2846                        let sv = rule_entry
2847                            .values
2848                            .entry(norm_rk.clone())
2849                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
2850                        sv.push_override(toml_val, source, file.clone(), None);
2851                    }
2852                }
2853            } else {
2854                // Key is not a global/special key, doesn't start with 'md', or isn't a table.
2855                // Track unknown keys under [tool.rumdl] for validation
2856                fragment
2857                    .unknown_keys
2858                    .push(("[tool.rumdl]".to_string(), key.to_string(), Some(path.to_string())));
2859            }
2860        }
2861    }
2862
2863    // 2. Handle [tool.rumdl.MDxxx] sections as rule-specific config (nested under [tool])
2864    if let Some(tool_table) = doc.get("tool").and_then(|t| t.as_table()) {
2865        for (key, value) in tool_table.iter() {
2866            if let Some(rule_name) = key.strip_prefix("rumdl.") {
2867                let norm_rule_name = normalize_key(rule_name);
2868                if norm_rule_name.len() == 5
2869                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2870                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2871                    && let Some(rule_table) = value.as_table()
2872                {
2873                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2874                    for (rk, rv) in rule_table {
2875                        let norm_rk = normalize_key(rk);
2876                        let toml_val = rv.clone();
2877                        let sv = rule_entry
2878                            .values
2879                            .entry(norm_rk.clone())
2880                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2881                        sv.push_override(toml_val, source, file.clone(), None);
2882                    }
2883                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2884                    // Track unknown rule sections like [tool.rumdl.MD999]
2885                    fragment.unknown_keys.push((
2886                        format!("[tool.rumdl.{rule_name}]"),
2887                        String::new(),
2888                        Some(path.to_string()),
2889                    ));
2890                }
2891            }
2892        }
2893    }
2894
2895    // 3. Handle [tool.rumdl.MDxxx] sections as top-level keys (e.g., [tool.rumdl.MD007])
2896    if let Some(doc_table) = doc.as_table() {
2897        for (key, value) in doc_table.iter() {
2898            if let Some(rule_name) = key.strip_prefix("tool.rumdl.") {
2899                let norm_rule_name = normalize_key(rule_name);
2900                if norm_rule_name.len() == 5
2901                    && norm_rule_name.to_ascii_uppercase().starts_with("MD")
2902                    && norm_rule_name[2..].chars().all(|c| c.is_ascii_digit())
2903                    && let Some(rule_table) = value.as_table()
2904                {
2905                    let rule_entry = fragment.rules.entry(norm_rule_name.to_ascii_uppercase()).or_default();
2906                    for (rk, rv) in rule_table {
2907                        let norm_rk = normalize_key(rk);
2908                        let toml_val = rv.clone();
2909                        let sv = rule_entry
2910                            .values
2911                            .entry(norm_rk.clone())
2912                            .or_insert_with(|| SourcedValue::new(toml_val.clone(), source));
2913                        sv.push_override(toml_val, source, file.clone(), None);
2914                    }
2915                } else if rule_name.to_ascii_uppercase().starts_with("MD") {
2916                    // Track unknown rule sections like [tool.rumdl.MD999]
2917                    fragment.unknown_keys.push((
2918                        format!("[tool.rumdl.{rule_name}]"),
2919                        String::new(),
2920                        Some(path.to_string()),
2921                    ));
2922                }
2923            }
2924        }
2925    }
2926
2927    // Only return Some(fragment) if any config was found
2928    let has_any = !fragment.global.enable.value.is_empty()
2929        || !fragment.global.disable.value.is_empty()
2930        || !fragment.global.include.value.is_empty()
2931        || !fragment.global.exclude.value.is_empty()
2932        || !fragment.global.fixable.value.is_empty()
2933        || !fragment.global.unfixable.value.is_empty()
2934        || fragment.global.output_format.is_some()
2935        || fragment.global.cache_dir.is_some()
2936        || !fragment.global.cache.value
2937        || !fragment.per_file_ignores.value.is_empty()
2938        || !fragment.rules.is_empty();
2939    if has_any { Ok(Some(fragment)) } else { Ok(None) }
2940}
2941
2942/// Parses rumdl.toml / .rumdl.toml content.
2943fn parse_rumdl_toml(content: &str, path: &str, source: ConfigSource) -> Result<SourcedConfigFragment, ConfigError> {
2944    let doc = content
2945        .parse::<DocumentMut>()
2946        .map_err(|e| ConfigError::ParseError(format!("{path}: Failed to parse TOML: {e}")))?;
2947    let mut fragment = SourcedConfigFragment::default();
2948    // source parameter provided by caller
2949    let file = Some(path.to_string());
2950
2951    // Define known rules before the loop
2952    let all_rules = rules::all_rules(&Config::default());
2953    let registry = RuleRegistry::from_rules(&all_rules);
2954    let known_rule_names: BTreeSet<String> = registry
2955        .rule_names()
2956        .into_iter()
2957        .map(|s| s.to_ascii_uppercase())
2958        .collect();
2959
2960    // Handle [global] section
2961    if let Some(global_item) = doc.get("global")
2962        && let Some(global_table) = global_item.as_table()
2963    {
2964        for (key, value_item) in global_table.iter() {
2965            let norm_key = normalize_key(key);
2966            match norm_key.as_str() {
2967                "enable" | "disable" | "include" | "exclude" => {
2968                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
2969                        // Corrected: Iterate directly over the Formatted<Array>
2970                        let values: Vec<String> = formatted_array
2971                                .iter()
2972                                .filter_map(|item| item.as_str()) // Extract strings
2973                                .map(|s| s.to_string())
2974                                .collect();
2975
2976                        // Normalize rule names for enable/disable
2977                        let final_values = if norm_key == "enable" || norm_key == "disable" {
2978                            // Corrected: Pass &str to normalize_key
2979                            values.into_iter().map(|s| normalize_key(&s)).collect()
2980                        } else {
2981                            values
2982                        };
2983
2984                        match norm_key.as_str() {
2985                            "enable" => fragment
2986                                .global
2987                                .enable
2988                                .push_override(final_values, source, file.clone(), None),
2989                            "disable" => {
2990                                fragment
2991                                    .global
2992                                    .disable
2993                                    .push_override(final_values, source, file.clone(), None)
2994                            }
2995                            "include" => {
2996                                fragment
2997                                    .global
2998                                    .include
2999                                    .push_override(final_values, source, file.clone(), None)
3000                            }
3001                            "exclude" => {
3002                                fragment
3003                                    .global
3004                                    .exclude
3005                                    .push_override(final_values, source, file.clone(), None)
3006                            }
3007                            _ => unreachable!("Outer match guarantees only enable/disable/include/exclude"),
3008                        }
3009                    } else {
3010                        log::warn!(
3011                            "[WARN] Expected array for global key '{}' in {}, found {}",
3012                            key,
3013                            path,
3014                            value_item.type_name()
3015                        );
3016                    }
3017                }
3018                "respect_gitignore" | "respect-gitignore" => {
3019                    // Handle both cases
3020                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3021                        let val = *formatted_bool.value();
3022                        fragment
3023                            .global
3024                            .respect_gitignore
3025                            .push_override(val, source, file.clone(), None);
3026                    } else {
3027                        log::warn!(
3028                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3029                            key,
3030                            path,
3031                            value_item.type_name()
3032                        );
3033                    }
3034                }
3035                "force_exclude" | "force-exclude" => {
3036                    // Handle both cases
3037                    if let Some(toml_edit::Value::Boolean(formatted_bool)) = value_item.as_value() {
3038                        let val = *formatted_bool.value();
3039                        fragment
3040                            .global
3041                            .force_exclude
3042                            .push_override(val, source, file.clone(), None);
3043                    } else {
3044                        log::warn!(
3045                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3046                            key,
3047                            path,
3048                            value_item.type_name()
3049                        );
3050                    }
3051                }
3052                "line_length" | "line-length" => {
3053                    // Handle both cases
3054                    if let Some(toml_edit::Value::Integer(formatted_int)) = value_item.as_value() {
3055                        let val = LineLength::new(*formatted_int.value() as usize);
3056                        fragment
3057                            .global
3058                            .line_length
3059                            .push_override(val, source, file.clone(), None);
3060                    } else {
3061                        log::warn!(
3062                            "[WARN] Expected integer for global key '{}' in {}, found {}",
3063                            key,
3064                            path,
3065                            value_item.type_name()
3066                        );
3067                    }
3068                }
3069                "output_format" | "output-format" => {
3070                    // Handle both cases
3071                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3072                        let val = formatted_string.value().clone();
3073                        if fragment.global.output_format.is_none() {
3074                            fragment.global.output_format = Some(SourcedValue::new(val.clone(), source));
3075                        } else {
3076                            fragment.global.output_format.as_mut().unwrap().push_override(
3077                                val,
3078                                source,
3079                                file.clone(),
3080                                None,
3081                            );
3082                        }
3083                    } else {
3084                        log::warn!(
3085                            "[WARN] Expected string for global key '{}' in {}, found {}",
3086                            key,
3087                            path,
3088                            value_item.type_name()
3089                        );
3090                    }
3091                }
3092                "cache_dir" | "cache-dir" => {
3093                    // Handle both cases
3094                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3095                        let val = formatted_string.value().clone();
3096                        if fragment.global.cache_dir.is_none() {
3097                            fragment.global.cache_dir = Some(SourcedValue::new(val.clone(), source));
3098                        } else {
3099                            fragment
3100                                .global
3101                                .cache_dir
3102                                .as_mut()
3103                                .unwrap()
3104                                .push_override(val, source, file.clone(), None);
3105                        }
3106                    } else {
3107                        log::warn!(
3108                            "[WARN] Expected string for global key '{}' in {}, found {}",
3109                            key,
3110                            path,
3111                            value_item.type_name()
3112                        );
3113                    }
3114                }
3115                "cache" => {
3116                    if let Some(toml_edit::Value::Boolean(b)) = value_item.as_value() {
3117                        let val = *b.value();
3118                        fragment.global.cache.push_override(val, source, file.clone(), None);
3119                    } else {
3120                        log::warn!(
3121                            "[WARN] Expected boolean for global key '{}' in {}, found {}",
3122                            key,
3123                            path,
3124                            value_item.type_name()
3125                        );
3126                    }
3127                }
3128                "fixable" => {
3129                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3130                        let values: Vec<String> = formatted_array
3131                            .iter()
3132                            .filter_map(|item| item.as_str())
3133                            .map(normalize_key)
3134                            .collect();
3135                        fragment
3136                            .global
3137                            .fixable
3138                            .push_override(values, source, file.clone(), None);
3139                    } else {
3140                        log::warn!(
3141                            "[WARN] Expected array for global key '{}' in {}, found {}",
3142                            key,
3143                            path,
3144                            value_item.type_name()
3145                        );
3146                    }
3147                }
3148                "unfixable" => {
3149                    if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3150                        let values: Vec<String> = formatted_array
3151                            .iter()
3152                            .filter_map(|item| item.as_str())
3153                            .map(normalize_key)
3154                            .collect();
3155                        fragment
3156                            .global
3157                            .unfixable
3158                            .push_override(values, source, file.clone(), None);
3159                    } else {
3160                        log::warn!(
3161                            "[WARN] Expected array for global key '{}' in {}, found {}",
3162                            key,
3163                            path,
3164                            value_item.type_name()
3165                        );
3166                    }
3167                }
3168                "flavor" => {
3169                    if let Some(toml_edit::Value::String(formatted_string)) = value_item.as_value() {
3170                        let val = formatted_string.value();
3171                        if let Ok(flavor) = MarkdownFlavor::from_str(val) {
3172                            fragment.global.flavor.push_override(flavor, source, file.clone(), None);
3173                        } else {
3174                            log::warn!("[WARN] Unknown markdown flavor '{val}' in {path}");
3175                        }
3176                    } else {
3177                        log::warn!(
3178                            "[WARN] Expected string for global key '{}' in {}, found {}",
3179                            key,
3180                            path,
3181                            value_item.type_name()
3182                        );
3183                    }
3184                }
3185                _ => {
3186                    // Track unknown global keys for validation
3187                    fragment
3188                        .unknown_keys
3189                        .push(("[global]".to_string(), key.to_string(), Some(path.to_string())));
3190                    log::warn!("[WARN] Unknown key in [global] section of {path}: {key}");
3191                }
3192            }
3193        }
3194    }
3195
3196    // Handle [per-file-ignores] section
3197    if let Some(per_file_item) = doc.get("per-file-ignores")
3198        && let Some(per_file_table) = per_file_item.as_table()
3199    {
3200        let mut per_file_map = HashMap::new();
3201        for (pattern, value_item) in per_file_table.iter() {
3202            if let Some(toml_edit::Value::Array(formatted_array)) = value_item.as_value() {
3203                let rules: Vec<String> = formatted_array
3204                    .iter()
3205                    .filter_map(|item| item.as_str())
3206                    .map(normalize_key)
3207                    .collect();
3208                per_file_map.insert(pattern.to_string(), rules);
3209            } else {
3210                let type_name = value_item.type_name();
3211                log::warn!(
3212                    "[WARN] Expected array for per-file-ignores pattern '{pattern}' in {path}, found {type_name}"
3213                );
3214            }
3215        }
3216        fragment
3217            .per_file_ignores
3218            .push_override(per_file_map, source, file.clone(), None);
3219    }
3220
3221    // Rule-specific: all other top-level tables
3222    for (key, item) in doc.iter() {
3223        let norm_rule_name = key.to_ascii_uppercase();
3224
3225        // Skip known special sections
3226        if key == "global" || key == "per-file-ignores" {
3227            continue;
3228        }
3229
3230        // Track unknown rule sections (like [MD999])
3231        if !known_rule_names.contains(&norm_rule_name) {
3232            // Only track if it looks like a rule section (starts with MD or is uppercase)
3233            if norm_rule_name.starts_with("MD") || key.chars().all(|c| c.is_uppercase() || c.is_numeric()) {
3234                fragment
3235                    .unknown_keys
3236                    .push((format!("[{key}]"), String::new(), Some(path.to_string())));
3237            }
3238            continue;
3239        }
3240
3241        if let Some(tbl) = item.as_table() {
3242            let rule_entry = fragment.rules.entry(norm_rule_name.clone()).or_default();
3243            for (rk, rv_item) in tbl.iter() {
3244                let norm_rk = normalize_key(rk);
3245                let maybe_toml_val: Option<toml::Value> = match rv_item.as_value() {
3246                    Some(toml_edit::Value::String(formatted)) => Some(toml::Value::String(formatted.value().clone())),
3247                    Some(toml_edit::Value::Integer(formatted)) => Some(toml::Value::Integer(*formatted.value())),
3248                    Some(toml_edit::Value::Float(formatted)) => Some(toml::Value::Float(*formatted.value())),
3249                    Some(toml_edit::Value::Boolean(formatted)) => Some(toml::Value::Boolean(*formatted.value())),
3250                    Some(toml_edit::Value::Datetime(formatted)) => Some(toml::Value::Datetime(*formatted.value())),
3251                    Some(toml_edit::Value::Array(formatted_array)) => {
3252                        // Convert toml_edit Array to toml::Value::Array
3253                        let mut values = Vec::new();
3254                        for item in formatted_array.iter() {
3255                            match item {
3256                                toml_edit::Value::String(formatted) => {
3257                                    values.push(toml::Value::String(formatted.value().clone()))
3258                                }
3259                                toml_edit::Value::Integer(formatted) => {
3260                                    values.push(toml::Value::Integer(*formatted.value()))
3261                                }
3262                                toml_edit::Value::Float(formatted) => {
3263                                    values.push(toml::Value::Float(*formatted.value()))
3264                                }
3265                                toml_edit::Value::Boolean(formatted) => {
3266                                    values.push(toml::Value::Boolean(*formatted.value()))
3267                                }
3268                                toml_edit::Value::Datetime(formatted) => {
3269                                    values.push(toml::Value::Datetime(*formatted.value()))
3270                                }
3271                                _ => {
3272                                    log::warn!(
3273                                        "[WARN] Skipping unsupported array element type in key '{norm_rule_name}.{norm_rk}' in {path}"
3274                                    );
3275                                }
3276                            }
3277                        }
3278                        Some(toml::Value::Array(values))
3279                    }
3280                    Some(toml_edit::Value::InlineTable(_)) => {
3281                        log::warn!(
3282                            "[WARN] Skipping inline table value for key '{norm_rule_name}.{norm_rk}' in {path}. Table conversion not yet fully implemented in parser."
3283                        );
3284                        None
3285                    }
3286                    None => {
3287                        log::warn!(
3288                            "[WARN] Skipping non-value item for key '{norm_rule_name}.{norm_rk}' in {path}. Expected simple value."
3289                        );
3290                        None
3291                    }
3292                };
3293                if let Some(toml_val) = maybe_toml_val {
3294                    let sv = rule_entry
3295                        .values
3296                        .entry(norm_rk.clone())
3297                        .or_insert_with(|| SourcedValue::new(toml_val.clone(), ConfigSource::Default));
3298                    sv.push_override(toml_val, source, file.clone(), None);
3299                }
3300            }
3301        } else if item.is_value() {
3302            log::warn!("[WARN] Ignoring top-level value key in {path}: '{key}'. Expected a table like [{key}].");
3303        }
3304    }
3305
3306    Ok(fragment)
3307}
3308
3309/// Loads and converts a markdownlint config file (.json or .yaml) into a SourcedConfigFragment.
3310fn load_from_markdownlint(path: &str) -> Result<SourcedConfigFragment, ConfigError> {
3311    // Use the unified loader from markdownlint_config.rs
3312    let ml_config = crate::markdownlint_config::load_markdownlint_config(path)
3313        .map_err(|e| ConfigError::ParseError(format!("{path}: {e}")))?;
3314    Ok(ml_config.map_to_sourced_rumdl_config_fragment(Some(path)))
3315}
3316
3317#[cfg(test)]
3318#[path = "config_intelligent_merge_tests.rs"]
3319mod config_intelligent_merge_tests;